In [1]:
import sys
# Add the shared data-tools directory to the module search path.
# NOTE(review): presumably this is where the `helper` module imported further down lives — confirm.
sys.path.append('../../30_data_tools/')

In [9]:
import pandas as pd
from PIL import Image
from skimage.metrics import structural_similarity as ssim
from pathlib import Path
import numpy as np
import sqlite3
import json
from datetime import datetime

In [3]:
from helper import load_dotenv

# Qualität der generierten Muster bewerten

Dieses Notebook dient zur Erkundung, ob sich bewerten lässt, ob eine erzeugte Kachel der Grafik ein sinnvolles Muster hinzufügt.
Dafür wird die erzeugte Kachel mithilfe der Structural Similarity (SSIM) mit der Originalkachel verglichen.

In [4]:
# Load the project configuration. The cells in this notebook read the keys
# DB_PATH, DATA_DIR, GENERIC_INFORMATION_DATA_DIR and GENERIC_GENERATED_DATA_DIR.
# NOTE(review): the directory entries are used like pathlib.Path objects
# (`.glob`, `/`) — presumably load_dotenv converts them; confirm.
config = load_dotenv()

In [5]:
# Open the SQLite database at config['DB_PATH']; it is queried for the
# `related_file` table below.
# NOTE(review): no visible cell closes this connection.
con = sqlite3.connect(config['DB_PATH'])

In [None]:
# Select every 'halftone300dpi' 4c file that has a masks file for the same job
# and PDF. The LEFT JOIN together with the `has_mask IS NOT NULL` filter keeps
# only rows that found a match.
images = pd.read_sql(
    '''
        SELECT cf.*, mf.mask_filename FROM (
        	SELECT * FROM related_file
        	WHERE variant_name = 'halftone300dpi' AND "type" = '4c'
        ) cf
        LEFT JOIN (
        	SELECT job, pdf_filename, filename AS mask_filename, 1 AS has_mask FROM related_file 
        	WHERE variant_name = 'halftone300dpi' AND "type" = 'masks'
        ) mf ON cf.job=mf.job AND cf.pdf_filename=mf.pdf_filename 
        WHERE mf.has_mask IS NOT NULL
    ''',
    con
)

# Merge key: the file name without '.jpg'. The pattern is matched literally
# (regex=False); interpreted as a regex, the '.' would match any character and
# could also strip e.g. 'xjpg' from the middle of a name.
images['image'] = images.filename.str.replace('.jpg', '', regex=False)

# Keep only jobs whose name starts with six digits (`str.match` anchors at the
# start of the string). The raw string avoids the invalid '\d' escape, and
# na=False excludes rows with a missing job instead of failing on a NaN mask.
images = images.loc[
    images.job.str.match(r'\d{6}', na=False)
]

In [18]:
# Read every pickled frame in the information directory. The file name without
# its suffix is parsed as a Unix timestamp and stored with the frame's rows.
# NOTE: unpickling can execute arbitrary code — only load trusted files.
dfs = []

for pkl_file in config['GENERIC_INFORMATION_DATA_DIR'].glob('./*.pkl'):
    unix_seconds = int(pkl_file.name.replace( pkl_file.suffix, '' ))
    created_at = datetime.fromtimestamp(unix_seconds)
    dfs.append(pd.read_pickle(pkl_file).assign(timestamp=created_at))

data = pd.concat(dfs, ignore_index=True)

In [28]:
# Summary statistics of the numeric columns, one row per column.
data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ssim,6700.0,0.917238,0.073111,0.458884,0.856096,0.924983,0.997031,1.0
scale,1516.0,1.5,0.0,1.5,1.5,1.5,1.5,1.5
trapezoidal_distortion_strength_top,131.0,0.124406,0.074717,0.00054,0.056195,0.123045,0.190663,0.24701
trapezoidal_distortion_strength_bottom,131.0,0.119531,0.068557,0.001051,0.061809,0.1131,0.17511,0.249386
trapezoidal_distortion_strength,1341.0,0.123295,0.072188,7.2e-05,0.06018,0.121458,0.185322,0.24938
stretch_x,1573.0,1.255189,0.143097,1.000027,1.131894,1.260829,1.380663,1.499871
stretch_y,1573.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
overlay_intensity_C,6040.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
overlay_intensity_M,6040.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
overlay_intensity_Y,6040.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Show all rows whose basic_name contains this exact text. The name is matched
# literally: with the default regex=True every '.' would match any character.
data.loc[
    data.basic_name.str.contains('508890.HBZP_1_M_269.p1', regex=False)
]

In [None]:
# Reload all pickled frames (this time without the timestamp column) and attach
# the file information of the matching original image via the `image` key.
# NOTE: unpickling can execute arbitrary code — only load trusted files.
pickle_paths = config['GENERIC_INFORMATION_DATA_DIR'].glob('./*.pkl')
data = pd.concat([pd.read_pickle(pkl_file) for pkl_file in pickle_paths])

image_info = images.loc[:, ['image','filename', 'variant_name','job']]

# Left join: rows without a matching image keep NaN in the added columns.
data = data.merge(image_info, how="left", on="image")

In [None]:
def measure_similarity( row, channel=3 ):
    """Compare every masked tile of a generated image with the original image.

    Parameters
    ----------
    row
        Record with the attributes ``job``, ``variant_name``, ``filename``,
        ``basic_name`` and ``method``. They are used to build the paths of the
        original image, the generated image and the masks JSON file.
    channel : int, default 3
        Index on the last image axis that is compared.
        NOTE(review): presumably the images are CMYK, which would make index 3
        the K channel — confirm.

    Returns
    -------
    list of tuple
        One ``(ssim_value, generated_tile)`` pair per mask, where
        ``generated_tile`` is the cropped channel of the generated image.

    Raises
    ------
    FileNotFoundError
        If the original image, the generated image or the masks file is missing.
    """
    orig_img_path = config['DATA_DIR'] / row.job / row.variant_name / row.filename
    generated_img_path = config['GENERIC_GENERATED_DATA_DIR'] / f"{row.basic_name.replace('$PLACEHOLDER$',row.method)}.jpg"
    masks_path = config['GENERIC_GENERATED_DATA_DIR'] / f"{row.basic_name.replace('$PLACEHOLDER$','masks')}.json"

    missing = [
        str(path)
        for path in (orig_img_path, generated_img_path, masks_path)
        if not path.exists()
    ]
    if missing:
        # Raise a real exception type; a bare string cannot be raised in Python 3.
        raise FileNotFoundError(f"file does not exist: {', '.join(missing)}")

    with masks_path.open() as masks_file:
        masks = json.load(masks_file)

    # Read the pixel data inside `with` so the image files are closed right away.
    with Image.open(orig_img_path) as img_file:
        orig_img = np.array(img_file)
    with Image.open(generated_img_path) as img_file:
        generated_img = np.array(img_file)

    out = []

    for m in masks:
        # m[0]/m[2] are offset and extent along axis 1, m[1]/m[3] along axis 0.
        left, top, width, height = m[0], m[1], m[2], m[3]
        region = (slice(top, top + height), slice(left, left + width), channel)

        generated_tile = generated_img[region]
        out.append((ssim(orig_img[region], generated_tile), generated_tile))

    return out

In [None]:
# One row per distinct combination of the columns measure_similarity reads.
tile_key_columns = ['image','method','basic_name','filename','variant_name','job']
filtered_data = data[tile_key_columns].drop_duplicates()

In [None]:
# Number of rows to inspect and the seed that makes the random draw repeatable.
N_SAMPLED_ROWS = 25
SAMPLE_SEED = 42

# Draw at most N_SAMPLED_ROWS rows; capping at the frame length avoids the
# ValueError `sample` raises when asked for more rows than exist.
sampled_rows = filtered_data.sample(
    n=min(N_SAMPLED_ROWS, len(filtered_data)),
    random_state=SAMPLE_SEED
)

# One (ssim_value, generated_tile, basic_name) entry per mask of each sampled row.
tiles = []

for idx, row in sampled_rows.iterrows():
    tiles.extend(
        (ssim_value, generated_tile, row.basic_name)
        for ssim_value, generated_tile in measure_similarity(row)
    )

In [None]:
def show_tile( tile ):
    """Print a tile's name and SSIM value and return its inverted image.

    ``tile`` is indexed as ``(ssim_value, pixel_array, basic_name)``. The pixel
    values are subtracted from 255 and the result is converted to mode 'L'.
    """
    ssim_value, pixels, basic_name = tile[0], tile[1], tile[2]
    print( basic_name, ssim_value, sep='\n' )

    inverted = 255 - pixels
    return Image.fromarray( inverted ).convert('L')

In [None]:
# Position in `tiles` of the tile the next cell shows; re-run this cell to start over at the first tile.
i = 0

In [None]:
# Show the tile at position `i`, then advance `i` (wrapping to 0 after the last
# tile) so that re-running this cell steps through all tiles.
img = show_tile( tiles[i] )
print(i)

i = 0 if i + 1 >= len(tiles) else i + 1

img