In [60]:
import sys
sys.path.append('../../30_data_tools/')

In [61]:
import pandas as pd
import plotly.express as px
from datetime import datetime
from scipy.stats import chisquare
from sklearn import preprocessing

In [62]:
from PIL import Image
import numpy as np
import sqlite3
from pathlib import Path

In [63]:
from helper import load_dotenv
from get_labelstudio_data import get_results_of_project

In [64]:
# Load the project configuration through the local `helper` module.
# It is used like a mapping below (e.g. dotenv['DB_PATH']).
dotenv = load_dotenv()

In [65]:
# Open the SQLite database whose location is configured under 'DB_PATH'.
# NOTE(review): no cell in the visible notebook closes this connection.
con = sqlite3.connect( dotenv['DB_PATH'] )

In [66]:
def load_data( pkl_path ):
    """Read one pickle file and tag every row with the timestamp encoded in its name.

    The file name without its extension is parsed as a Unix epoch in seconds
    and written, converted to a local-time ``datetime``, into a ``timestamp``
    column of the loaded object.

    Parameters
    ----------
    pkl_path : pathlib.Path
        Path to a pickle file named ``<unix_seconds>.<ext>``. The pickle is
        assumed to contain a pandas DataFrame.

    Returns
    -------
    pandas.DataFrame
        The unpickled frame with ``timestamp`` set on all rows.

    Raises
    ------
    ValueError
        If the file stem cannot be parsed as an integer.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    data = pd.read_pickle(pkl_path)

    # `Path.stem` removes exactly the final extension. The previous
    # `name.strip(suffix)` treated the suffix as a set of characters and
    # stripped any of them from BOTH ends of the name, which corrupts stems
    # that begin or end with one of those characters.
    data.loc[
        :,
        'timestamp'
    ] = datetime.fromtimestamp( int(pkl_path.stem) )

    return data

In [148]:
# Collect the *.pkl files located directly inside the configured data directory (the pattern is not recursive).
# NOTE(review): the order of glob results is presumably filesystem-dependent; sort if row order matters.
pkls = list(dotenv['GENERIC_INFORMATION_DATA_DIR'].glob('./*.pkl'))

In [149]:
# Load every pickle and stack the resulting frames row-wise.
pkl_data = pd.concat([load_data(pkl_path) for pkl_path in pkls])

# The exploratory cells that follow read a frame named `data` (columns such as
# `pattern`, `img_path`, `use_*` and a sequence-valued `bbox`), but no earlier
# cell binds that name, so a fresh "Restart & Run All" would stop with a
# NameError. Bind it to the pickle frame here; `data` is rebound to the SQL
# result further down the notebook.
# NOTE(review): presumably this is the frame those cells were written for — confirm.
data = pkl_data

OSError: [Errno 89] Operation canceled

# Allgemeine Zusammenhänge

In [None]:
# Show the first record to see which fields a row carries.
data.iloc[0]

In [None]:
# List all available columns.
data.columns

In [None]:
# Summary statistics (count, mean, std, quantiles) of the `ssim` column.
data.ssim.describe()

In [None]:
# Sort the SSIM values ascending and renumber them 0..n-1 so the scatter plot
# shows rank on the x axis and SSIM on the y axis.
ssim_sorted = data['ssim'].sort_values().reset_index(drop=True)

px.scatter(ssim_sorted)

In [None]:
# (rows, columns) of the frame under inspection.
data.shape

In [None]:
# Column-wise totals of every column whose name starts with 'use_'.
use_columns = [name for name in data.columns if name.startswith('use_')]

data[use_columns].sum()

In [None]:
def _bbox_area(bbox):
    """Multiply the 3rd and 4th bbox entries (used as width and height elsewhere in this notebook)."""
    return bbox[2] * bbox[3]


# Store the bounding-box area of every row in a new `area` column.
data.loc[:, 'area'] = data['bbox'].map(_bbox_area)

In [None]:
# Correlation matrix (pandas default method) between SSIM and bounding-box area.
data[['ssim', 'area']].corr()

In [None]:
# Encode each distinct `pattern` value as an integer code so it can be fed
# into a numeric correlation together with `ssim`.
pattern_codes = pd.factorize(data['pattern'])[0]

df = data['ssim'].to_frame()
df.loc[:, 'pattern'] = pattern_codes

In [None]:
# Rank (Spearman) correlation between `ssim` and the integer pattern codes.
# NOTE(review): the codes come from pd.factorize and are nominal labels, so the
# coefficient depends on the arbitrary code order — interpret with care.
df.corr('spearman')

In [None]:
# Linear (Pearson) correlation on the same two columns, for comparison.
df.corr('pearson')

# Zusammenhang: ssim und Flächendeckung

In [None]:
def get_edge_share( row, edge_limit ):
    """Return the share of near-extreme pixels in channel 3 of a cropped image region.

    The image at ``row.img_path`` is cropped to ``row.bbox`` (read as
    ``(x, y, width, height)``). Within the crop, the function counts pixels of
    channel index 3 whose value is below ``edge_limit`` or above
    ``255 - edge_limit`` and divides by the number of pixels in the crop.

    Parameters
    ----------
    row : object
        Anything exposing ``img_path`` and an indexable ``bbox`` attribute,
        e.g. a DataFrame row.
    edge_limit : int
        Distance from 0 and from 255 within which a value counts as extreme.

    Returns
    -------
    float
        Fraction in ``[0, 1]``; ``nan`` if the crop contains no pixels.

    Raises
    ------
    IndexError
        If the image has fewer than four channels.
    """
    left, top = row.bbox[0], row.bbox[1]
    right, bottom = left + row.bbox[2], top + row.bbox[3]

    # The context manager closes the underlying file handle; without it every
    # call made through DataFrame.apply left one image file open.
    with Image.open( row.img_path ) as img:
        cropped = img.crop(( left, top, right, bottom ))
        # Channel index 3 — presumably the K separation of a CMYK image (TODO confirm).
        k_separation = np.array(cropped)[:,:,3]

    # A zero-area bbox would otherwise end in a division by zero.
    if k_separation.size == 0:
        return float('nan')

    near_extreme = (k_separation < edge_limit) | (k_separation > (255 - edge_limit))

    return np.count_nonzero(near_extreme) / k_separation.size

In [None]:
# Values closer than this to 0 or to 255 are counted by get_edge_share.
EDGE_LIMIT = 10

# Row-wise evaluation: opens and crops one image per row.
data.loc[:, 'edge_share'] = data.apply(
    lambda record: get_edge_share(record, EDGE_LIMIT),
    axis=1,
)

In [None]:
def _min_max_column(series):
    """Min-max scale a Series; the result is an ndarray of shape (n, 1)."""
    as_column = series.to_numpy().reshape((-1,1))
    return preprocessing.MinMaxScaler().fit_transform(as_column)


# Attribute access yields a pandas Series; the conversion to a NumPy array
# happens inside the helper.
edge_share = data.edge_share
edge_share_normalized = _min_max_column(edge_share)

ssim = data.ssim
ssim_normalized = _min_max_column(ssim)

In [None]:
# Write the scaled values back into the frame as new columns.
# Both arrays come from MinMaxScaler.fit_transform on an (n, 1) input.
data.loc[
    :,
    'edge_share_normalized'
] = edge_share_normalized

data.loc[
    :,
    'ssim_normalized'
] = ssim_normalized

In [None]:
# Correlation matrix (pandas default: Pearson) between SSIM and edge share.
data.loc[
    :,
    ['ssim','edge_share']
].corr()

In [None]:
# Same correlation on the min-max scaled columns.
# Pearson correlation is unchanged by a positive linear rescaling, so this is
# expected to match the un-normalized result above.
data.loc[
    :,
    ['ssim_normalized','edge_share_normalized']
].corr()

In [None]:
# Image path of the first record.
data.iloc[0].img_path

# Kategorien von Masken

In [150]:
# Load one row per applied adjustment (table `adjustment_per_mask`) and enrich it with
#   * overlay_intensity_K, ssim and bbox from `mask`
#     (matched on pdf_filename, job, type, variant_name, method, idx, mask_id), and
#   * timestamp from `generic_image`
#     (matched on the same keys without mask_id).
# Both joins are LEFT JOINs, so adjustments without a matching mask row are kept
# and carry NULL/NaN in the mask columns.
# `timestamp` is parsed into datetimes; `bbox` arrives as a ';'-separated string.
# NOTE: this rebinds the name `data` used by the earlier sections of the notebook.
data = pd.read_sql(
    '''
        SELECT apm.*, m.overlay_intensity_K, m.ssim, m.bbox, gi."timestamp" FROM adjustment_per_mask apm 
        LEFT JOIN mask m
        ON
        	apm.pdf_filename = m.pdf_filename AND
        	apm.job = m.job AND
        	apm."type" = m."type" AND 
        	apm.variant_name = m.variant_name AND 
        	apm."method" = m."method" AND 
        	apm.idx = m.idx AND 
        	apm.mask_id = m.mask_id 
        LEFT JOIN generic_image gi 
        ON
        	apm.pdf_filename = gi.pdf_filename AND
        	apm.job = gi.job AND
        	apm."type" = gi."type" AND 
        	apm.variant_name = gi.variant_name AND 
        	apm."method" = gi."method" AND 
        	apm.idx = gi.idx 
    ''',
    con,
    parse_dates=['timestamp']
)

# Disabled filter: would keep only rows whose mask_id belongs to a result of
# project 2 (via get_results_of_project) whose first rectangle label is 'checked_moire'.
#data = data.loc[
#    data.mask_id.isin([r['id'] for r in get_results_of_project(2) if r['rectanglelabels'][0] == 'checked_moire'])
#]

In [152]:
# (rows, columns) of the joined adjustment frame.
data.shape

(3352, 14)

In [154]:
# How many adjustment rows belong to masks whose id starts with 'temp_'?
is_temp_mask = data['mask_id'].str.startswith('temp_')
data.loc[is_temp_mask].shape

(903, 14)

In [155]:
# Number of adjustment rows (non-null `job` entries) recorded for each mask_id.
adjustments_per_mask = data.groupby('mask_id')['job'].count()

grouped_by_mask_id = adjustments_per_mask.to_frame('adjustment_count')

In [156]:
# Display the per-mask adjustment counts (index: mask_id).
grouped_by_mask_id

Unnamed: 0_level_0,adjustment_count
mask_id,Unnamed: 1_level_1
-7zexj6bxi,3
-8B3EQ0ID8,4
-CrmR5Y-k0,1
-GNW_7T2uP,2
-GTaAbknIB,3
...,...
zWscqiNFLb,2
zY1MnA47nS,3
zaAd0hbfxC,2
zfiKNQuM_9,2


In [157]:
# Display the joined frame; rows without a mask match show NaN in overlay_intensity_K / ssim / bbox.
data

Unnamed: 0,pdf_filename,job,type,variant_name,method,idx,mask_id,adjustment,features,execution_index,overlay_intensity_K,ssim,bbox,timestamp
0,004_2023_017_0_004_0.p1,508001,4c,halftone600dpi,soft_light,1,75sVbH0H3v,scale,"{""scale"": 1.5}",1,0.7,0.999251,4142;4714;276;480,2024-01-06 15:01:05
1,004_2023_017_0_004_0.p1,508001,4c,halftone600dpi,soft_light,1,75sVbH0H3v,uniform_trapezoidal_distortion,"{""trapezoidal_distortion_strength"": 0.17828443...",2,0.7,0.999251,4142;4714;276;480,2024-01-06 15:01:05
2,004_2023_017_0_004_0.p1,508001,4c,halftone600dpi,soft_light,1,7pB7srAQTN,uniform_trapezoidal_distortion,"{""trapezoidal_distortion_strength"": 0.05864151...",1,0.7,0.998508,4070;4696;504;576,2024-01-06 15:01:05
3,004_2023_017_0_004_0.p1,508001,4c,halftone600dpi,soft_light,1,qNlgII-mpn,uniform_trapezoidal_distortion,"{""trapezoidal_distortion_strength"": 0.15428249...",1,0.7,0.997123,3824;4384;1086;1392,2024-01-06 15:01:05
4,004_2023_017_0_004_0.p1,508001,4c,halftone600dpi,soft_light,2,temp_2678e5cb-00f8-4948-8c7e-24ee17b5b81f,blow_up_region,"{""blow_up_count"": 5.0, ""blow_up_radius"": [48, ...",1,,,,2024-01-17 18:58:42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3347,tg52_230105_TM_Soul_Food_innen 209.p1,149128,4c,halftone600dpi,soft_light,1,wHrBv9wh6s,rotation,"{""rotation_degree"": -3.0}",1,1.0,0.923758,210;3824;1122;564,2024-01-20 01:56:20
3348,tg52_230105_TM_Soul_Food_innen 209.p1,149128,4c,halftone600dpi,soft_light,1,wHrBv9wh6s,uniform_trapezoidal_distortion,"{""trapezoidal_distortion_strength"": 0.22257242...",2,1.0,0.923758,210;3824;1122;564,2024-01-20 01:56:20
3349,tg52_230105_TM_Soul_Food_innen 213.p1,149128,4c,halftone600dpi,soft_light,5,JHRoHZKzs0,blow_up_region,"{""blow_up_count"": 5.0, ""blow_up_radius"": [84, ...",1,1.0,0.932870,3454;2484;654;576,2024-01-19 19:22:01
3350,tg52_230105_TM_Soul_Food_innen 213.p1,149128,4c,halftone600dpi,soft_light,5,JHRoHZKzs0,rotation,"{""rotation_degree"": 45.0}",2,1.0,0.932870,3454;2484;654;576,2024-01-19 19:22:01


In [78]:
# Collect all .jpg files found in a `moire` folder exactly one directory level below ../Model/temp/.
# The path is relative, so the result depends on the notebook's working directory.
tiles = list(Path('../Model/temp/').glob('./*/moire/*.jpg'))

In [79]:
# Keep only the rows of masks that received exactly one adjustment.
single_adjustment_ids = grouped_by_mask_id.index[grouped_by_mask_id['adjustment_count'] == 1]

selection = data.loc[data['mask_id'].isin(single_adjustment_ids)]

In [80]:
# Look at one tile file name to see how the names are composed.
tiles[0].name

'613256.tg46_006_007_JUL23_UPHK_CA_AS011_AT_DE_BASIS_BILD_002.p1.soft_light.1.4c_600.jpg.1546.jpg'

In [81]:
# Use the first adjustment row as a sample for the tile lookup below.
row = data.iloc[0]

In [None]:
# NOTE(review): `res` is not assigned anywhere in the visible notebook; on a
# fresh kernel this cell presumably raises a NameError — remove it or define `res`.
res

In [83]:
# Show the sample row.
row

pdf_filename           004_2023_017_0_004_0.p1
job                                     508001
type                                        4c
variant_name                    halftone600dpi
method                              soft_light
idx                                          1
mask_id                             75sVbH0H3v
adjustment                               scale
features                        {"scale": 1.5}
execution_index                              1
overlay_intensity_K                        0.7
ssim                                  0.999251
bbox                         4142;4714;276;480
timestamp                  2024-01-06 15:01:05
Name: 0, dtype: object

In [82]:
# Prefix shared by all tiles of the sample row. Tile names appear to be built as
# '<job>.<pdf_filename>.<method>.<idx>.<...>.jpg' (see the tile name displayed earlier).
filename = f'{ row.job }.{ row.pdf_filename }.{ row.method }.{ row.idx }'

# Include the '.' that follows the idx: a bare substring test would let idx 1
# also match the tiles of idx 10, 11, ...
filename_with_separator = f'{ filename }.'

[t for t in tiles if filename_with_separator in t.name]

[]

In [None]:
# Walk over the masks that have exactly one adjustment and print each row.
# NOTE(review): `filename` is computed but not used inside the loop, and the loop
# rebinds the notebook-level `row` variable.
for _,row in selection.iterrows():
    filename = f'{ row.job }.{ row.pdf_filename }'
    print( row )

In [None]:
# Plot the adjustment count of every mask, from the most to the least adjusted.
counts_descending = grouped_by_mask_id.sort_values('adjustment_count', ascending=False)

px.scatter(counts_descending['adjustment_count'])