In [None]:
import sys
sys.path.append('../60_Code/Model/')
sys.path.append('../30_data_tools/')

In [None]:
from io import BytesIO
from pytorch_model_tools import get_datasets
from file_interaction import download_blob
from tqdm.auto import tqdm
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pickle
import math
import pandas as pd
import re
from helper import load_dotenv
import sqlite3
from get_labelstudio_data import get_results_of_project
from load_constants import load_colors

from PIL import Image
from PIL import ImageFont, ImageDraw  
from pathlib import Path

In [None]:
# Load the project settings and open the SQLite database they point to.
dotenv = load_dotenv()
# NOTE(review): `con` is not referenced anywhere else in the visible part of this notebook — confirm it is still needed.
con = sqlite3.connect( dotenv['DB_PATH'] )

In [None]:
# Project colour settings; the plots below read colors['COLOR_SEQUENCE'].
colors = load_colors()

In [None]:
# File names (without directories) of every .jpg found recursively below <TILE_DATASET_DIR>/real_val.
# load_data() uses this list to drop real_val results whose tile is not in that folder.
real_val_tiles = [tile_path.name for tile_path in (dotenv['TILE_DATASET_DIR'] / 'real_val').glob('./**/*.jpg')]

In [None]:
# Sanity check: number of real_val tile files found on disk.
len(real_val_tiles)

## noch offen

- ROC-Kurve/Area under Curve

0 = moire, 1 = no moire

# Funktionen

In [None]:
def load_data( model_results_name ):
    """
    Load the stored classification results of one model run and add the analysis columns.

    The pickled table `model_results/<model_results_name>.pkl` is fetched with
    `download_blob`; its `category` column is renamed to `label` and the columns
    `predicted_label`, `classification_correct`, `mask_id` and `dpi` are added.
    Rows of the `real_val` dataset whose `tile_name` is not contained in the
    module-level list `real_val_tiles` are removed.

    Parameters
    ----------
    model_results_name : str
        Name of the result blob without the `model_results/` prefix and the `.pkl` suffix.

    Returns
    -------
    pandas.DataFrame
        One row per classified tile. `dpi` is kept as the string extracted from
        the tile name (callers compare it against '300', '200', '150').
    """
    # SECURITY NOTE(review): unpickling executes code embedded in the blob —
    # only load result files that come from a trusted store.
    data = pickle.loads( download_blob(f'model_results/{ model_results_name }.pkl').getbuffer() )
    data.rename(columns={'category':'label'}, inplace=True)

    # index 0 = 'moire', index 1 = 'no_moire'; equal scores therefore count as 'moire'
    data.loc[:,'predicted_label'] = (data.result_moire < data.result_no_moire).astype('int').apply(lambda val: ['moire','no_moire'][val])
    data.loc[:,'classification_correct'] = data.predicted_label == data.label

    # Tile names of the form <mask_id>_<dpi>.<number>.jpg.
    # The pattern must be a raw string: '\d' and '\.' are invalid escape sequences in a plain literal.
    data = pd.merge(
        data,
        data.tile_name.str.extract(r'(.+)_(\d+)\.\d+\.jpg').rename(columns={0:'mask_id',1:'dpi'}),
        left_index=True,
        right_index=True
    )

    # Second pattern for ok_sample tiles, applied only where the first one found no dpi.
    dpi_missing = pd.isna(data.dpi)
    data.loc[
        dpi_missing,
        'dpi'
    ] = data.loc[
        dpi_missing
    ].tile_name.str.extract(
        r'ok_sample_.+_(\d+)_\.\d+\.jpg'
    ).rename(columns={0:'dpi'}).dpi

    # real_val filtering: drop real_val rows whose tile file is not listed in real_val_tiles
    unlisted_real_val = (
        (data.tile_name.isin(real_val_tiles) == False) &
        (data.dataset == 'real_val')
    )
    data = data.loc[
        data.index.isin( data.loc[unlisted_real_val].index ) == False
    ]

    return data

In [None]:
def calc_metrics( data ):
    """
    Compute confusion counts and the derived rates, with 'moire' as the positive class.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns `label` and `predicted_label`.

    Returns
    -------
    dict
        Keys `count_data`, `TP`, `TN`, `FP`, `FN`, `TN-Rate`, `TP-Rate`, `accuracy`,
        `precision` and `recall`. A rate whose denominator is zero is reported as 0.
        `TP-Rate` and `recall` are the same quantity (TP / (TP + FN)).
    """
    label_moire = data.label == 'moire'
    label_no_moire = data.label == 'no_moire'
    predicted_moire = data.predicted_label == 'moire'
    predicted_no_moire = data.predicted_label == 'no_moire'

    TP = int((label_moire & predicted_moire).sum())
    TN = int((label_no_moire & predicted_no_moire).sum())
    FP = int((label_no_moire & predicted_moire).sum())
    FN = int((label_moire & predicted_no_moire).sum())
    row_count = data.shape[0]

    def ratio( numerator, denominator ):
        # undefined ratios are reported as 0 instead of raising
        return numerator / denominator if denominator > 0 else 0

    return {
        "count_data" : row_count,
        "TP" : TP,
        "TN" : TN,
        "FP" : FP,
        "FN" : FN,
        "TN-Rate" : ratio(TN, FP + TN),
        "TP-Rate" : ratio(TP, FN + TP),
        "accuracy" : ratio(TP + TN, row_count),
        "precision" : ratio(TP, TP + FP),
        "recall" : ratio(TP, TP + FN)
    }

In [None]:
def get_confusion_matrix( metrics, relative=False ):
    """
    Draw a 2x2 confusion-matrix heatmap from a metrics dict as returned by calc_metrics.

    Rows are the true label, columns the prediction, both in the order
    'moire', 'no_moire'. With `relative=True` every count is divided by
    `metrics['count_data']`.
    """
    cell_keys = (('TP', 'FN'), ('FP', 'TN'))
    values = np.array([[metrics[key] for key in key_row] for key_row in cell_keys])

    if relative:
        values = values / metrics['count_data']

    axis_labels = {'y' : 'label', 'x' : 'prediction'}

    return px.imshow(
        values,
        x=['moire','no_moire'],
        y=['moire','no_moire'],
        labels=axis_labels,
        text_auto=True
    )

In [None]:
def get_roc( data, threshold ):
    """
    Compute one ROC point (row level) for the given decision threshold.

    A row counts as predicted 'moire' when `result_moire > threshold`,
    otherwise as predicted 'no_moire'.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns `result_moire` and `label`.
    threshold : float
        Decision threshold applied to `result_moire`.

    Returns
    -------
    tuple
        `(TP_rate, FP_rate)`. A rate whose denominator is zero (no 'moire' rows
        or no 'no_moire' rows in `data`) is reported as 0.0 instead of raising
        ZeroDivisionError, matching the convention of calc_metrics.
    """
    predicted_moire = data.result_moire > threshold
    predicted_no_moire = data.result_moire <= threshold
    label_moire = data.label == 'moire'
    label_no_moire = data.label == 'no_moire'

    TP = int((predicted_moire & label_moire).sum())
    FP = int((predicted_moire & label_no_moire).sum())
    TN = int((predicted_no_moire & label_no_moire).sum())
    FN = int((predicted_no_moire & label_moire).sum())

    # guard the denominators: filtered selections (e.g. a single dpi) may lack one class
    TP_rate = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    FP_rate = FP / (FP + TN) if (FP + TN) > 0 else 0.0

    return TP_rate, FP_rate


def get_roc_by_tile( data, threshold, aggregation='max' ):
    """
    Compute one ROC point after aggregating rows that share mask_id, dataset, model_type and label.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns `mask_id`, `dataset`, `model_type`, `label` and `result_moire`.
    threshold : float
        Decision threshold applied to `result_moire`.
    aggregation : str
        'mode': every row is thresholded first and the most frequent prediction
        of a group wins; a tie counts as 'moire'.
        Any other value (default 'max'): a group counts as 'moire' as soon as
        one of its rows has `result_moire > threshold`.

    Returns
    -------
    tuple
        `(TP_rate, FP_rate)`. A rate whose denominator is zero is reported as
        0.0 instead of raising ZeroDivisionError, matching calc_metrics.
    """
    group_columns = ['mask_id','dataset','model_type','label']
    all_tiles = data.loc[:,group_columns].drop_duplicates().copy()

    if aggregation == 'mode':
        # work on a copy so the caller's predicted_label column is not overwritten
        data = data.copy()
        data.loc[data.result_moire > threshold,'predicted_label'] = 'moire'
        data.loc[data.result_moire <= threshold,'predicted_label'] = 'no_moire'
        grouped_data = data.loc[
            :,
            group_columns + ['predicted_label']
        ].groupby(group_columns).agg(pd.Series.mode).reset_index()

        # a tie yields several modes (not a plain string); treat it as 'moire'
        grouped_data.loc[
            grouped_data.predicted_label.apply( lambda val: type(val) != str ),
            'predicted_label'
        ] = 'moire'

        selection_moire = grouped_data.loc[grouped_data.predicted_label == 'moire']
        selection_no_moire = grouped_data.loc[grouped_data.predicted_label == 'no_moire']
    else:
        # groups with at least one row above the threshold
        selection_moire = data.loc[data.result_moire > threshold,group_columns].drop_duplicates().copy()

        if selection_moire.shape[0] != 0:
            selection_moire.loc[:,'is_selected'] = True
            # every group that was not selected above is predicted 'no_moire'
            selection_no_moire = pd.merge(
                all_tiles,
                selection_moire,
                how="left",
                on=group_columns
            )
            selection_no_moire = selection_no_moire.loc[pd.isna(selection_no_moire.is_selected)]
        else:
            selection_no_moire = all_tiles

    TP = selection_moire.loc[selection_moire.label == 'moire'].shape[0]
    FP = selection_moire.loc[selection_moire.label == 'no_moire'].shape[0]
    TN = selection_no_moire.loc[selection_no_moire.label == 'no_moire'].shape[0]
    FN = selection_no_moire.loc[selection_no_moire.label == 'moire'].shape[0]

    # guard the denominators: filtered selections may lack one of the two classes
    TP_rate = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    FP_rate = FP / (FP + TN) if (FP + TN) > 0 else 0.0

    return TP_rate, FP_rate

In [None]:
def get_tile_result_grid( data, img_size=224, col_count=5, label_type='both' ):
    """
    Paste the tiles of `data` into one contact-sheet image with a coloured result banner per tile.

    Tiles are read from
    dotenv['TILE_DATASET_DIR'] / dataset_name / <dataset> / <label> / <tile_name>,
    where `dotenv` and `dataset_name` are notebook-level globals. The banner is
    green when `classification_correct` is truthy and red otherwise.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per tile; needs `dataset`, `label`, `tile_name`,
        `classification_correct` and the columns used by the chosen label_type.
    img_size : int
        Edge length used for the canvas and the banner geometry.
    col_count : int
        Number of tiles per row.
    label_type : str
        'label_name' (prediction/label), 'moire_value' (result_moire),
        'description_values' (frequency_gain/ssim_value); any other value shows
        prediction/label together with result_moire.

    Returns
    -------
    PIL.Image.Image
    """
    tile_count = data.shape[0]
    row_count = math.ceil(tile_count / col_count)
    color_correct = (0,200,0)
    color_wrong = (200,0,50)

    out_img = Image.new(
        'RGB',
        (img_size * col_count, img_size * row_count),
        color="white"
    )
    draw = ImageDraw.Draw(out_img)

    for position in range(tile_count):
        row, col = divmod(position, col_count)
        result = data.iloc[position]

        tile_img = Image.open(
            dotenv['TILE_DATASET_DIR'] / dataset_name / result.dataset / result.label / result.tile_name
        )
        # offsets come from the tile's own size, banner and text geometry from img_size
        left = col * tile_img.size[0]
        top = row * tile_img.size[1]
        out_img.paste( tile_img, ( left, top ) )

        banner_color = color_correct if result.classification_correct else color_wrong
        draw.rectangle(
            (
                ( left, top + round(img_size * 0.9) ),
                ( left + img_size, top + img_size )
            ),
            outline=None,
            fill=banner_color
        )

        # only the selected branch is evaluated, so unused columns need not exist
        if label_type == 'label_name':
            label_text = f'{ result.predicted_label }/{ result.label }'
        elif label_type == 'moire_value':
            label_text = str( round(result.result_moire, 5) )
        elif label_type == 'description_values':
            label_text = f'{ round(result.frequency_gain, 5) }/{ round(result.ssim_value, 5) }'
        else:
            label_text = f'{ result.predicted_label }/{ result.label } - {round(result.result_moire, 5)}'

        draw.text(
            ( left + round(img_size * 0.5), top + round(img_size * 0.97) ),
            label_text,
            anchor='ms',
            font_size=15
        )

    return out_img

In [None]:
def get_roc_figure( data, get_tile_results=False, aggregation='max' ):
    """
    Build a ROC plot with one line per model type.

    Each curve is sampled at the thresholds 0.00, 0.01, ..., 1.00. With
    `get_tile_results=True` the points come from get_roc_by_tile (using
    `aggregation`), otherwise from get_roc. Model types are drawn in reverse
    alphabetical order and coloured from colors['COLOR_SEQUENCE'] by position.

    Returns
    -------
    plotly.graph_objects.Figure
        400x400 figure with a horizontal legend.
    """
    thresholds = [step / 100 for step in range(101)]
    ordered_types = sorted(data.model_type.unique(), reverse=True)
    fig = go.Figure()

    for position in tqdm(range(len(ordered_types))):
        current_type = ordered_types[position]
        type_rows = data.loc[data.model_type == current_type]

        if get_tile_results:
            points = [get_roc_by_tile(type_rows, value, aggregation=aggregation) for value in thresholds]
        else:
            points = [get_roc(type_rows, value) for value in thresholds]

        # every point is (TP rate, FP rate); the FP rate goes on the x axis
        fig.add_trace(go.Scatter(
            x=[point[1] for point in points],
            y=[point[0] for point in points],
            mode="lines",
            name=current_type,
            marker={
                'color' : colors['COLOR_SEQUENCE'][position],
            }
        ))

    fig.update_layout(
        legend={'orientation':'h'},
        autosize=False,
        width=400,
        height=400,
    )

    return fig

# Auswertung

In [None]:
# NOTE(review): this always raises ZeroDivisionError — presumably a deliberate stop so that
# "Run All" halts after the function definitions; confirm, and consider removing it so the notebook runs top to bottom.
0 / 0

In [None]:
# Folder that all write_image() exports below are written to.
attachment_dir = dotenv['ATTACHMENT_DIR'] / 'model_auswertung'

In [None]:
# Evaluated model runs mapped to their display name.
model_types = {
    '2024-05-10_Resnet50_004' : 'Resnet50 - spatial',
    '2024-05-12_MobileNetV3_003' : 'MobileNetV3 - spatial',
    '2024-05-13_Resnet50_004' : 'Resnet50 - frequenz',
    '2024-05-14_MobileNetV3_002' : 'MobileNetV3 - frequenz'
}

# run names in evaluation order (the insertion order of the mapping above)
model_names = list(model_types)

# tile dataset the result files were computed on
dataset_name = '24-05-09_001_tile_dataset'

In [None]:
# Load the results of every model run and stack them into one frame;
# model_name / model_type identify the run each row belongs to.
dfs = []


for model_name in model_names:
    # result blobs are named <model_name>_<dataset_name>
    df = load_data(f"{ model_name }_{ dataset_name }")
    df.loc[:,'model_name'] = model_name
    df.loc[:,'model_type'] = model_types[model_name]
    dfs.append(df)

data = pd.concat(dfs, ignore_index=True)

In [None]:
# Number of result rows per dataset split (summed over all model runs).
data.dataset.value_counts()

In [None]:
# Row-level metrics for every (dataset, model run) combination.
metrics = []

for dataset in data.dataset.unique():
    for model_name in data.model_name.unique():
        metric_dict = calc_metrics(
            data.loc[
                (data.dataset == dataset) &
                (data.model_name == model_name)
            ]
        )
        metric_dict['dataset'] = dataset
        metric_dict['model_type'] = model_types[model_name]
        metrics.append( metric_dict )

# one row per combination, indexed by dataset and model type (not by the run name)
metrics_df = pd.DataFrame.from_dict(metrics).set_index(['dataset','model_type'])

In [None]:
# Metrics of all models on the test and real_val datasets.
metrics_df.loc[
    metrics_df.index.get_level_values('dataset').isin(['test','real_val'])
]

In [None]:
# real_val metrics, highest recall first.
metrics_df.loc[
    metrics_df.index.get_level_values('dataset') == 'real_val'
].sort_values('recall', ascending=False)

In [None]:
# test metrics, highest recall first.
metrics_df.loc[
    metrics_df.index.get_level_values('dataset') == 'test'
].sort_values('recall', ascending=False)

In [None]:
# Heatmap of the TP/TN/FP/FN counts per model type on the test dataset.
test_counts = metrics_df.loc[
    metrics_df.index.get_level_values('dataset') == 'test',
    ['TP','TN','FP','FN']
].reset_index('dataset').loc[:,['TP','TN','FP','FN']]

# colour scale = COLOR_SEQUENCE in reverse order
reversed_color_scale = [colors['COLOR_SEQUENCE'][len(colors['COLOR_SEQUENCE']) - i - 1] for i in range(len(colors['COLOR_SEQUENCE']))]

fig = px.imshow(
    test_counts,
    text_auto=True,
    width=800,
    height=500,
    color_continuous_scale=reversed_color_scale
)
fig.update_coloraxes(showscale=False)

for export_file in ['confusion_matrix_test.jpg', 'confusion_matrix_test.pdf']:
    fig.write_image( attachment_dir / export_file )

fig

In [None]:
# Heatmap of the TP/TN/FP/FN counts per model type on the real_val dataset.
# The figure has to be bound to `fig`: without the assignment the calls below
# would restyle and export whatever figure an earlier cell left in `fig`.
fig = px.imshow(
    metrics_df.loc[
            metrics_df.index.get_level_values('dataset') == 'real_val',
            ['TP','TN','FP','FN']
    ].reset_index('dataset').loc[:,['TP','TN','FP','FN']],
    text_auto=True,
    width=800,
    height=500,
    # colour scale = COLOR_SEQUENCE in reverse order
    color_continuous_scale=[colors['COLOR_SEQUENCE'][len(colors['COLOR_SEQUENCE']) - i - 1] for i in range(len(colors['COLOR_SEQUENCE']))]
)
fig.update_coloraxes(showscale=False)
fig.write_image( attachment_dir / 'confusion_matrix_real_test.jpg')
fig.write_image( attachment_dir / 'confusion_matrix_real_test.pdf')

fig

In [None]:
# Grouped bar chart of accuracy, precision and recall per model type on the test dataset,
# exported as JPG and PDF.
fig = px.bar(
    metrics_df.loc[
        metrics_df.index.get_level_values('dataset') == 'test',
        ['accuracy','precision','recall']
    ].reset_index('dataset').loc[:,['accuracy','precision','recall']],
    barmode='group',
    text_auto=True,
    color_discrete_sequence=colors['COLOR_SEQUENCE']
)
fig.write_image( attachment_dir / 'metrics_test.jpg')
fig.write_image( attachment_dir / 'metrics_test.pdf')

fig

In [None]:
# Grouped bar chart of accuracy, precision and recall per model type on the real_val dataset,
# exported as JPG and PDF.
fig = px.bar(
    metrics_df.loc[
        metrics_df.index.get_level_values('dataset') == 'real_val',
        ['accuracy','precision','recall']
    ].reset_index('dataset').loc[:,['accuracy','precision','recall']],
    barmode='group',
    text_auto=True,
    color_discrete_sequence=colors['COLOR_SEQUENCE']
)
fig.write_image( attachment_dir / 'metrics_real_val.jpg')
fig.write_image( attachment_dir / 'metrics_real_val.pdf')

fig

In [None]:
# Row-level ROC curves (one line per model type) over all test rows, exported as JPG and PDF.
fig = get_roc_figure( data.loc[data.dataset == 'test'] )
fig.write_image( attachment_dir / 'roc_test_normal.jpg' )
fig.write_image( attachment_dir / 'roc_test_normal.pdf' )

fig

In [None]:
# Row-level ROC curves (one line per model type) over all real_val rows, exported as JPG and PDF.
fig = get_roc_figure( data.loc[data.dataset == 'real_val'] )
fig.write_image( attachment_dir / 'roc_real_test_normal.jpg' )
fig.write_image( attachment_dir / 'roc_real_test_normal.pdf' )

fig

# Gruppiert nach DPI

In [None]:
# Number of rows per dpi in the test dataset. Only one model type is counted,
# so each row is counted once rather than once per model run.
grouped_by_dpi = data.loc[
    (data.model_type == 'Resnet50 - spatial') & (data.dataset == 'test'),
    ['dpi','label']
].groupby('dpi').count().rename(columns={'label' : 'Testdatensatz'})

# Same count for real_val; the assignment aligns on the dpi index of grouped_by_dpi,
# so a dpi value that occurs only in real_val does not get a row here.
grouped_by_dpi.loc[
    :,
    'realer Testdatensatz'
] = data.loc[
    (data.model_type == 'Resnet50 - spatial') & (data.dataset == 'real_val'),
    ['dpi','label']
].groupby('dpi').count().label

In [None]:
# Grouped bar chart of the row counts per dpi for both datasets, exported as JPG and PDF.
fig = px.bar(
    grouped_by_dpi,
    barmode="group",
    text_auto="auto",
    labels={
        'dpi' : 'Auflösung (dpi)',
        'value' : 'Anzahl Klassifizierungskacheln',
        'variable' : 'Datensatz'
    },
    color_discrete_sequence=[colors['COLOR_SEQUENCE'][0],colors['COLOR_SEQUENCE'][2]],
    width=600,
    height=400
)
# horizontal legend placed below the plot area
fig.update_layout(legend={'orientation':'h', 'y' : -0.2})
fig.write_image( attachment_dir / 'dpi_share.jpg' )
fig.write_image( attachment_dir / 'dpi_share.pdf' )

fig

In [None]:
# Row-level metrics for every (dpi, dataset, model run) combination.
# Combinations without rows still get an entry (calc_metrics returns zeros for an empty selection).
dpi_metrics = []

for dpi in data.dpi.unique():
    for dataset in data.dataset.unique():
        for model_name in data.model_name.unique():
            metric = calc_metrics(
                data.loc[
                    (data.dpi == dpi) &
                    (data.dataset == dataset) &
                    (data.model_name == model_name)
                ]
            )
            metric['dpi'] = dpi
            metric['dataset'] = dataset
            metric['model_type'] = model_types[model_name]
            dpi_metrics.append(metric)

# indexed by dataset, dpi and model type (the run name is not stored)
dpi_metrics_df = pd.DataFrame.from_dict(dpi_metrics).set_index(['dataset','dpi','model_type'])

In [None]:
# Per-dpi metrics on test and real_val, sorted by index and rounded to three decimals.
dpi_metrics_df.loc[
    dpi_metrics_df.index.get_level_values('dataset').isin(['test','real_val'])
].sort_index().round(3)

In [None]:
# Per-dpi test metrics, highest recall first.
dpi_metrics_df.loc[
    (dpi_metrics_df.index.get_level_values('dataset') == 'test')
].sort_values('recall', ascending=False)

In [None]:
# Per-dpi real_val metrics, highest recall first.
dpi_metrics_df.loc[
    (dpi_metrics_df.index.get_level_values('dataset') == 'real_val')
].sort_values('recall', ascending=False)

In [None]:
# Grouped bar chart of accuracy, precision and recall per model type on real_val (not exported).
# NOTE(review): this reads metrics_df, not dpi_metrics_df, although it sits in the per-dpi
# section — confirm whether a per-dpi chart was intended.
fig = px.bar(
    metrics_df.loc[
        metrics_df.index.get_level_values('dataset') == 'real_val',
        ['accuracy','precision','recall']
    ].reset_index('dataset').loc[:,['accuracy','precision','recall']],
    barmode='group',
    text_auto=True,
    color_discrete_sequence=colors['COLOR_SEQUENCE']
)

fig

In [None]:
# Row-level ROC curves of the test dataset, one export per resolution.
# After the loop `fig` holds the 150 dpi figure, which is the one displayed.
for roc_dpi, roc_jpg_file, roc_pdf_file in [
    ('300', 'roc_test_300.jpg', 'roc_test_300.pdf'),
    ('200', 'roc_test_200.jpg', 'roc_test_200.pdf'),
    ('150', 'roc_test_150.jpg', 'roc_test_150.pdf'),
]:
    fig = get_roc_figure( data.loc[(data.dataset == 'test') & (data.dpi == roc_dpi)] )
    fig.write_image( attachment_dir / roc_jpg_file )
    fig.write_image( attachment_dir / roc_pdf_file )

fig

In [None]:
# Row-level ROC curves of the real_val dataset, one export per resolution.
# After the loop `fig` holds the 150 dpi figure, which is the one displayed.
for roc_dpi, roc_jpg_file, roc_pdf_file in [
    ('300', 'roc_real_test_300.jpg', 'roc_real_test_300.pdf'),
    ('200', 'roc_real_test_200.jpg', 'roc_real_test_200.pdf'),
    ('150', 'roc_real_test_150.jpg', 'roc_real_test_150.pdf'),
]:
    fig = get_roc_figure( data.loc[(data.dataset == 'real_val') & (data.dpi == roc_dpi)] )
    fig.write_image( attachment_dir / roc_jpg_file )
    fig.write_image( attachment_dir / roc_pdf_file )

fig

In [None]:
# NOTE(review): `data_selection` is not defined anywhere in the visible part of this notebook;
# on a fresh kernel this cell raises NameError — confirm where it comes from or remove the cell.
data_selection

In [None]:
# Heatmap of accuracy / precision / recall per (model type, dpi) on the test dataset.
# dpi_metrics_df only carries the model type (index levels dataset, dpi, model_type);
# it has no 'model_name' column, so the rows are keyed by 'model_type'.
px.imshow(
    dpi_metrics_df.reset_index().loc[
        (dpi_metrics_df.index.get_level_values('dataset') == 'test'),
        ['dpi','model_type','accuracy','precision','recall']
    ].set_index(['model_type','dpi']).sort_index(),
    text_auto='.3f',
    # colour scale = COLOR_SEQUENCE in reverse order
    color_continuous_scale=[colors['COLOR_SEQUENCE'][len(colors['COLOR_SEQUENCE']) - i - 1] for i in range(len(colors['COLOR_SEQUENCE']))]
)

In [None]:
# Heatmap of accuracy / precision / recall per (model type, dpi) on the real_val dataset.
# dpi_metrics_df only carries the model type (index levels dataset, dpi, model_type);
# it has no 'model_name' column, so the rows are keyed by 'model_type'.
px.imshow(
    dpi_metrics_df.reset_index().loc[
        (dpi_metrics_df.index.get_level_values('dataset') == 'real_val'),
        ['dpi','model_type','accuracy','precision','recall']
    ].set_index(['model_type','dpi']).sort_index(),
    text_auto='.3f',
    # colour scale = COLOR_SEQUENCE in reverse order
    color_continuous_scale=[colors['COLOR_SEQUENCE'][len(colors['COLOR_SEQUENCE']) - i - 1] for i in range(len(colors['COLOR_SEQUENCE']))]
)

# Kacheldataset

In [None]:
def get_grouped_by_tile( data, separate_dpi=False, aggregation="max" ):
    """
    Collapse the rows that share mask_id, dataset, model_type and label into one prediction.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the grouping columns plus `predicted_label` ('mode' and default
        aggregation) or `result_moire` ('mean' aggregation).
    separate_dpi : bool
        When True, `dpi` is added to the grouping columns.
    aggregation : str
        'mode': most frequent predicted_label of the group; a tie counts as 'moire'.
        'mean': result_moire is averaged; a mean above 0.5 gives 'moire', a mean
        of 0.5 or below gives 'no_moire'.
        Anything else (default 'max'): the minimum of the predicted_label strings,
        i.e. 'moire' as soon as one row of the group was predicted 'moire'.

    Returns
    -------
    pandas.DataFrame
        One row per group with the grouping columns and `predicted_label`
        (plus `result_moire` for 'mean').
    """
    group_keys = ['mask_id','dataset','model_type','label']
    if separate_dpi:
        group_keys.append('dpi')

    if aggregation == 'mean':
        averaged = data.loc[:, group_keys + ['result_moire']].groupby(group_keys).mean().reset_index()
        averaged.loc[averaged.result_moire > 0.5,'predicted_label'] = 'moire'
        averaged.loc[averaged.result_moire <= 0.5,'predicted_label'] = 'no_moire'
        return averaged

    label_rows = data.loc[:, group_keys + ['predicted_label']]

    if aggregation == 'mode':
        voted = label_rows.groupby(group_keys).agg(pd.Series.mode).reset_index()
        # several modes (a tie) do not come back as a plain string
        is_tie = voted.predicted_label.apply( lambda val: type(val) != str )
        voted.loc[is_tie, 'predicted_label'] = 'moire'
        return voted

    # 'moire' sorts before 'no_moire', so min() picks 'moire' whenever it occurs
    return label_rows.groupby(group_keys).min().reset_index()


def get_tile_metrics_df( data, separate_dpi=False, aggregation="max" ):
    """
    Metrics on the grouped predictions for the datasets 'test' and 'real_val'.

    `data` is grouped with get_grouped_by_tile (same `separate_dpi` and
    `aggregation`); calc_metrics is then evaluated for every combination of
    dataset, model type and, when `separate_dpi` is set, dpi.

    Returns
    -------
    pandas.DataFrame
        Indexed by dataset, model_type (and dpi), rounded to three decimals and
        sorted by index in descending order.
    """
    grouped_data = get_grouped_by_tile( data, separate_dpi=separate_dpi, aggregation=aggregation )

    group_columns = ['dataset','model_type']
    if separate_dpi:
        group_columns.append('dpi')

    model_type_values = grouped_data.model_type.unique()
    # a single placeholder keeps the loop shape identical when dpi is not separated
    dpi_values = grouped_data.dpi.unique() if separate_dpi else [None]

    tile_metrics = []

    for dataset in ['test','real_val']:
        for model_type in model_type_values:
            for dpi in dpi_values:
                task = {
                    'dataset' : dataset,
                    'model_type' : model_type,
                }
                if separate_dpi:
                    task['dpi'] = dpi

                # narrow the grouped rows down one criterion at a time
                selection = grouped_data
                for key, wanted in task.items():
                    selection = selection.loc[
                        (selection[key] == wanted)
                    ]

                metric = calc_metrics(selection)
                metric.update(task)
                tile_metrics.append(metric)

    return pd.DataFrame.from_dict(tile_metrics).set_index(group_columns).round(3).sort_index(ascending=False)

In [None]:
# Metrics of the mean-aggregated predictions. The grouping is computed inline because
# `grouped_by_tile` is only assigned in a later cell (NameError on a fresh kernel otherwise).
# The frame is already one row per group, so the default regrouping inside
# get_tile_metrics_df leaves its predicted_label values unchanged.
get_tile_metrics_df( get_grouped_by_tile( data, aggregation='mean' ) )

In [None]:
# Grouped predictions using the mean of result_moire per group.
grouped_by_tile = get_grouped_by_tile( data, aggregation='mean' )

In [None]:
# Grouped ROC curves with 'mode' aggregation: test and real_val, each over all
# resolutions and restricted to 300 dpi.
for mode_roc_rows, mode_roc_file in [
    (data.loc[(data.dataset == 'test')], 'roc_tile_test_mode.pdf'),
    (data.loc[(data.dataset == 'test') & (data.dpi == '300')], 'roc_tile_test_mode_300.pdf'),
    (data.loc[(data.dataset == 'real_val')], 'roc_tile_real_test_mode.pdf'),
    (data.loc[(data.dataset == 'real_val') & (data.dpi == '300')], 'roc_tile_real_test_mode_300.pdf'),
]:
    get_roc_figure(
        mode_roc_rows,
        get_tile_results=True,
        aggregation="mode"
    ).write_image( attachment_dir / mode_roc_file )

In [None]:
# Grouped ROC curves with the default aggregation over all test rows ...
get_roc_figure(
    data.loc[(data.dataset == 'test')],
    get_tile_results=True
).write_image( attachment_dir / 'roc_tile_test.pdf' )

# ... and over all real_val rows.
get_roc_figure(
    data.loc[(data.dataset == 'real_val')],
    get_tile_results=True
).write_image( attachment_dir / 'roc_tile_real_test.pdf' )

In [None]:
# Grouped ROC curves with the default aggregation, one export per dataset and resolution.
for tile_roc_dataset, tile_roc_dpi, tile_roc_file in [
    ('test', '300', 'roc_tile_test_300.pdf'),
    ('real_val', '300', 'roc_tile_real_test_300.pdf'),
    ('test', '200', 'roc_tile_test_200.pdf'),
    ('real_val', '200', 'roc_tile_real_test_200.pdf'),
    ('test', '150', 'roc_tile_test_150.pdf'),
    ('real_val', '150', 'roc_tile_real_test_150.pdf'),
]:
    get_roc_figure(
        data.loc[(data.dataset == tile_roc_dataset) & (data.dpi == tile_roc_dpi)],
        get_tile_results=True
    ).write_image( attachment_dir / tile_roc_file )

In [None]:
# Mean-aggregated grouped predictions, additionally split by dpi.
# NOTE(review): `grouped_by_tile_dpi` is not read anywhere in the visible part of this notebook.
grouped_by_tile_dpi = get_grouped_by_tile( data, aggregation='mean', separate_dpi=True )

In [None]:
# Inspect the grouped predictions.
grouped_by_tile

In [None]:
# Grouped metrics (default aggregation) per dpi, re-indexed as dataset / dpi / model_type.
get_tile_metrics_df( data, separate_dpi=True ).reset_index().set_index(['dataset','dpi','model_type']).sort_index(ascending=False)

In [None]:
# Grouped metrics with 'mode' aggregation over all resolutions.
get_tile_metrics_df( data, separate_dpi=False, aggregation='mode' )

In [None]:
# Grouped metrics with 'mode' aggregation, split by dpi.
get_tile_metrics_df( data, separate_dpi=True, aggregation='mode' )

# Ensemble Classifier

In [None]:
# Grouped predictions per dpi (default aggregation) with a correctness flag.
# Note: this overwrites the `grouped_by_tile` assigned in an earlier cell.
grouped_by_tile = get_grouped_by_tile( data, separate_dpi=True, aggregation='max' )
grouped_by_tile.loc[:,'classification_correct'] = grouped_by_tile.label == grouped_by_tile.predicted_label

# Put the correctness of the Resnet50 spatial model (a) and the Resnet50 frequenz model (b)
# side by side for the 300 dpi test groups. Only mask_id, label and classification_correct
# are selected, so the result_moire / result_no_moire entries of the rename maps have no effect.
compare_models = pd.merge(
    grouped_by_tile.loc[
        (grouped_by_tile.dataset == 'test') &
        (grouped_by_tile.dpi == '300') &
        (grouped_by_tile.model_type == 'Resnet50 - spatial'),
        ['mask_id','label','classification_correct']
    ].rename(columns={'classification_correct':'classification_correct_a', 'result_moire' : 'result_moire_a', 'result_no_moire' : 'result_no_moire_a'}),
    grouped_by_tile.loc[
        (grouped_by_tile.dataset == 'test') &
        (grouped_by_tile.dpi == '300') &
        (grouped_by_tile.model_type == 'Resnet50 - frequenz'),
        ['mask_id','label','classification_correct']
    ].rename(columns={'classification_correct':'classification_correct_b', 'result_moire' : 'result_moire_b', 'result_no_moire' : 'result_no_moire_b'}),
    on=['mask_id','label']
)

In [None]:
# Cross-tabulate the correctness of both models:
# first letter = model a, second letter = model b (T = correct, F = wrong).
a_correct = compare_models.classification_correct_a == True
a_wrong = compare_models.classification_correct_a == False
b_correct = compare_models.classification_correct_b == True
b_wrong = compare_models.classification_correct_b == False

TT = compare_models.loc[a_correct & b_correct].shape[0]
TF = compare_models.loc[a_correct & b_wrong].shape[0]
FT = compare_models.loc[a_wrong & b_correct].shape[0]
FF = compare_models.loc[a_wrong & b_wrong].shape[0]

In [None]:
# Counts: both correct, only a correct, only b correct, both wrong.
TT, TF, FT, FF

In [None]:
# precision
# Numerator: 'moire' groups that at least one of the two models classified correctly.
# Denominator: that count plus the 'no_moire' groups that both models got wrong.
# NOTE(review): an "either model flags moire" ensemble would also count no_moire groups that
# only one model got wrong as false positives — confirm which ensemble rule is meant here.
compare_models.loc[
    ((compare_models.classification_correct_a) |
    (compare_models.classification_correct_b)) &
    (compare_models.label == 'moire')
].shape[0] / (compare_models.loc[
    ((compare_models.classification_correct_a) |
    (compare_models.classification_correct_b)) &
    (compare_models.label == 'moire')
].shape[0] + compare_models.loc[
    (compare_models.classification_correct_a == False) &
    (compare_models.classification_correct_b == False) &
    (compare_models.label == 'no_moire')
].shape[0])

In [None]:
# recall
# Share of the 'moire' groups that at least one of the two models classified correctly.
compare_models.loc[
    ((compare_models.classification_correct_a) |
    (compare_models.classification_correct_b)) &
    (compare_models.label == 'moire')
].shape[0] / compare_models[compare_models.label == 'moire'].shape[0]

## compare tiles

In [None]:
def weight(x):
    """
    Negated distance of a score from 0.5.

    Scores further away from 0.5 get a smaller (more negative) weight; a score
    of exactly 0.5 gets 0.
    """
    distance_from_center = abs(x - 0.5)
    return -distance_from_center

In [None]:
# Example call of weight().
weight(0.8)

In [None]:
def get_arch_results( data, architecture ):
    """
    Combine the spatial and the frequenz model of one architecture into an ensemble result per tile.

    Rows of '<architecture> - spatial' (model a, all columns) are merged with the
    rows of '<architecture> - frequenz' (model b) on `tile_name` and `label`.
    For every merged row the score of the model with the smaller `weight`, i.e.
    the score further away from 0.5, becomes the ensemble `result_moire`;
    on equal weights model b is used.

    The ensemble decision is the decision of the model whose score was taken.
    It is written to `classification_correct` and `predicted_label`, so that
    calc_metrics / get_tile_metrics_df evaluate the ensemble. Previously
    `predicted_label` still held the prediction of the spatial model.

    Parameters
    ----------
    data : pandas.DataFrame
        Result rows with `model_type`, `tile_name`, `label`,
        `classification_correct`, `result_moire` and `result_no_moire`.
    architecture : str
        Prefix of the model type, e.g. 'Resnet50'.

    Returns
    -------
    pandas.DataFrame
        Columns of model a with the suffixed columns `classification_correct_a/_b`,
        `result_moire_a/_b`, `result_no_moire_a`, plus `result_group`
        ('TT', 'TF', 'FT', 'FF' = correctness of a and b), `result_moire`,
        `classification_correct` and `predicted_label` of the ensemble.
        `model_type` is set to `architecture`.
    """
    compare_model_results = pd.merge(
        data.loc[
            data.model_type == f'{ architecture } - spatial',
            :
        ].rename(columns={'classification_correct':'classification_correct_a', 'result_moire' : 'result_moire_a', 'result_no_moire' : 'result_no_moire_a'}),
        data.loc[
            data.model_type == f'{ architecture } - frequenz',
            ['tile_name','label','classification_correct','result_moire']
        ].rename(columns={'classification_correct':'classification_correct_b', 'result_moire' : 'result_moire_b', 'result_no_moire' : 'result_no_moire_b'}),
        on=['tile_name','label']
    )

    # correctness pattern of both models: first letter = model a, second letter = model b
    result_groups = {
        (True, True) : 'TT',
        (True, False) : 'TF',
        (False, True) : 'FT',
        (False, False) : 'FF'
    }
    for (correct_a, correct_b), group_name in result_groups.items():
        compare_model_results.loc[
            (compare_model_results.classification_correct_a == correct_a) &
            (compare_model_results.classification_correct_b == correct_b),
            'result_group'
        ] = group_name

    compare_model_results.loc[
        :,
        'model_type'
    ] = architecture

    # True where model a is further away from 0.5 than model b (computed once, reused below)
    prefer_a = compare_model_results.apply(lambda row: weight(row.result_moire_a) < weight(row.result_moire_b), axis=1)

    compare_model_results.loc[
        prefer_a,
        'result_moire'
    ] = compare_model_results.loc[prefer_a].result_moire_a

    # every row not taken from model a falls back to model b
    still_missing = pd.isna(compare_model_results.result_moire)
    compare_model_results.loc[
        still_missing,
        'result_moire'
    ] = compare_model_results.loc[still_missing].result_moire_b

    # ensemble decision = decision of the model whose score was taken
    ensemble_correct = compare_model_results.classification_correct_a.where(
        prefer_a,
        compare_model_results.classification_correct_b
    )
    # with two classes, a wrong prediction is the other class
    opposite_label = compare_model_results.label.map({'moire' : 'no_moire', 'no_moire' : 'moire'})
    compare_model_results['classification_correct'] = ensemble_correct
    compare_model_results['predicted_label'] = compare_model_results.label.where(
        ensemble_correct.astype(bool),
        opposite_label
    )

    return compare_model_results

In [None]:
# Ensemble results (spatial + frequenz model per architecture) on the test and real_val datasets.
ensemble_input_rows = data.loc[
    (data.dataset.isin(['test','real_val'])),
    :
]

compare_model_results = pd.concat(
    [
        get_arch_results( ensemble_input_rows, ensemble_architecture )
        for ensemble_architecture in ['Resnet50', 'MobileNetV3']
    ],
    ignore_index=True
)

In [None]:
# Row-level metrics of the Resnet50 ensemble frame on the 300 dpi test rows
# (calc_metrics reads the `label` and `predicted_label` columns of that frame).
calc_metrics(
    get_arch_results(
        data.loc[
            (data.dataset == 'test') &
            (data.dpi == '300'),
            :
        ],
        'Resnet50'
    )
)

In [None]:
# Row-level metrics of the MobileNetV3 ensemble frame on the 300 dpi test rows
# (calc_metrics reads the `label` and `predicted_label` columns of that frame).
calc_metrics(
    get_arch_results(
        data.loc[
            (data.dataset == 'test') &
            (data.dpi == '300'),
            :
        ],
        'MobileNetV3'
    )
)

In [None]:
# Row-level metrics of the Resnet50 ensemble frame on the 300 dpi real_val rows
# (calc_metrics reads the `label` and `predicted_label` columns of that frame).
calc_metrics(
    get_arch_results(
        data.loc[
            (data.dataset == 'real_val') &
            (data.dpi == '300'),
            :
        ],
        'Resnet50'
    )
)

In [None]:
# Row-level metrics of the MobileNetV3 ensemble frame on the 300 dpi real_val rows
# (calc_metrics reads the `label` and `predicted_label` columns of that frame).
calc_metrics(
    get_arch_results(
        data.loc[
            (data.dataset == 'real_val') &
            (data.dpi == '300'),
            :
        ],
        'MobileNetV3'
    )
)

In [None]:
# Grouped metrics (default aggregation) of both ensembles on test and real_val.
ensemble_input_rows = data.loc[
    (data.dataset.isin(['test','real_val'])),
    :
]

get_tile_metrics_df(
    pd.concat(
        [
            get_arch_results( ensemble_input_rows, ensemble_architecture )
            for ensemble_architecture in ['Resnet50', 'MobileNetV3']
        ],
        ignore_index=True
    )
)

In [None]:
# Grouped metrics (default aggregation) of both ensembles on the 300 dpi rows of test and real_val.
ensemble_input_rows_300 = data.loc[
    (data.dataset.isin(['test','real_val'])) &
    (data.dpi == '300'),
    :
]

get_tile_metrics_df(
    pd.concat(
        [
            get_arch_results( ensemble_input_rows_300, ensemble_architecture )
            for ensemble_architecture in ['Resnet50', 'MobileNetV3']
        ],
        ignore_index=True
    )
)

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [None]:
# scikit-learn ROC curve of the Resnet50 spatial model on the 300 dpi test rows;
# 'moire' is encoded as the positive class (1).
resnet_spatial_test_300 = data.loc[(data.model_type == 'Resnet50 - spatial') & (data.dataset == 'test') & (data.dpi == '300')]

fpr, tpr, _ = roc_curve(
    (resnet_spatial_test_300.label == 'moire').astype('int8').to_numpy(),
    resnet_spatial_test_300.result_moire.to_numpy()
)

In [None]:
# Area under the ROC curve of the Resnet50 spatial model on the 300 dpi test rows;
# 'moire' is encoded as the positive class (1).
resnet_spatial_test_300 = data.loc[(data.model_type == 'Resnet50 - spatial') & (data.dataset == 'test') & (data.dpi == '300')]

roc_auc_score(
    (resnet_spatial_test_300.label == 'moire').astype('int8').to_numpy(),
    resnet_spatial_test_300.result_moire.to_numpy()
)

In [None]:
# Area under the ROC curve of the Resnet50 ensemble score on the 300 dpi test rows;
# 'moire' is encoded as the positive class (1).
resnet_ensemble_test_300 = compare_model_results.loc[
    (compare_model_results.model_type == 'Resnet50') &
    (compare_model_results.dataset == 'test') &
    (compare_model_results.dpi == '300')
]

roc_auc_score(
    (resnet_ensemble_test_300.label == 'moire').astype('int8').to_numpy(),
    resnet_ensemble_test_300.result_moire.to_numpy()
)

In [None]:
# scikit-learn ROC curve of the Resnet50 ensemble score on the 300 dpi test rows;
# 'moire' is encoded as the positive class (1). This overwrites fpr / tpr from the earlier cell.
resnet_ensemble_test_300 = compare_model_results.loc[
    (compare_model_results.model_type == 'Resnet50') &
    (compare_model_results.dataset == 'test') &
    (compare_model_results.dpi == '300')
]

fpr, tpr, _ = roc_curve(
    (resnet_ensemble_test_300.label == 'moire').astype('int8').to_numpy(),
    resnet_ensemble_test_300.result_moire.to_numpy()
)

In [None]:
# Plot the most recently computed ROC curve (fpr on x, tpr on y).
px.line(
    x=fpr,
    y=tpr
)

In [None]:
# Plot the most recently computed ROC curve (fpr on x, tpr on y).
# NOTE(review): this cell is identical to the one directly above it — confirm it is still needed.
px.line(
    x=fpr,
    y=tpr
)

In [None]:
# Row-level ROC curves of the ensembles, one export per dataset.
for ensemble_dataset, ensemble_file in [
    ('test', 'roc_test_ensemble.pdf'),
    ('real_val', 'roc_real_test_ensemble.pdf'),
]:
    get_roc_figure(
        compare_model_results.loc[
            compare_model_results.dataset == ensemble_dataset
        ]
    ).write_image( attachment_dir / ensemble_file )

In [None]:
# Row-level ROC curves of the ensembles on the 300 dpi rows, one export per dataset.
for ensemble_dataset, ensemble_file in [
    ('test', 'roc_test_300_ensemble.pdf'),
    ('real_val', 'roc_real_test_300_ensemble.pdf'),
]:
    get_roc_figure(
        compare_model_results.loc[
            (compare_model_results.dataset == ensemble_dataset) &
            (compare_model_results.dpi == '300')
        ]
    ).write_image( attachment_dir / ensemble_file )

In [None]:
# Grouped ROC curves (default aggregation) of the ensembles, one export per dataset.
for ensemble_dataset, ensemble_file in [
    ('test', 'roc_tile_test_ensemble.pdf'),
    ('real_val', 'roc_tile_real_test_ensemble.pdf'),
]:
    get_roc_figure(
        compare_model_results.loc[
            compare_model_results.dataset == ensemble_dataset
        ],
        get_tile_results=True
    ).write_image( attachment_dir / ensemble_file )

In [None]:
# Grouped ROC curves (default aggregation) of the ensembles on the 300 dpi rows, one export per dataset.
for ensemble_dataset, ensemble_file in [
    ('test', 'roc_tile_test_300_ensemble.pdf'),
    ('real_val', 'roc_tile_real_test_300_ensemble.pdf'),
]:
    get_roc_figure(
        compare_model_results.loc[
            (compare_model_results.dataset == ensemble_dataset) &
            (compare_model_results.dpi == '300')
        ],
        get_tile_results=True
    ).write_image( attachment_dir / ensemble_file )

In [None]:
# Scatter of the spatial score (x) against the frequenz score (y) per tile,
# coloured by the correctness pattern of both models (TT / TF / FT / FF).
# NOTE(review): compare_model_results holds the Resnet50 and the MobileNetV3 rows, but the
# axis labels only mention Resnet50 — confirm whether a filter on model_type is missing.
px.scatter(
    x=compare_model_results.result_moire_a,
    y=compare_model_results.result_moire_b,
    color=compare_model_results.result_group,
    height=700,
    width=700,
    labels={
        'x' : 'Moiréwahrscheinlichkeit Resnet50 spatial',
        'y' : 'Moiréwahrscheinlichkeit Resnet50 frequenz'
    }
)

# Rest

In [None]:
# Per real_val tile: number of result rows (one per model run that classified it) ...
grouped_by_tilename = data.loc[
    data.dataset == 'real_val',
    ['tile_name','label','mask_id']
].groupby(['tile_name','label']).count().rename(columns={'mask_id':'total_count'})

# ... and how many of them were classified correctly (sum of the boolean flag).
# count() skips missing mask_id values, so total_count only counts rows with a mask_id.
grouped_by_tilename.loc[
    :,
    'classification_correct'
] = data.loc[
    data.dataset == 'real_val',
    ['tile_name','label','classification_correct']
].groupby(['tile_name','label']).sum().classification_correct

# Auswertung nach erkannten Kacheln

In [None]:
# Dataset evaluated in this section (reuses the name of the loop variable of earlier cells).
dataset = 'real_val'

tile_names = data.loc[
    data.dataset == dataset
].tile_name.unique()

# Pivot the correctness flags: after unstacking, the columns are (classification_correct, tile_name) pairs.
tile_result_frame = data.loc[
    data.dataset == dataset,
    ['tile_name','model_name','classification_correct']
].set_index(['tile_name','model_name']).unstack('tile_name')
# keep only the tile name of each column pair
tile_result_frame.columns = [c[1] for c in tile_result_frame.columns]

# final layout: one row per tile, one column per model run
tile_result_frame = tile_result_frame.T

In [None]:
from PIL import Image

In [None]:
# NOTE(review): `n` is not referenced in the visible remainder of the notebook — confirm it is still needed.
n = 2

In [None]:
# Combine the two listed models per tile on the real_val rows.
# For every tile the maximum result_moire and the maximum result_no_moire over
# both models are taken (each column independently); the tile is predicted as
# 'moire' when the combined moire score is larger, otherwise as 'no_moire'.
ensemble_model_names = ["2024-05-10_Resnet50_004","2024-05-13_Resnet50_004"]

# The row filter is evaluated once and reused for scores and labels.
ensemble_rows = data.loc[
    (data.dataset == 'real_val') &
    (data.model_name.isin(ensemble_model_names))
]

grouped_models = ensemble_rows.groupby('tile_name').agg(
    result_moire=('result_moire', 'max'),
    result_no_moire=('result_no_moire', 'max'),
    label=('label', 'first'),
)

# Assign the prediction in one step. The former chained
# `grouped_models.predicted_label.fillna(..., inplace=True)` operates on a
# temporary Series and does not reliably write back into the frame under
# pandas copy-on-write.
grouped_models['predicted_label'] = np.where(
    grouped_models.result_moire > grouped_models.result_no_moire,
    'moire',
    'no_moire'
)
grouped_models['classification_correct'] = grouped_models.label == grouped_models.predicted_label

In [None]:
# Sum of the classification_correct flags per tile for each of the two models (shown as a tuple).
tile_result_frame.loc[:,'2024-05-10_Resnet50_004'].sum(), tile_result_frame.loc[:,'2024-05-13_Resnet50_004'].sum()

In [None]:
# (number of tiles, number of models) after the transpose above.
tile_result_frame.shape

In [None]:
# Number of tiles for which the combined prediction in grouped_models matches the label.
grouped_models.classification_correct.sum()

In [None]:
def get_model_confusion_data( model_a, model_b, result_frame=None ):
    """Count how often two models agree or disagree on classification correctness.

    Parameters
    ----------
    model_a, model_b : str
        Column names of the two models to compare.
    result_frame : pandas.DataFrame, optional
        Frame with one row per tile and one column per model, holding the
        boolean classification_correct flag. Defaults to the notebook-level
        ``tile_result_frame``.

    Returns
    -------
    pandas.DataFrame
        2x2 table with index and columns ``[True, False]``. Rows refer to
        ``model_a``, columns to ``model_b``; each cell is the number of tiles
        with that combination of outcomes. Tiles with a missing value for
        either model are counted in no cell.
    """
    if result_frame is None:
        # tile_result_frame is already oriented tiles x models. Transposing it
        # again (as the earlier version did) turns the models into row labels,
        # so the column lookup by model name below would fail.
        result_frame = tile_result_frame

    outcomes = [True, False]
    flags_a = result_frame[model_a]
    flags_b = result_frame[model_b]

    counts = [
        [
            int(((flags_a == outcome_a) & (flags_b == outcome_b)).sum())
            for outcome_b in outcomes
        ]
        for outcome_a in outcomes
    ]

    return pd.DataFrame(
        counts,
        index=outcomes,
        columns=outcomes
    )

def get_model_confusion_matrix( model_a, model_b, is_relative=False ):
    """Render the agreement table of two models as a plotly heatmap.

    The table comes from ``get_model_confusion_data(model_a, model_b)``.
    With ``is_relative=True`` every cell is divided by the sum of all cells
    and rounded to three decimals. The y axis is labelled with ``model_a``
    and the x axis with ``model_b``.
    """
    counts = get_model_confusion_data( model_a, model_b )

    # Either the raw counts or their share of the grand total.
    matrix = (counts / counts.sum().sum()).round(3) if is_relative else counts

    axis_labels = {
        'y' : model_a,
        'x' : model_b
    }

    return px.imshow(
        matrix,
        text_auto=True,
        labels=axis_labels
    )

In [None]:
# Relative agreement heatmap of the two Resnet50 runs (cell values are shares of the total).
get_model_confusion_matrix(
    '2024-05-10_Resnet50_004',
    '2024-05-13_Resnet50_004',
    is_relative=True
)

In [None]:
# result_moire of both models for the tiles that the 2024-05-10 model got wrong
# while the 2024-05-13 model got them right; one row per tile, one column per model.
# NOTE(review): the second condition uses the column directly as a boolean mask,
# while the first compares with `== False` — if the column can contain missing
# values the two forms may behave differently; confirm.
data_selection = data.loc[
    (data.tile_name.isin(
        tile_result_frame.loc[
            (tile_result_frame['2024-05-10_Resnet50_004'] == False) &
            (tile_result_frame['2024-05-13_Resnet50_004'])
        ].index
    )) &
    (data.model_name.isin(['2024-05-10_Resnet50_004','2024-05-13_Resnet50_004'])),
    ['tile_name','model_name','result_moire']
].set_index(['tile_name','model_name']).unstack('model_name')
# Flatten the two-level column labels down to the model name.
data_selection.columns = [c[1] for c in data_selection.columns]

data_selection

In [None]:
# Plot the first 250 rows of the selection (one trace per model column).
px.scatter(data_selection.iloc[:250])

# Rest

In [None]:
# Grid of up to 25 shuffled real_val rows that are labelled 'moire' but were predicted 'no_moire'.
# NOTE(review): the only definition of get_tile_result_grid visible in this part of the
# notebook sits in a later cell, so this call depends on execution order.
# NOTE(review): sample() is called without random_state, so the selection differs between runs.
get_tile_result_grid(
    data.loc[
        (data.label == 'moire') &
        (data.predicted_label == 'no_moire') &
        (data.dataset == 'real_val')
    ].sample(frac=1).iloc[:25],
)

In [None]:
# NOTE(review): `result` is assigned in the cell that follows this one; unless it is
# also defined earlier in the notebook, this display fails on a fresh kernel.
result

In [None]:
# First row of data, kept as a sample record for inspection.
result = data.iloc[0]

In [None]:
# Sorted result_moire values of the train rows labelled 'moire' but predicted 'no_moire';
# reset_index() makes the x axis the rank within that sorted order.
px.line(
    data.loc[
        (data.label == 'moire') &
        (data.predicted_label == 'no_moire') &
        (data.dataset == 'train')
    ].sort_values('result_moire').reset_index().result_moire
)

In [None]:
# The 50 train rows labelled and predicted 'moire' with the highest result_moire.
data.loc[
    (data.label == 'moire') &
    (data.predicted_label == 'moire') &
    (data.dataset == 'train')
].sort_values('result_moire', ascending=False).iloc[:50]

In [None]:
# Tile grid of the 50 train rows labelled and predicted 'moire' with the highest result_moire.
# NOTE(review): the only definition of get_tile_result_grid visible in this part of the
# notebook sits in a later cell, so this call depends on execution order.
get_tile_result_grid(
    data.loc[
        (data.label == 'moire') &
        (data.predicted_label == 'moire') &
        (data.dataset == 'train')
    ].sort_values('result_moire', ascending=False).iloc[:50],
)

In [None]:
def get_tile_result_grid( data, img_size=224, col_count=5, label_type='both' ):
    """Compose the tiles referenced by ``data`` into one contact-sheet image.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per tile. The columns ``dataset``, ``label``, ``tile_name``,
        ``predicted_label``, ``result_moire`` and ``classification_correct``
        are read.
    img_size : int
        Edge length in pixels of one grid cell.
    col_count : int
        Number of tiles per row; the row count follows from ``len(data)``.
    label_type : str
        ``'label_name'`` prints "predicted/label", ``'moire_value'`` prints the
        rounded ``result_moire``; any other value prints both.

    Returns
    -------
    PIL.Image.Image
        White RGB canvas with the tiles pasted row by row. Each cell carries a
        banner over its lowest tenth: green when ``classification_correct`` is
        truthy, red otherwise.
    """
    row_count = math.ceil(data.shape[0] / col_count)

    out_img = Image.new(
        'RGB',
        (img_size * col_count, img_size * row_count),
        color="white"
    )
    draw = ImageDraw.Draw(out_img)
    # banner colours: index 0 = classified correctly, index 1 = misclassified
    label_color = [
        (0,200,0),
        (200,0,50)
    ]

    for i in range(data.shape[0]):
        row, col = divmod(i, col_count)
        # Cell origin on the canvas. The canvas and the banner are laid out in
        # img_size steps, so the origin uses img_size as well instead of the
        # individual tile's size (identical whenever tiles are img_size square).
        left = col * img_size
        top = row * img_size

        result = data.iloc[i]
        # NOTE(review): dataset_name is a notebook-level variable that is not
        # defined in the visible part of the notebook — confirm it is set
        # before this function is called.
        tile_path = dotenv['TILE_DATASET_DIR'] / dataset_name / result.dataset / result.label / result.tile_name
        # The context manager releases the file handle right after pasting.
        with Image.open( tile_path ) as tile_img:
            out_img.paste(
                tile_img,
                ( left, top )
            )

        draw.rectangle(
            (
                ( left, top + round(img_size * 0.9) ),
                ( left + img_size, top + img_size )
            ),
            outline=None,
            fill=label_color[0] if result.classification_correct else label_color[1]
        )

        if label_type == 'label_name':
            label_text = f'{ result.predicted_label }/{ result.label }'
        elif label_type == 'moire_value':
            label_text = str( round(result.result_moire, 5) )
        else:
            label_text = f'{ result.predicted_label }/{ result.label } - {round(result.result_moire, 5)}'

        # Text is anchored at its middle baseline ('ms'), centred in the banner.
        draw.text(
            ( left + round(img_size * 0.5), top + round(img_size * 0.97) ),
            label_text,
            anchor='ms',
            font_size=15
        )

    return out_img

In [None]:
# Run the model on every tile and collect (tile path, raw model output, argmax index).
# NOTE(review): tile_paths, transforms, torch and model are not defined in the
# visible part of the notebook — confirm they are set up in an earlier cell.

# The PIL -> tensor conversion is identical for every tile, so it is built once
# instead of once per loop iteration.
transform = transforms.Compose([transforms.PILToTensor()])

results = []

for tile_path in tqdm(tile_paths):
    # The context manager closes the file handle once the pixel data is converted.
    with Image.open(tile_path) as tile:
        tensor = transform(tile) / 255

    # Add the batch dimension; the reshape only succeeds for 3x224x224 inputs.
    tensor = tensor.reshape((1,3,224,224))
    with torch.no_grad():
        pred = model(tensor)

    results.append((
        tile_path,
        pred,
        int(torch.argmax(pred))
    ))

In [None]:
# NOTE(review): `font` is not passed to any draw.text call in the visible part of
# the notebook (those calls use font_size instead) — confirm it is still needed.
font = ImageFont.load_default()

In [None]:
# Write the inference results as a series of contact sheets (grid_size tiles each).
# Every entry of `results` is (tile path, raw model output, predicted class index);
# the name of the tile's parent directory is compared with the predicted class name.
IMG_SIZE = 224

label_names = ['moire','no_moire']
grid_size = (5,5)   # (columns, rows)
tiles_per_grid = grid_size[0] * grid_size[1]

# banner colours: index 0 = directory name equals predicted class, index 1 = mismatch
label_color=[
    (0,200,0),
    (200,0,50)
]

offset = 0

while offset < len(results):
    out_img = Image.new(
        'RGB',
        (IMG_SIZE * grid_size[0], IMG_SIZE * grid_size[1]),
        color="white"
    )
    draw = ImageDraw.Draw(out_img)

    for y in range(grid_size[1]):
        for x in range(grid_size[0]):
            # Row-major position inside the sheet: the row stride is the number
            # of columns (grid_size[0]), not the number of rows.
            i = y * grid_size[0] + x

            if len(results) > offset + i:
                result = results[offset+i]
                label = result[0].parent.name
                target_label = label_names[result[2]]

                # Cell origin in IMG_SIZE steps, matching the canvas layout.
                left = x * IMG_SIZE
                top = y * IMG_SIZE

                # The context manager releases the file handle after pasting.
                with Image.open( result[0] ) as tile_img:
                    out_img.paste(
                        tile_img,
                        ( left, top )
                    )
                draw.rectangle(
                    (
                        ( left, top + round(IMG_SIZE * 0.9) ),
                        ( left + IMG_SIZE, top + IMG_SIZE )
                    ),
                    outline=None,
                    fill=label_color[0] if label == target_label else label_color[1]
                )
                draw.text(
                    ( left + round(IMG_SIZE * 0.5), top + round(IMG_SIZE * 0.97) ),
                    f'{ label }/{ target_label }',
                    anchor='ms',
                    font_size=15
                )

    # Advance by exactly one sheet. The former fixed step of 50 skipped every
    # second batch of 25 results. The file name carries the end offset of the sheet.
    offset += tiles_per_grid
    out_img.save( f'./tile_grid_{ (str(offset)).zfill(3) }.jpg' )

# Show the last sheet that was written.
out_img

In [None]:
# NOTE(review): this displays the complete results list (one tuple per tile, each
# holding the raw model output) — consider showing only a slice such as results[:5].
results