In [None]:
import sys
sys.path.append('../30_data_tools/')
sys.path.append('../60_Code/Model/')

import sqlite3
from contextlib import closing

import pandas as pd
import plotly.express as px
from tqdm.auto import tqdm

from helper import load_dotenv
from file_interaction import get_related_filepath, open_img
from classification_tools import preclassifier, cut_img_into_tiles

In [None]:
from load_constants import load_colors

In [None]:
# Project color constants; the plotting cell below reads colors['COLOR_SEQUENCE'].
colors = load_colors()

In [None]:
# Environment configuration; later cells read the 'DB_PATH' and 'ATTACHMENT_DIR' entries.
dotenv = load_dotenv()

In [None]:
# Load all related-file records for the random training pages of one variant / file type.
# NOTE: sqlite3's own connection context manager only wraps a transaction
# (commit / rollback) and does NOT close the connection. closing() makes sure the
# database handle is released as soon as the query has been read.
with closing(sqlite3.connect( dotenv['DB_PATH'] )) as con:
    data = pd.read_sql(
        '''
            SELECT * FROM related_file rf 
            WHERE job='24-03-05-01_randomTrainPages' AND variant_name = 'ps2400dpi150lpi' AND "type" = '4c_600' 
        ''',
        con
    )

In [None]:
# Draw a random subset of pages to evaluate.
# - A fixed seed makes the sample (and every figure / statistic derived from it)
#   reproducible across "Restart Kernel -> Run All".
# - The sample size is capped at the number of available rows, because
#   DataFrame.sample raises a ValueError when n exceeds the population
#   (sampling without replacement).
SAMPLE_SIZE = 250
SAMPLE_SEED = 42

sample = data.sample(n=min(SAMPLE_SIZE, len(data)), random_state=SAMPLE_SEED)

In [None]:
# For every sampled page: open the page image, cut it into tiles and run the
# preclassifier on them. One tuple per page is collected:
# (index label of the page in `data`, number of tiles cut, number of tiles returned
# by the preclassifier).
out = []

for data_idx, page in tqdm(sample.iterrows(), total=len(sample)):
    page_img = open_img(
        get_related_filepath(
            page.job,
            page.variant_name,
            f'{ page.pdf_filename }.{ page.type }.jpg'
        )
    )

    all_tiles = cut_img_into_tiles( page_img, 600, [300] )
    # presumably the tiles that pass the preclassifier — the next cell treats
    # 1 - (this count / total) as the share of removed tiles
    passed_tiles = preclassifier( all_tiles )

    out.append(( data_idx, len(all_tiles), len(passed_tiles) ))

In [None]:
# Turn the per-page counts into a table and derive the share of tiles that did not
# come back from the preclassifier: 1 - (tiles_filtered / tiles_found).
# Note: despite its name, 'tiles_filtered' holds the number of tiles returned by
# the preclassifier, not the number removed.
results = (
    pd.DataFrame(out, columns=['data_idx','tiles_found','tiles_filtered'])
    .assign(filtered_share=lambda counts: 1 - (counts.tiles_filtered / counts.tiles_found))
)

results

In [None]:
# Bar chart of the removed-tile share per checked page, with the mean (solid line)
# and mean +/- one standard deviation (dashed lines) overlaid. The figure is
# exported as PDF and JPG into the attachment directory and shown inline.
share_mean = results.filtered_share.mean()
share_std = results.filtered_share.std()
line_color = colors['COLOR_SEQUENCE'][0]

fig = px.bar(
    results.filtered_share,
    color_discrete_sequence=colors['COLOR_SEQUENCE'][2:],
    labels={
        'index' : 'geprüfte Seiten',
        'value' : 'Anteil entfernter Kacheln'
    },
    width=900,
    height=300
)

# Solid line at the mean, dashed lines one standard deviation below and above it.
fig.add_hline(y=share_mean, line_color=line_color)
for band_y in (share_mean - share_std, share_mean + share_std):
    fig.add_hline(y=band_y, line_dash="dash", line_color=line_color)

# Shares live in [0, 1]; the legend and per-page tick labels carry no information here.
fig.update_layout(yaxis_range=[0,1], showlegend=False)
fig.update_xaxes(showticklabels=False)

export_dir = dotenv['ATTACHMENT_DIR'] / 'klassifizierungsmodell'
for export_name in ('anteil_gefilterter_kacheln.pdf', 'anteil_gefilterter_kacheln.jpg'):
    fig.write_image( export_dir / export_name )

fig

In [None]:
# Mean and standard deviation of the removed-tile share over all checked pages.
removed_share = results.filtered_share
removed_share.mean(), removed_share.std()