In [1]:
import sys
sys.path.append('../../30_data_tools/')

In [2]:
import pandas as pd
import plotly.express as px
from datetime import datetime
from scipy.stats import chisquare
from sklearn import preprocessing

In [3]:
from PIL import Image
import numpy as np

In [4]:
from helper import load_dotenv

In [5]:
# NOTE(review): `load_dotenv` comes from the project-local `helper` module;
# presumably it returns the project's environment settings — confirm.
# `dotenv` is not referenced in any other visible cell.
dotenv = load_dotenv()

In [7]:
def load_data( pkl_path ):
    """
    Load one pickled DataFrame and tag every row with the file's timestamp.

    The file name without its extension is parsed as an integer Unix
    timestamp and converted with `datetime.fromtimestamp`, i.e. into a naive
    datetime in the local time zone.

    Parameters
    ----------
    pkl_path : pathlib.Path
        Path to the pickle file; its stem must be an integer literal.

    Returns
    -------
    pandas.DataFrame
        The unpickled frame with an additional `timestamp` column.

    Raises
    ------
    ValueError
        If the file stem cannot be parsed as an integer.
    """
    # SECURITY: unpickling can execute arbitrary code — only load trusted files.
    data = pd.read_pickle(pkl_path)

    # `stem` removes exactly the final suffix. `str.strip(suffix)` would treat
    # the suffix as a set of characters and strip them from both ends, which
    # can silently eat leading/trailing characters of the timestamp itself.
    created_at = datetime.fromtimestamp( int(pkl_path.stem) )

    data.loc[
        :,
        'timestamp'
    ] = created_at

    return data

In [8]:
# Load every pickle listed in `pkls` and stack the resulting frames row-wise.
# NOTE(review): `pkls` is not defined in any visible cell (the execution
# counter jumps from In [5] to In [7]) — confirm where it comes from,
# otherwise this cell fails on Restart & Run All.
# NOTE(review): without `ignore_index=True` each file keeps its own index
# labels, so labels in the combined frame may repeat.
data = pd.concat([load_data(pkl_path) for pkl_path in pkls])

# Allgemeine Zusammenhänge

In [None]:
# Show the first row as a sample record.
data.iloc[0]

In [None]:
# List the column labels of the combined frame.
data.columns

In [None]:
# Summary statistics of the ssim column.
data['ssim'].describe()

In [None]:
# Scatter plot of all ssim values in ascending order (x: rank, y: ssim).
px.scatter(
    data['ssim'].sort_values().reset_index(drop=True)
)

In [None]:
# Dimensions of the combined frame as (rows, columns).
data.shape

In [None]:
# Column-wise sum over every column whose name starts with "use_".
data[[name for name in data.columns if name.startswith('use_')]].sum()

In [None]:
# Derive an `area` column as the product of bbox[2] and bbox[3]
# (used as width and height when the bbox is cropped further below).
data.loc[:, 'area'] = data['bbox'].map(lambda box: box[2] * box[3])

In [None]:
# Pairwise correlation (pandas default: Pearson) between ssim and area.
data[['ssim', 'area']].corr()

In [None]:
# Two-column frame: ssim next to an integer code for each pattern value.
# NOTE(review): pd.factorize numbers the distinct values in order of first
# appearance, so the codes carry no ordinal meaning — correlation
# coefficients computed on them should be interpreted with care.
df = data[['ssim']].copy()
df.loc[:, 'pattern'] = pd.factorize(data['pattern'])[0]

In [None]:
# Rank-based (Spearman) correlation between ssim and the pattern codes.
df.corr(method='spearman')

In [None]:
# Linear (Pearson) correlation between ssim and the pattern codes.
df.corr(method='pearson')

# Zusammenhang: ssim und Flächendeckung

In [None]:
def get_edge_share( row, edge_limit ):
    """
    Return the share of near-extreme pixels inside a row's bounding box.

    The image at `row.img_path` is cropped to `row.bbox`, read as
    (left, top, width, height). Channel index 3 of the crop is inspected
    (presumably the K separation of a CMYK image — confirm), and a pixel
    counts when its value is below `edge_limit` or above `255 - edge_limit`.

    Parameters
    ----------
    row : object with `img_path` and `bbox` attributes
        Typically one DataFrame row handed in by `DataFrame.apply(axis=1)`.
    edge_limit : int
        Distance from 0 and from 255 within which a value counts.

    Returns
    -------
    float
        Number of counted pixels divided by the total pixel count of the crop.
    """
    crop_box = (
        row.bbox[0],
        row.bbox[1],
        row.bbox[0] + row.bbox[2],
        row.bbox[1] + row.bbox[3]
    )

    # Open via context manager so the file handle is released right away;
    # without it every processed row leaves an open file behind until GC.
    # The pixel data is copied into a numpy array before the file is closed.
    with Image.open( row.img_path ) as source_img:
        k_separation = np.array(source_img.crop(crop_box))[:,:,3]

    edge_mask = (k_separation < edge_limit) | (k_separation > (255 - edge_limit))

    return np.count_nonzero(edge_mask) / k_separation.size

In [None]:
# Compute the edge share of every row with an edge limit of 10 and store it
# as a new column. Opens one image file per row.
data.loc[:, 'edge_share'] = data.apply(
    lambda row: get_edge_share(row, 10),
    axis=1
)

In [None]:
# Scale both quantities with MinMaxScaler (default range [0, 1]). The scaler
# expects 2-D input, hence each column is reshaped to (n_rows, 1).
edge_share = data['edge_share']  # pandas Series
edge_share_normalized = preprocessing.MinMaxScaler().fit_transform(
    edge_share.to_numpy().reshape(-1, 1)
)

ssim = data['ssim']  # pandas Series
ssim_normalized = preprocessing.MinMaxScaler().fit_transform(
    ssim.to_numpy().reshape(-1, 1)
)

In [None]:
# Attach the min-max scaled values to the frame as new columns.
# NOTE(review): both arrays come from fit_transform on a (-1, 1) reshape, so
# they are 2-D column vectors of shape (n_rows, 1) rather than flat arrays.
data.loc[
    :,
    'edge_share_normalized'
] = edge_share_normalized

data.loc[
    :,
    'ssim_normalized'
] = ssim_normalized

In [None]:
# Pairwise correlation (pandas default: Pearson) between ssim and edge share.
data[['ssim', 'edge_share']].corr()

In [None]:
# Same correlation on the min-max scaled columns.
# NOTE(review): Pearson correlation is invariant under positive affine
# rescaling, so this is expected to reproduce the unscaled coefficients.
data[['ssim_normalized', 'edge_share_normalized']].corr()

In [None]:
# Image path stored in the first row.
data.iloc[0]['img_path']