In [None]:
import os
import sys
import typing as t

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage.filters import convolve
from scipy.stats import ks_2samp

# add src to path
sys.path.append('../..')
from utils.db_helper import get_image_data
from utils.color_space_operations import get_difference_img_gen, calculate_difference_image

In [None]:
# Grayscale switch; it is handed to the peak-point extraction through its
# `grayscale` argument.
GRAYSCALE = False

# Directory for exported histogram results, expressed relative to the
# notebook's working directory.
base_save_path = os.path.join("..", "..", "..", "exp", "Histograms")


In [None]:
# Root folders of the two image classes compared in this notebook.
# NOTE(review): both are absolute, machine-specific Windows paths; the notebook
# cannot run on another machine until they are adjusted.
# "fake" class — the variable name suggests StyleGAN-generated images; TODO confirm.
SOURCE_DIR_STYLE = R"F:\master-thesis-databases\classification_db\fake"
# "real" class — the variable name suggests FFHQ photographs; TODO confirm.
SOURCE_DIR_FFHQ = R"F:\master-thesis-databases\classification_db\real"

In [None]:
def hist_peek_point(np_img: np.ndarray, bins=256, hist_range=(0,255), channels=9)-> t.List[t.Tuple[int, float]]:
    """Locate the histogram peak of every channel of an image.

    For each of the first ``channels`` channels a density-normalised histogram
    is computed and the highest bin is reported.

    Args:
        np_img: Image data indexed as ``np_img[:, :, channel]``; anything
            accepted by ``np.asarray`` works.
        bins: Number of histogram bins (forwarded to ``np.histogram``).
        hist_range: ``(low, high)`` value range covered by the histogram.
        channels: Number of leading channels to analyse.

    Returns:
        A list with one ``(x, y)`` tuple per channel, where ``x`` is the left
        edge of the peak bin truncated to ``int`` and ``y`` is the density
        value of that bin. When several bins share the maximum, the lowest
        one is reported.
    """
    np_img = np.asarray(np_img)
    peek_points = []
    for colorspace in range(channels):
        # Keep the returned edges under their own name: rebinding `bins` here
        # would feed the edge array (not the bin count) to the next channel.
        np_hist, bin_edges = np.histogram(
            np_img[:, :, colorspace], density=True, bins=bins, range=hist_range
        )
        # argmax yields the first index of the maximum, so ties resolve to the
        # lowest bin without converting index arrays to scalars.
        idx = int(np.argmax(np_hist))
        x = int(bin_edges[idx])
        y = np_hist[idx]
        peek_points.append((x, y))
    return peek_points

In [None]:
def peek_points_from_diff_dataset(dataset_gen: t.Generator[np.ndarray, None, None], bins=511, max_number_of_images=10, grayscale=False)-> t.List[t.List[t.Tuple[int, float]]]:
    """Collect the per-channel histogram peak points of every image in a dataset.

    Args:
        dataset_gen: Iterable yielding one image array at a time; it is
            consumed completely.
        bins: Currently unused. It is NOT forwarded to ``hist_peek_point``,
            which runs with its own defaults.
        max_number_of_images: Currently unused; the number of processed images
            is determined solely by ``dataset_gen``.
        grayscale: Currently unused.

    Returns:
        One entry per image, each being the list of ``(x, y)`` peak tuples
        produced by ``hist_peek_point`` for that image.

    NOTE(review): the three unused parameters are kept only so existing calls
    keep working; decide whether they should take effect or be removed.
    """
    print("Processing database...")
    return [hist_peek_point(np_img) for np_img in dataset_gen]

# Peak points

In [None]:
# Labels of the nine image channels, in the order used to index the
# peak-point arrays.
color_components = (
    "R", "G", "B",
    "H", "S", "V",
    "Y", "Cb", "Cr",
)
# Legend labels for the two datasets under comparison.
categories = ["real", "fake"]
# Re-declared here; the same flag is also set in the configuration cell near
# the top of the notebook.
GRAYSCALE = False

In [None]:
# Image sources for both classes, each requested with type='float', in colour,
# and capped at 100 images.
ffhq_gen = get_image_data(
    SOURCE_DIR_FFHQ,
    type='float',
    grayscale=False,
    max_number_of_images=100,
)
style_gen = get_image_data(
    SOURCE_DIR_STYLE,
    type='float',
    grayscale=False,
    max_number_of_images=100,
)

# Each image stream is wrapped by get_difference_img_gen before the histogram
# peak points are extracted.
FFHQ_peek_points = peek_points_from_diff_dataset(
    get_difference_img_gen(ffhq_gen),
    max_number_of_images=100,
    grayscale=GRAYSCALE,
)
Style_peek_points = peek_points_from_diff_dataset(
    get_difference_img_gen(style_gen),
    max_number_of_images=100,
    grayscale=GRAYSCALE,
)

In [None]:
# Convert the nested peak-point lists to arrays; later cells index them as
# [image, channel, 0] for the x values and [image, channel, 1] for the y values.
np_FFHQ_peek_points, np_Style_peek_points = (
    np.asarray(points) for points in (FFHQ_peek_points, Style_peek_points)
)

In [None]:
# One scatter plot per color component: peak position (x) against peak
# density (y), real images versus fake images.
for colorspace, component in enumerate(color_components):
    fig, ax1 = plt.subplots(figsize=(15, 15))

    ax1.scatter(
        np_FFHQ_peek_points[:, colorspace, 0],
        np_FFHQ_peek_points[:, colorspace, 1],
        color='blue',
        label=categories[0],
    )
    ax1.scatter(
        np_Style_peek_points[:, colorspace, 0],
        np_Style_peek_points[:, colorspace, 1],
        color='orange',
        label=categories[1],
    )
    # Label the axes so each figure can be read on its own.
    ax1.set_xlabel('Histogram peak position (bin edge)')
    ax1.set_ylabel('Histogram peak density')
    fig.suptitle(f'Diff histogram peek points comparision for color: {component}')
    # Attach the legend to this axes explicitly instead of relying on pyplot's
    # implicit "current axes".
    ax1.legend(loc='upper left')
    # Optional export; do it here, before the figure is closed, and use a
    # per-component file name so figures do not overwrite each other:
    # fig.savefig('../../../exp/Histograms/Peek_points_comparision_v2.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

## Kolmogorov-Smirnov test for color components

In [None]:
# Destination CSV path for the KS-test results table, located under base_save_path.
save_path = os.path.join(base_save_path, "peek_points_diff+kstest.csv")

In [None]:
# Two-sample Kolmogorov-Smirnov test per color component, run separately on
# the peak positions (index 0) and the peak heights (index 1) of both datasets.
results = []
for colorspace, component in enumerate(color_components):
    ffhq_channel = np_FFHQ_peek_points[:, colorspace]
    style_channel = np_Style_peek_points[:, colorspace]
    x_ks_test = ks_2samp(ffhq_channel[:, 0], style_channel[:, 0])
    y_ks_test = ks_2samp(ffhq_channel[:, 1], style_channel[:, 1])
    results.append((component, x_ks_test, y_ks_test))


In [None]:
# Tabulate the KS-test outcomes: one row per color component, holding the
# ks_2samp result of the x test and of the y test.
result_columns = ["Color_component", "X", "Y"]
df_results = pd.DataFrame(data=results, columns=result_columns)
# df_results.to_csv(save_path)
df_results