In [None]:
import os
import sys
import typing as t
from collections import namedtuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import greycomatrix, greycoprops
from scipy.stats import ks_2samp

plt.rcParams.update({'figure.max_open_warning': 0})

# add src to path
sys.path.append('../..')
from utils.db_helper import get_image_data
from utils.color_space_operations import all_colorspaces_from_rgb, comatrix_from_image

In [None]:
# Image directories of the two classes being compared: "fake" (STYLE) and "real" (FFHQ).
# NOTE(review): these are absolute, machine-specific Windows paths — consider deriving
# them from a configurable data root so the notebook runs on other machines.
SOURCE_DIR_STYLE = R"F:\master-thesis-databases\classification_db\fake"
SOURCE_DIR_FFHQ = R"F:\master-thesis-databases\classification_db\real"


In [None]:
# Forwarded to the image loader as its `grayscale` flag and used to pick the channel labels.
GRAYSCALE=False
# Directory that receives the figures and CSV files written by this notebook.
base_save_path = os.path.join("..", "..", "..", "exp", "Comatrix")
# Pixel-pair distances and angles (radians) for which co-occurrence matrices are computed.
DISTANCES = [1,2]
ANGLES = [0, np.pi/4, np.pi/2, 3*np.pi/4]


In [None]:
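# NOTE: this commented-out copy is never executed; the comatrix_from_image used by the
# notebook is the one imported from utils.color_space_operations.
# def comatrix_from_image(np_img: np.ndarray, distances: t.List[int], angles: t.List[float])-> np.ndarray: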
#     if len(np_img.shape) == 3:
#          np_comatrix = np.empty((256,256,np_img.shape[-1],len(distances),len(angles)))
#          for i in range(np_img.shape[-1]):
#             np_comatrix[:,:,i] = greycomatrix(np_img[:,:,i], distances, angles)
#     else:
#         raise ValueError('Image array must have 3 dimensions')
#     return np_comatrix

In [None]:
def comatrix_from_dataset(src_path: str, distances: t.List[int], angles: t.List[float], grayscale=False,
                          max_number_of_images: int = 100)-> np.ndarray:
    """Average the per-image co-occurrence matrices of every image in a dataset.

    Each image is expanded with ``all_colorspaces_from_rgb`` and turned into a
    co-occurrence matrix with ``comatrix_from_image``; the matrices are summed
    and divided by the number of processed images.

    Args:
        src_path: dataset location, forwarded to ``get_image_data``.
        distances: pixel-pair distances forwarded to ``comatrix_from_image``.
        angles: pixel-pair angles forwarded to ``comatrix_from_image``.
        grayscale: forwarded to ``get_image_data``.
        max_number_of_images: upper bound on loaded images, forwarded to
            ``get_image_data`` (defaults to the previously hard-coded 100).

    Returns:
        float64 array of shape ``(256, 256, 9, len(distances), len(angles))``.

    Raises:
        ValueError: if no image was loaded, or if the running sum approaches
            the largest representable float64 value.
    """
    print("Loading dataset...")
    dataset_gen = get_image_data(src_path, type='int', grayscale=grayscale,
                                 max_number_of_images=max_number_of_images)
    final_type = np.float64
    overflow_limit = 0.95*np.finfo(final_type).max
    print("Processing database...")
    # The channel axis is fixed at 9.
    # NOTE(review): assumes all_colorspaces_from_rgb always yields 9 channels — confirm.
    np_comatrix_dataset = np.zeros((256,256,9,len(distances),len(angles)),
                                   dtype=final_type)
    number_of_images = 0
    for np_image in dataset_gen:
        np_image = all_colorspaces_from_rgb(np_image, type='int')
        np_comatrix_dataset += comatrix_from_image(np_image, distances, angles)
        number_of_images += 1
        if number_of_images % 100 == 0:
            print(f"Image number: {number_of_images}")
        # Checked on every image; previously this was skipped whenever the
        # progress message above was printed.
        if np_comatrix_dataset.max() > overflow_limit:
            raise ValueError("Dataset to large, datatype overflow")
    if number_of_images == 0:
        # Fail loudly instead of returning a 0/0 (all-NaN) matrix.
        raise ValueError(f"No images were loaded from path: {src_path}")
    print (f"{number_of_images} processed from path: {src_path}")
    return np_comatrix_dataset / number_of_images

In [None]:
# Build one dataset-averaged co-occurrence matrix per class:
# "real" from the FFHQ directory and "fake" from the STYLE directory.
np_comatrix_ffhq = comatrix_from_dataset(SOURCE_DIR_FFHQ, DISTANCES, ANGLES, grayscale=GRAYSCALE)
np_comatrix_style = comatrix_from_dataset(SOURCE_DIR_STYLE, DISTANCES, ANGLES, grayscale=GRAYSCALE)

## Comatrix

In [None]:
# Channel labels in channel-axis order; the lookup below is keyed by the
# channel index rendered as a string.
channel_labels = ('Gray',) if GRAYSCALE else ('R', 'G', 'B', 'H', 'S', 'V', 'Y', 'Cb', 'Cr')
color_channels = {str(position): label for position, label in enumerate(channel_labels)}

# Dataset-averaged co-occurrence matrices keyed by the label used in plot titles.
datasets = dict(real=np_comatrix_ffhq, fake=np_comatrix_style)

### Plot co-occurrence matrices

In [None]:
# One figure per (distance, angle) pair. Every dataset gets its own band of
# rows, with up to CHANNELS_PER_ROW colour channels per row.
CHANNELS_PER_ROW = 3
n_channels = len(color_channels)
# Ceiling division, so channel counts that are not a multiple of three
# (e.g. a single grayscale channel) still get a row.
rows_per_dataset = (n_channels + CHANNELS_PER_ROW - 1) // CHANNELS_PER_ROW

for i, distance in enumerate(DISTANCES):
    for j, angle in enumerate(ANGLES):
        # squeeze=False keeps axs two-dimensional even for a single row.
        fig, axs = plt.subplots(nrows=len(datasets)*rows_per_dataset, ncols=CHANNELS_PER_ROW,
                                figsize=(15,15), squeeze=False)
        fig.suptitle(f'Comatrixes for distance: {distance} and angle: {angle} rads')
        fig.subplots_adjust(hspace=0.5)
        for dataset_nr, (name, dataset) in enumerate(datasets.items()):
            first_row = dataset_nr*rows_per_dataset
            for col in range(n_channels):
                ax = axs[first_row + col//CHANNELS_PER_ROW, col%CHANNELS_PER_ROW]
                ax.imshow(dataset[:,:,col,i,j])
                ax.set_title(f"{name}_{color_channels[str(col)]}")
            # Blank the panels of this band that have no channel to show.
            for col in range(n_channels, rows_per_dataset*CHANNELS_PER_ROW):
                axs[first_row + col//CHANNELS_PER_ROW, col%CHANNELS_PER_ROW].axis('off')

        #saving image
        filename = f"{distance}_{angle}.png"
        save_path = os.path.join(base_save_path, filename)
        plt.show()
        #fig.savefig(save_path)
        # Release the figure so looping over many pairs does not accumulate open figures.
        plt.close(fig)

# Greycoprops

## Based on the average co-occurrence matrix of each dataset

In [None]:
# CSV destination for the ranked metrics table (the export call further down is commented out).
save_path = os.path.join(base_save_path, "greycoprops.csv")
# Texture properties evaluated with greycoprops for every colour channel.
greycoprops_properties = ("contrast", "dissimilarity", "homogeneity", "ASM", "energy", "correlation")
# Result table: one row per (property, colour channel) pair, filled in by the next cell.
df_metrics = pd.DataFrame(columns=['Property', 'Color', 'Color_number', 'Angle', 'Distance', 'Value_fake', 'Value_real', 'Diff_Value'])

In [None]:
# For every (property, colour channel) pair, find the distance/angle combination
# at which the averaged real and fake matrices differ most, and record it.
# Rows are collected as dicts keyed by column name so each value provably lands
# in its own column; the previous positional list stored the real value under
# 'Value_fake' and the fake value under 'Value_real'.
metric_rows = []
for prop in greycoprops_properties:
    for i in range(len(color_channels)):
        # Each result has shape (len(DISTANCES), len(ANGLES)).
        result_real = greycoprops(np_comatrix_ffhq[:,:,i,:,:], prop=prop)
        result_fake = greycoprops(np_comatrix_style[:,:,i,:,:], prop=prop)
        # Relative difference |real - fake| / (real + fake).
        diff = np.abs((result_real-result_fake)/(result_real+result_fake))
        dist, angle = np.unravel_index(np.argmax(diff), diff.shape)
        metric_rows.append({
            'Property': prop,
            'Color': color_channels[str(i)],
            'Color_number': i,
            'Angle': ANGLES[angle],
            'Distance': DISTANCES[dist],
            'Value_fake': result_fake[dist, angle],
            'Value_real': result_real[dist, angle],
            'Diff_Value': np.max(diff),
        })
# Build the frame once instead of growing it row by row; re-running the cell
# simply rebuilds the same table.
df_metrics = pd.DataFrame(metric_rows, columns=df_metrics.columns)

In [None]:
# Rank the (property, colour channel) pairs by decreasing relative difference,
# so the most discriminative combinations come first.
df_metrics = df_metrics.sort_values('Diff_Value', ascending=False)
#df_metrics.to_csv(save_path)
# Preview the top-ranked rows.
df_metrics.head()

## Compare values of greycoprops for single images

In [None]:
# Number of top-ranked df_metrics rows that are examined image by image.
rows_to_analyze = 4
# Upper bound on the images loaded per dataset for the per-image analysis.
number_of_images = 100
# Output file for the scatter figure of per-image property values.
save_path = os.path.join(base_save_path, "greycoprops_scatter_with_outliers.png")

In [None]:
# Image sources for the real (FFHQ) and fake (STYLE) datasets, capped at number_of_images each.
# NOTE(review): the *_gen names suggest one-shot generators — confirm before iterating them more than once.
ffhq_gen = get_image_data(SOURCE_DIR_FFHQ, type='int', grayscale=False, max_number_of_images=number_of_images)
style_gen = get_image_data(SOURCE_DIR_STYLE, type='int', grayscale=False, max_number_of_images=number_of_images)

In [None]:
def greycoprops_from_image(np_img: np.ndarray, distances: t.List[int], angles: t.List[float], prop: str):
    """Compute one texture property for every channel of a multi-channel image.

    Args:
        np_img: image array with exactly three dimensions; the last axis is
            iterated as the channel axis.
        distances: pixel-pair distances forwarded to ``comatrix_from_image``.
        angles: pixel-pair angles forwarded to ``comatrix_from_image``.
        prop: property name forwarded to ``greycoprops``.

    Returns:
        Array of shape ``(channels, len(distances), len(angles))``.

    Raises:
        ValueError: if ``np_img`` does not have three dimensions.
    """
    # Reject unsupported input before paying for the co-occurrence computation.
    if np_img.ndim != 3:
        raise ValueError ("Image shape not supported")

    np_comatrix = comatrix_from_image(np_img, distances, angles)
    n_channels = np_img.shape[-1]
    np_result = np.empty((n_channels, len(distances), len(angles)))
    for i in range(n_channels):
        # greycoprops is the module-level import from skimage.feature.
        np_result[i] = greycoprops(np_comatrix[:,:,i,:,:], prop=prop)
    return np_result

In [None]:
def get_greycoprops_from_dataset(src_path: str, distance: t.List[int], angle: t.List[float], color: int, prop: str, number_of_images):
    """Compute one texture property for a single colour channel of every image in a dataset.

    Args:
        src_path: directory of the dataset to load (str or path-like). This is
            the dataset that is actually read; it used to be ignored in favour
            of a hard-coded directory.
        distance: pixel-pair distance, or a list of them; only the first entry
            contributes to the result.
        angle: pixel-pair angle, or a list of them; only the first entry
            contributes to the result.
        color: index of the colour channel to report.
        prop: property name forwarded to ``greycoprops_from_image``.
        number_of_images: upper bound on loaded images, forwarded to
            ``get_image_data``.

    Returns:
        List with one property value per loaded image.

    Raises:
        TypeError: if ``src_path`` is not a str, bytes or path-like object
            (for example an already-created image generator).
    """
    # os.fspath rejects non-path arguments up front with a clear TypeError.
    src_path = os.fspath(src_path)
    dataset_gen = get_image_data(src_path, type='int', grayscale=False, max_number_of_images=number_of_images)
    # Scalars (including numpy scalars coming from a DataFrame row) are wrapped
    # into one-element lists.
    distance = list(distance) if isinstance(distance, (list, tuple)) else [distance]
    angle = list(angle) if isinstance(angle, (list, tuple)) else [angle]
    results = []
    for np_img in dataset_gen:
        np_all_img = all_colorspaces_from_rgb(np_img, type='int')
        # [color, 0, 0]: requested channel, first distance, first angle.
        greycoprop = greycoprops_from_image(np_all_img, distance, angle, prop=prop)[color,0,0]
        results.append(greycoprop)
    return results

In [None]:
def delete_max_outliers(np_array: np.ndarray, percentage=0.90)-> np.ndarray:
    """Drop the largest values of a 1-D collection and return the rest in random order.

    Args:
        np_array: values to trim (array or sequence); it is not modified.
        percentage: fraction of the values to keep, counted from the smallest.
            With ``1.0`` nothing is removed and the values are only shuffled.

    Returns:
        New array holding the ``int(percentage * size)`` smallest values,
        shuffled with numpy's global random state.
    """
    # Sort ascending so the slice below cuts off the largest values. The
    # previous descending sort discarded the smallest ones instead.
    np_result = np.sort(np_array)
    trim_threshold = int(percentage*np_result.size)
    np_result = np_result[:trim_threshold]
    # Shuffle so the kept values are not plotted in sorted order.
    np.random.shuffle(np_result)
    return np_result

In [None]:
# Calculate the per-image greycoprops values of the top-ranked property/channel
# combinations for both the real and the fake dataset.
# get_greycoprops_from_dataset takes the dataset *directory* as its first
# argument, so each dataset's own source directory is passed rather than a
# generator object.
top_metrics = df_metrics.iloc[:rows_to_analyze]
real = top_metrics.apply(lambda x: get_greycoprops_from_dataset(SOURCE_DIR_FFHQ, x['Distance'], x['Angle'], x['Color_number'], x['Property'], number_of_images), axis=1)
fake = top_metrics.apply(lambda x: get_greycoprops_from_dataset(SOURCE_DIR_STYLE, x['Distance'], x['Angle'], x['Color_number'], x['Property'], number_of_images), axis=1)

In [None]:
# Scatter the per-image property values of both datasets, one panel per analysed row.
SCATTER_COLS = 2
# Ceiling division (at least one row) so the grid follows rows_to_analyze.
scatter_rows = max(1, (rows_to_analyze + SCATTER_COLS - 1) // SCATTER_COLS)
fig, axs = plt.subplots(nrows=scatter_rows, ncols=SCATTER_COLS, figsize=(15,7), squeeze=False)
fig.suptitle("Greycoprops from datasets as plot")
for i in range(rows_to_analyze):
    ax = axs[i // SCATTER_COLS, i % SCATTER_COLS]
    for values in (real.iloc[i], fake.iloc[i]):
        # percentage=1.0 keeps every value; the helper then only shuffles the order.
        points = delete_max_outliers(values, percentage=1.0)
        # The x axis is sized from the data, so a dataset with fewer than
        # number_of_images images still plots.
        ax.scatter(np.arange(points.size), points)
    ax.legend(['real', 'fake'])
    row = df_metrics.loc[real.index[i]]
    ax.set_title(f"Property: {row['Property']}, Distance: {row['Distance']}, Angle: {row['Angle']}, Color: {row['Color']}")
    ax.set_xlabel("image (shuffled order)")
    ax.set_ylabel("property value")
# Blank any panels left over when rows_to_analyze does not fill the grid.
for ax in axs.flat[rows_to_analyze:]:
    ax.axis('off')
fig.savefig(save_path)

## Kolmogorov-Smirnov test

In [None]:
# CSV destination for the metrics table extended with the KS-test results.
save_path = os.path.join(base_save_path, "greycoprops+kstest.csv")

In [None]:
# Run a two-sample Kolmogorov-Smirnov test on the real vs. fake per-image values
# of each analysed row. Column 0 holds the test statistic, column 1 the p-value.
np_results = np.empty((rows_to_analyze, 2))
for i in range(rows_to_analyze):
    ks_result = ks_2samp(real.iloc[i], fake.iloc[i])
    # Read the fields by name rather than relying on the result's tuple layout.
    np_results[i] = (ks_result.statistic, ks_result.pvalue)

In [None]:
# Attach the KS statistic and p-value to the analysed rows and export the table.
# assign() works on a copy, so df_metrics itself is left untouched, and the new
# columns are appended in the order written: "Statistics" first, then "pValue".
# The previous pair of insert() calls reused the same position, which placed
# "pValue" in front of "Statistics".
# np_results rows are in the same order as the first rows_to_analyze rows of df_metrics.
df_results = df_metrics.iloc[:rows_to_analyze].assign(
    Statistics=np_results[:,0],
    pValue=np_results[:,1],
)
df_results.to_csv(save_path)
df_results