In [None]:
# import dependencies
import os
import git
import numpy as np
import pandas as pd
import seaborn as sns
import math
import json
import matplotlib.pyplot as plt


def get_git_root(path):
    """Return the top-level directory of the git repository that contains ``path``.

    Args:
        path: A location inside the working tree. Parent directories are
            searched, so any sub-directory of the repository works.

    Returns:
        The output of ``git rev-parse --show-toplevel`` for that repository.
    """
    repo = git.Repo(path, search_parent_directories=True)
    return repo.git.rev_parse("--show-toplevel")

# Work from the repository root so the relative paths used in later cells
# ("runs/...", "./data") resolve regardless of where the notebook was started.
os.chdir(get_git_root("."))

In [None]:
# configuration of runs

# Identifiers of the experiment-tracking workspace
# (presumably Weights & Biases entity / project / artifact -- TODO confirm).
ENTITY = "simonluder"
PROJECT = "MSE_P7"
ARTIFACT = "validation_results"

# Names of the run directories to analyse. The name encodes the optional
# subset ("sub100"), the image size (32 / 64) and the encoder type; these
# parts are parsed back out of the name by postprocess().
RUNS = [
    # image size 32
    "2D_GeoShape_32_linear_tabular_1704819570",
    "2D_GeoShape_32_linear_cnn_image_1704819570",
    "2D_GeoShape_32_linear_clip_text_1704819570",
    "2D_GeoShape_32_linear_clip_image_1704819570",
    # image size 64
    "2D_GeoShape_64_linear_tabular_1705056127",
    "2D_GeoShape_64_linear_cnn_image_1705051540",
    "2D_GeoShape_64_linear_clip_text_1705056262",
    "2D_GeoShape_64_linear_clip_image_1705056262",
    # image size 32, "sub100" subset
    "2D_GeoShape_sub100_32_linear_tabular_1705411529",
    "2D_GeoShape_sub100_32_linear_cnn_image_1705405821",
    "2D_GeoShape_sub100_32_linear_clip_text_1705410786",
    "2D_GeoShape_sub100_32_linear_clip_image_1705569629",
]

# Directory that holds one sub-directory per run.
LOG_DIR = "runs/"

# NOTE(review): not referenced in the cells visible here -- presumably toggles
# downloading the artifacts; confirm before relying on it.
download = True

# One fixed colour per encoder type.
color_palette = {
    "clip_text": "#7852A9",
    "clip_image": "#80de81",
    "tabular": "#4285c6",
    "cnn_image": "#8FD4CB",
}

In [None]:
# functions

def get_config(filepath):
    """Load a run configuration from a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON content.
    """
    with open(filepath, 'r') as config_file:
        config = json.load(config_file)
    return config
       

def get_metrics(filepath):
    """Read a metrics JSON file and split its entries by data split.

    The file is expected to contain a list of dict-like entries. Each entry
    may carry a "train", "val" and/or "test" payload; missing or empty
    (falsy) payloads are skipped.

    Args:
        filepath: Path of the metrics JSON file.

    Returns:
        A tuple ``(train, val, test)`` of lists with the collected payloads,
        each in file order.
    """
    with open(filepath, 'r') as metrics_file:
        entries = json.load(metrics_file)

    per_split = {"train": [], "val": [], "test": []}

    for entry in entries:
        for split, collected in per_split.items():
            payload = entry.get(split)
            if payload:
                collected.append(payload)

    return per_split["train"], per_split["val"], per_split["test"]

def postprocess_df_val(df_val):
    """Flatten the per-epoch ``samples`` column into one row per sample.

    Args:
        df_val: Frame with a ``samples`` column that is expected to hold a
            list of dicts per row; the dicts must provide ``path_original``.

    Returns:
        A new frame with one row per sample: the remaining columns of
        ``df_val`` repeated for each sample, followed by one column per
        sample key. "/workspace" in ``path_original`` is rewritten to "."
        and +/-inf values are replaced by NaN.
    """
    # One row per sample; the fresh 0..n-1 index lets the concat below line
    # the two halves up row by row.
    exploded = df_val.explode('samples').reset_index(drop=True)

    sample_columns = pd.json_normalize(exploded["samples"])
    remaining_columns = exploded.drop(columns=['samples'])
    df = pd.concat([remaining_columns, sample_columns], axis=1)

    # Paths were recorded under /workspace; make them relative to the cwd.
    df["path_original"] = df["path_original"].str.replace("/workspace", ".")

    df = df.replace([np.inf, -np.inf], np.nan)
    return df

def postprocess(df):
    """Derive run metadata columns from the ``run`` name column.

    Adds three columns to ``df`` (the frame is modified in place and also
    returned):

    * ``encoder``: one of clip_text, clip_image, tabular, cnn_image
    * ``image_size``: 32 or 64, taken from the "_32_" / "_64_" part of the name
    * ``subset``: 100 for runs whose name contains "sub100", otherwise 1000

    Args:
        df: Frame with a string column ``run``.

    Returns:
        The same frame object with the extra columns.
    """
    run_names = df['run']

    df['encoder'] = run_names.str.extract(r'(clip_text|clip_image|tabular|cnn_image)', expand=False)

    size_token = run_names.str.extract(r'(_32_|_64_)', expand=False)
    df['image_size'] = size_token.str.replace("_", "").astype(int)

    # Runs without a "sub100" marker fall back to the default subset of 1000.
    subset_token = run_names.str.extract(r'(sub100)', expand=False)
    df['subset'] = subset_token.str.replace("sub", "").fillna("1000").astype(int)

    return df


In [None]:
# load training datasets

ds_path = "./data"

# load dataset labels: one labels.csv per dataset directory below ds_path
datasets = list()
# sorted() keeps the concatenation order deterministic (os.listdir order is arbitrary)
for dataset in sorted(os.listdir(ds_path)):
    # skip stray files (e.g. .DS_Store, .gitkeep) that are not dataset directories
    if not os.path.isdir(os.path.join(ds_path, dataset)):
        continue
    dataset_path = os.path.join(ds_path, dataset, "labels.csv")
    datasets.append(pd.read_csv(dataset_path))
df_datasets = pd.concat(datasets)


# load config: one config.json per run directory below LOG_DIR
configs = list()
for run in RUNS:
    config = get_config(filepath=os.path.join(LOG_DIR, run, "config.json"))
    configs.append(config)

df_config = pd.DataFrame.from_records(configs)
# paths were recorded under /workspace; make them relative to the repository root
df_config["test_images"] = df_config["test_images"].str.replace("/workspace", ".")
df_config["test_labels"] = df_config["test_labels"].str.replace("/workspace", ".")


# load metrics: one metrics.json per run, split into train / val / test records
df_train_list = []
df_val_list = []
df_test_list = []
for run in RUNS:

    # load jsons
    data_train, data_val, data_test = get_metrics(filepath=os.path.join(LOG_DIR, run, "metrics.json"))

    df_train = pd.DataFrame.from_records(data_train)
    df_val = pd.DataFrame.from_records(data_val)
    df_test = pd.DataFrame.from_records(data_test)

    # tag every row with its run so the frames can be concatenated afterwards
    df_train["run"] = run
    df_val["run"] = run
    df_test["run"] = run

    # val / test records carry a per-sample list that is flattened to one row
    # per sample; skipped for empty frames, which have no "samples" column
    if len(df_val):
        df_val = postprocess_df_val(df_val)

    if len(df_test):
        df_test = postprocess_df_val(df_test)

    # derive encoder / image_size / subset columns from the run name
    df_train = postprocess(df_train)
    df_val = postprocess(df_val)
    df_test = postprocess(df_test)

    df_train_list.append(df_train)
    df_val_list.append(df_val)
    df_test_list.append(df_test)

df_train = pd.concat(df_train_list)
df_val = pd.concat(df_val_list)
df_test = pd.concat(df_test_list)


In [None]:
from torch.utils.data import DataLoader
import torch
from PIL import Image
from utils.metrics import iou_pytorch, batch_center_of_mass, max_diameter_and_angle, center_shapes
import torchvision
import torch.nn.functional as F
from utils.metrics import min_angle_distance, max_diameter_and_angle, contour_length

# Number of channels the images are loaded with (3 = RGB, 1 = grayscale)
image_channels = 1
batch_size = 8

# Normalize(0.5, 0.5) computes (x - 0.5) / 0.5 per channel; the visualisation
# cell undoes it with (x + 1) / 2. mean / std are given as sequences with one
# entry per channel -- note that "(0.5)" would be a plain float, not a tuple.
if image_channels == 3:
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
elif image_channels == 1:
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.Grayscale(num_output_channels=1),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5,), (0.5,))
    ])
else:
    # fail here rather than with a NameError on `transforms` further down
    raise ValueError(f"image_channels must be 1 or 3, got {image_channels}")
    

        
class ImageImageDataset(torch.utils.data.Dataset):
    """
    A PyTorch Dataset that yields pairs of images read from two path columns of a DataFrame.

    Each item is ``(image1, image2, path1)`` where ``image1`` is opened from the
    ``path_original`` column, ``image2`` from the ``path_generated`` column and
    ``path1`` is the ``path_original`` entry itself.

    Attributes:
        transform (callable, optional): Optional transform applied to both images of a pair.
        image_files1 (list): Entries of the ``path_original`` column.
        image_files2 (list): Entries of the ``path_generated`` column.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        self.image_files1 = df["path_original"].tolist()
        self.image_files2 = df["path_generated"].tolist()

    def __len__(self):
        # both lists come from the same frame, so either length works
        return len(self.image_files1)

    def __getitem__(self, idx):
        original_path = self.image_files1[idx]
        generated_path = self.image_files2[idx]

        original = Image.open(original_path)
        generated = Image.open(generated_path)

        if self.transform:
            original = self.transform(original)
            generated = self.transform(generated)

        return original, generated, original_path
    

# Validation samples of a single run at a single epoch
df = df_val.copy()
df = df.loc[df["epoch"]==925]
df = df.loc[df["run"]=="2D_GeoShape_32_linear_cnn_image_1704819570"]


dataset = ImageImageDataset(df=df, transform=transforms)
# NOTE(review): shuffle=True without a seeded generator shows a different batch on every run
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# images1: ground truth (path_original), images2: generated (path_generated),
# label: path of the ground-truth image
images1, images2, label = next(iter(dataloader))

# undo the (x - 0.5) / 0.5 normalisation applied by `transforms`
images1 = (images1 + 1) / 2
images2 = (images2 + 1) / 2

# binarise
images1 = images1 > 0.5
images2 = images2 > 0.5

images1 = center_shapes(images1, threshold=0.5)
images2 = center_shapes(images2, threshold=0.5)


ious = iou_pytorch(images1, images2)

# squeeze=False keeps `ax` two-dimensional even when the batch is small enough
# for a single column, so the ax[row, col] indexing below always works
fig, ax =  plt.subplots(2, math.ceil(len(images1)/2), figsize=(14,6), squeeze=False)

for i, (img1, img2, iou) in enumerate(zip(images1, images2, ious)):

    max_diameter1, max_angle1, pos1 = max_diameter_and_angle(img1)
    max_diameter2, max_angle2, pos2 = max_diameter_and_angle(img2)
    c1 = contour_length(img1)
    c2 = contour_length(img2)

    absolute_angle_diff = min_angle_distance(max_angle1, max_angle2)
    absolute_diameter_diff = abs(max_diameter1 - max_diameter2)
    absolute_contour_diff = abs(c1 - c2)

    print(absolute_angle_diff, absolute_diameter_diff, absolute_contour_diff)

    print(label[i].split('/')[-1])

    # RGB overlay: ground truth in the red channel, generated shape in the
    # green channel, so pixels covered by both appear yellow
    image = np.zeros((img1.shape[1], img1.shape[2], 3))
    image[:,:,0] = img1[0,:,:]
    image[:,:,1] = img2[0,:,:]
    # fill the grid column by column
    ax[i%2, i//2].imshow(image)
    ax[i%2, i//2].set_title(f"IoU: {iou:.3f}")

plt.suptitle("Visual overlap of ground truth shapes (red) and the corresponding generated shapes (green).\nThe intersection of both shapes is yellow.")
plt.tight_layout()
plt.show()


### L2 Center Distance

In [None]:
import cv2
import numpy as np

# Positional index of the validation sample to inspect
idx = 520

generated_path = df_val["path_generated"].iat[idx]
original_path = df_val["path_original"].iat[idx]

# img1: generated shape, img2: ground truth
img1 = cv2.imread(generated_path)
img2 = cv2.imread(original_path)

# cv2.imread reports a missing / unreadable file by returning None instead of
# raising, which would otherwise surface as an obscure error further down
if img1 is None:
    raise FileNotFoundError(f"Could not read generated image: {generated_path}")
if img2 is None:
    raise FileNotFoundError(f"Could not read original image: {original_path}")

sample_name = generated_path.split("/")[-1]

max_diameter1, max_angle1, pos1 = max_diameter_and_angle(img1)
c1 = contour_length(img1)

max_diameter2, max_angle2, pos2 = max_diameter_and_angle(img2)
c2 = contour_length(img2)

# absolute differences between generated shape and ground truth
absolute_angle_diff = min_angle_distance(max_angle1, max_angle2)
absolute_diameter_diff = abs(max_diameter1 - max_diameter2)
absolute_contour_diff = abs(c1 - c2)

print(c1, c2)
absolute_angle_diff, absolute_diameter_diff, absolute_contour_diff

### Shape Orientation

In [None]:
# Side-by-side view of the generated shape and its ground truth with the
# points returned by max_diameter_and_angle drawn on top.
fig, ax = plt.subplots(1, 2, figsize=(10,5))

# (axis, image, points, panel title, extra plot kwargs) -- only the left
# panel's markers carry a label
panels = [
    (ax[0], img1, pos1, "Generated shape", {"label": "Diameter"}),
    (ax[1], img2, pos2, "Ground truth", {}),
]

for panel_ax, panel_image, panel_points, panel_title, plot_kwargs in panels:
    # images were read with cv2.imread; reversing the last axis swaps the
    # channel order for matplotlib
    panel_ax.imshow(panel_image[:, :, ::-1])
    for point in panel_points:
        panel_ax.plot(point[0], point[1], "-o", **plot_kwargs)
    panel_ax.set_title(panel_title)

# headline and per-sample metrics, positioned in figure coordinates
plt.text(x=0.5, y=1.1, s="Visual example of the shape diameter", fontsize=18, ha="center", transform=fig.transFigure)
plt.text(x=0.5, y=0.98, s= f"{sample_name}\nγ_diff = {absolute_angle_diff:.2f}, D_diff = {absolute_diameter_diff:.2f}", fontsize=12, ha="center", transform=fig.transFigure)

### Shape contour

In [None]:
# Unpack the end points of the line
line1 = [[1,1], [4,4]]
(x1, y1), (x2, y2) = line1


# Direction vector of the line
dx1 = x2 - x1
dy1 = y2 - y1


# Angle of the line relative to the x-axis, in radians.
# arctan2 takes dy and dx as two separate arguments and returns a single,
# quadrant-aware angle; np.arctan([dy, dx]) would instead return the two
# element-wise arctangents of dy and dx.
angle1 = np.arctan2(dy1, dx1)
angle1