In [None]:
import torch
import matplotlib.pyplot as plt
import os
import yaml
import sys
import numpy as np
from tqdm import tqdm
sys.path.append('../')
from datamodule.datamodule import select_data
from models.models import Classifier, CooperativeOpticalModelRemote
from scipy.spatial.distance import pdist, squareform

from sklearn import datasets, decomposition
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*".
# Prefer the new name and fall back to the legacy one on older installs so
# the notebook runs on either side of that rename.
preferred_style = 'seaborn-v0_8-whitegrid'
legacy_style = 'seaborn-whitegrid'
print(plt.style.available)
plt.style.use(preferred_style if preferred_style in plt.style.available else legacy_style)

In [None]:
# Load the project configuration. The context manager guarantees the file
# handle is closed once parsing finishes (the bare open() call leaked it).
with open('../../config.yaml', 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Re-point the paths so they resolve relative to this notebook's directory
# and select the baseline dataset.
config['paths']['path_root'] = '../../'
config['paths']['path_data'] = 'data/baseline'

In [None]:
# Collect the path of every entry in the configured data directory, sorted
# so that all per-file lists built later share one stable ordering.
data_dir = os.path.join(config['paths']['path_root'], config['paths']['path_data'])
filenames = sorted(os.path.join(data_dir, entry) for entry in os.listdir(data_dir))

In [None]:
# Read every sample file exactly once and pull out all four fields from the
# same loaded dict (previously each file was deserialized four times, once
# per field).
bench_images, sim_images, ideal_images, targets = [], [], [], []
for f in tqdm(filenames):
    sample = torch.load(f, weights_only=True)
    bench_images.append(sample['bench_image'].squeeze().numpy())
    sim_images.append(sample['sim_output'].squeeze().numpy())
    ideal_images.append(sample['resampled_sample'].squeeze().numpy())
    # The class label is the index of the largest entry of the target tensor.
    targets.append(torch.argmax(sample['target']).numpy())

targets = np.asarray(targets).squeeze()
unique_targets = np.unique(targets)

In [None]:
def create_feature_vectors(classifier, images):
    """Run each image through ``classifier.feature_extractor`` and collect the outputs.

    Every image is squeezed, given two leading singleton axes, tiled three
    times along axis 1 and cast to float64 before being handed to the
    extractor.

    Args:
        classifier: object exposing a callable ``feature_extractor``.
        images: iterable of NumPy arrays.

    Returns:
        A list holding one extractor output per image, in input order.
    """
    def _to_model_input(array):
        # Indexing with [None, None] prepends the two singleton axes.
        single = torch.from_numpy(array).squeeze()[None, None]
        return torch.cat((single,) * 3, dim=1).double()

    return [classifier.feature_extractor(_to_model_input(image)) for image in tqdm(images)]

In [None]:
# Restore the pretrained classifier (float64, on CPU) and embed every image
# of the three baseline image sets with it.
# NOTE(review): the model is never switched to eval() here; if the feature
# extractor contains dropout or batch-norm layers, confirm the mode that
# load_from_checkpoint leaves it in is the intended one.
checkpoint_path = '../../results/classifier_baseline_bench_resampled_sample/version_0/checkpoints/last.ckpt'
classifier = Classifier.load_from_checkpoint(checkpoint_path).double().cpu()

# Gradients are not needed for feature extraction.
with torch.no_grad():
    (
        bench_image_feature_embeddings,
        sim_image_feature_embeddings,
        ideal_image_feature_embeddings,
    ) = (
        create_feature_vectors(classifier, image_set)
        for image_set in (bench_images, sim_images, ideal_images)
    )

In [None]:
# Drop singleton axes from each per-image embedding and stack every set into
# a single NumPy array (one row per image).
(
    np_bench_image_feature_embeddings,
    np_sim_image_feature_embeddings,
    np_ideal_image_feature_embeddings,
) = (
    np.asarray([vector.squeeze() for vector in embedding_list])
    for embedding_list in (
        bench_image_feature_embeddings,
        sim_image_feature_embeddings,
        ideal_image_feature_embeddings,
    )
)

# PCA

In [None]:
# Output directory for the PCA figures; created on demand, no error if it already exists.
path_pca_results = '../../results/feature_space_analysis/pca'
os.makedirs(path_pca_results, exist_ok=True)

In [None]:
# Number of projection components; the scatter plots below draw components 0 and 1.
num_components = 2

In [None]:
# 29-entry hex palette; the t-SNE plots pick from it through `color_indices`.
colors = [
    '#E8ECFB', '#D9CCE3', '#D1BBD7', '#CAACCB', '#BA8DB4', '#AE76A3',
    '#AA6F9E', '#994F88', '#882E72', '#1965B0', '#437DBF', '#5289C7',
    '#6195CF', '#7BAFDE', '#4EB265', '#90C987', '#CAE0AB', '#F7F056',
    '#F7CB45', '#F6C141', '#F4A736', '#F1932D', '#EE8026', '#E8601C',
    '#E65518', '#DC050C', '#A5170E', '#72190E', '#42150A',
]

# 10-entry hex palette; the PCA/UMAP plots pick from it through `color2_indices`.
colors2 = [
    '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99',
    '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a',
]

# Lookup tables mapping a class label (used as list index) to a palette position.
color_indices = [9, 10, 14, 15, 17, 18, 21, 24, 26, 28]
color2_indices = list(range(10))
len(color_indices)

In [None]:
# Fit the projection on the ideal-image embeddings only, then map all three
# embedding sets onto those same components so the panels share one
# coordinate system. The component count comes from `num_components`
# (defined in the cell above) instead of a duplicated literal.
pca = PCA(n_components=num_components)
pca.fit(np_ideal_image_feature_embeddings)
bench_transform = pca.transform(np_bench_image_feature_embeddings)
ideal_transform = pca.transform(np_ideal_image_feature_embeddings)
sim_transform = pca.transform(np_sim_image_feature_embeddings)

In [None]:
# Three side-by-side scatter plots of the PCA projections (ideal, simulated,
# bench), one colour per class label.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# (title, projected points) for each panel, in left-to-right order.
panels = [
    ("Ideal image embeddings", ideal_transform),
    ("Simulated image embeddings", sim_transform),
    ("Bench image embeddings", bench_transform),
]

for target in unique_targets:
    indices = np.where(targets == target)[0]
    color = colors2[color2_indices[target]]
    for panel_ax, (_, projected) in zip(ax, panels):
        points = projected[indices]
        panel_ax.scatter(points[:, 0], points[:, 1], color=color, label=target)

# A distinct loop variable keeps `ax` bound to the array of axes; the previous
# `for ax in ax.flatten()` overwrote it with the last Axes. Titles are set once
# here rather than on every class iteration.
for panel_ax, (title, _) in zip(ax, panels):
    panel_ax.set_title(title)
    panel_ax.set_aspect('equal')
    panel_ax.legend(frameon=True, framealpha=1)
    panel_ax.set_xlim(-5.5, 5.5)
    panel_ax.set_ylim(-5.5, 5.5)
plt.tight_layout()
fig.savefig(os.path.join(path_pca_results, 'pca_embeddings.pdf'))

# TSNE

In [None]:
# Output directory for the t-SNE figures; created on demand, no error if it already exists.
path_tsne_results = '../../results/feature_space_analysis/tsne'
os.makedirs(path_tsne_results, exist_ok=True)

In [None]:
# TSNE.fit() returns the estimator itself; fit_transform() returns the
# embedded coordinates, which is what the plotting code indexes into.
# A fixed random_state (same seed as the UMAP cell) makes the random
# initialisation reproducible across runs.
tsne = TSNE(
    n_components=2,
    learning_rate='auto',
    init='random',
    perplexity=50,
    random_state=42,
).fit_transform(np_ideal_image_feature_embeddings)

In [None]:

# Sweep the t-SNE perplexity from 5 to 49 on the ideal-image embeddings and
# save one class-coloured scatter plot per setting.
unique_targets = np.unique(targets)
for j in tqdm(range(5, 50)):
    # Drop figures from earlier iterations so they do not accumulate in memory.
    plt.close('all')
    # fit_transform() returns the embedded coordinates. The previous .fit()
    # returned the TSNE estimator, which cannot be indexed with `indices`.
    # random_state fixes the random initialisation so the sweep is repeatable.
    tsne = TSNE(
        n_components=2,
        learning_rate='auto',
        init='random',
        perplexity=j,
        random_state=42,
    ).fit_transform(np_ideal_image_feature_embeddings)
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    for target in unique_targets:
        indices = np.where(targets == target)[0]
        tsne_vals = tsne[indices]
        color = colors[color_indices[target]]
        ax.scatter(tsne_vals[:, 0], tsne_vals[:, 1], color=color, label=target)
    ax.legend()
    ax.set_title(f"Ideal - Ideal Perplexity = {j:03d}")
    fig.savefig(os.path.join(path_tsne_results, f'ideal_ideal_perplexity_{j:03d}.png'))

In [None]:
# Show the path of the last figure written by the sweep above. The number is
# zero-padded to three digits so it matches the filename actually saved.
os.path.join(path_tsne_results, f"ideal_ideal_perplexity_{j:03d}.png")

In [None]:
# Sanity check: print the shape of the label array.
targets = np.asarray(targets)
print(targets.shape)

In [None]:
# Inspect which sample indices carry label 1.
np.where(targets==1)[0]

In [None]:
# Inspect the distinct labels present in the dataset.
np.unique(targets)

# UMAP

In [None]:
# Output directory for the UMAP figures; created on demand, no error if it already exists.
path_umap_results = '../../results/feature_space_analysis/umap'
os.makedirs(path_umap_results, exist_ok=True)

In [None]:
# Fit UMAP on the ideal-image embeddings only; the fitted model is reused below
# to project all three embedding sets. The seed is fixed via random_state.
umap_transform = umap.UMAP(n_neighbors=5, random_state=42).fit(np_ideal_image_feature_embeddings)

In [None]:
# Project each embedding set with the UMAP model fitted on the ideal
# embeddings (evaluated in the order ideal, bench, sim).
ideal_umap, bench_umap, sim_umap = (
    umap_transform.transform(embeddings)
    for embeddings in (
        np_ideal_image_feature_embeddings,
        np_bench_image_feature_embeddings,
        np_sim_image_feature_embeddings,
    )
)

In [None]:
# Three side-by-side scatter plots of the UMAP projections (ideal, simulated,
# bench), one colour per class label.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# (title, projected points) for each panel, in left-to-right order.
panels = [
    ("Ideal image embeddings", ideal_umap),
    ("Simulated image embeddings", sim_umap),
    ("Bench image embeddings", bench_umap),
]

for target in unique_targets:
    indices = np.where(targets == target)[0]
    color = colors2[color2_indices[target]]
    for panel_ax, (_, projected) in zip(ax, panels):
        points = projected[indices]
        panel_ax.scatter(points[:, 0], points[:, 1], color=color, label=target, alpha=1)

# A distinct loop variable keeps `ax` bound to the array of axes; the previous
# `for ax in ax.flatten()` overwrote it with the last Axes. Titles are set once
# here rather than on every class iteration.
for panel_ax, (title, _) in zip(ax, panels):
    panel_ax.set_title(title)
    panel_ax.set_aspect('equal')
    panel_ax.legend(frameon=True, framealpha=0.5)
    panel_ax.set_xlim(-10, 20)
    panel_ax.set_ylim(-10, 20)
plt.tight_layout()
fig.savefig(os.path.join(path_umap_results, 'umap_feature_embeddings.pdf'))

# Post training

In [None]:
# Load the configuration saved with the cooperative-training run. The context
# manager guarantees the file handle is closed (the bare open() call leaked it).
with open('../../results/coop_bench_alpha_0.0_beta_0.0_gamma_0.0_delta_1.0/version_2/config.yaml', 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# NOTE(review): unlike the baseline config cell, path_root is not overridden
# here, so the file listing below uses whatever path_root the saved run config
# contains. Confirm it resolves from this notebook's working directory.
config['paths']['path_data'] = 'data/post_training'
path_pca_results_pt = '../../results/path_pca_results_pt'
os.makedirs(path_pca_results_pt, exist_ok=True)

In [None]:
# Displayed for inspection only; the classifier below is loaded from a hard-coded checkpoint path.
config['classifier']['checkpoint_path']

In [None]:
# Collect the path of every entry in the post-training data directory, sorted
# so that all per-file lists built later share one stable ordering.
pt_data_dir = os.path.join(config['paths']['path_root'], config['paths']['path_data'])
filenames = sorted(os.path.join(pt_data_dir, entry) for entry in os.listdir(pt_data_dir))

In [None]:
# Read every post-training sample file exactly once and pull out all four
# fields from the same loaded dict (previously each file was deserialized
# four times, once per field).
pt_bench_images, pt_sim_images, pt_ideal_images, pt_targets = [], [], [], []
for f in tqdm(filenames):
    sample = torch.load(f, weights_only=True)
    pt_bench_images.append(sample['bench_image'].squeeze().detach().numpy())
    pt_sim_images.append(sample['sim_output'].squeeze().detach().numpy())
    pt_ideal_images.append(sample['resampled_sample'].squeeze().detach().numpy())
    # The class label is the index of the largest entry of the target tensor.
    pt_targets.append(torch.argmax(sample['target']).numpy())

# Build the label arrays from the post-training labels just read. The previous
# code converted the baseline `targets` here, discarding `pt_targets`.
pt_targets = np.asarray(pt_targets).squeeze()
pt_unique_targets = np.unique(pt_targets)

In [None]:
# Load in a pretrained classifier
# For every post-training image, get the feature embedding
checkpoint_path = '../../results/classifier_baseline_bench_resampled_sample/version_0/checkpoints/last.ckpt'
classifier = Classifier.load_from_checkpoint(checkpoint_path).double().cpu()

# Embed the post-training images. The previous code passed the baseline
# `bench_images` / `sim_images`, so the "post training" embeddings were
# just a recomputation of the baseline ones.
with torch.no_grad():
    pt_bench_image_feature_embeddings = create_feature_vectors(classifier, pt_bench_images)
    pt_sim_image_feature_embeddings = create_feature_vectors(classifier, pt_sim_images)


In [None]:
# Drop singleton axes from each per-image embedding and stack both
# post-training sets into NumPy arrays (one row per image).
pt_np_bench_image_feature_embeddings, pt_np_sim_image_feature_embeddings = (
    np.asarray([vector.squeeze() for vector in embedding_list])
    for embedding_list in (
        pt_bench_image_feature_embeddings,
        pt_sim_image_feature_embeddings,
    )
)

In [None]:
# Project the post-training embeddings with the PCA model fitted earlier on
# the baseline ideal-image embeddings, so both analyses share the same axes.
pt_bench_transform, pt_sim_transform = (
    pca.transform(embeddings)
    for embeddings in (
        pt_np_bench_image_feature_embeddings,
        pt_np_sim_image_feature_embeddings,
    )
)

In [None]:
# Three side-by-side scatter plots in the baseline PCA space: baseline ideal
# embeddings next to the post-training simulated and bench embeddings,
# one colour per class label.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

panel_titles = (
    "Ideal image embeddings",
    "Simulated image embeddings",
    "Bench image embeddings",
)

for target in unique_targets:
    # The ideal panel is indexed with the baseline labels; the two
    # post-training panels are indexed with the post-training labels.
    indices = np.where(targets == target)[0]
    pt_indices = np.where(pt_targets == target)[0]
    color = colors2[color2_indices[target]]
    panel_points = (
        ideal_transform[indices],
        pt_sim_transform[pt_indices],
        pt_bench_transform[pt_indices],
    )
    for panel_ax, points in zip(ax, panel_points):
        panel_ax.scatter(points[:, 0], points[:, 1], color=color, label=target)

# A distinct loop variable keeps `ax` bound to the array of axes; the previous
# `for ax in ax.flatten()` overwrote it with the last Axes. Titles are set once
# here rather than on every class iteration.
for panel_ax, title in zip(ax, panel_titles):
    panel_ax.set_title(title)
    panel_ax.set_aspect('equal')
    panel_ax.legend(frameon=True, framealpha=1)
    panel_ax.set_xlim(-5.5, 5.5)
    panel_ax.set_ylim(-5.5, 5.5)
plt.tight_layout()
fig.savefig(os.path.join(path_pca_results_pt, 'pca_embeddings.pdf'))