In [82]:
import numpy as np

In [83]:
# Locations of the saved latent-space arrays for the test and train splits.
# Despite the `_dir` suffix, both values point at individual .npy files.
test_latent_dir = '/data/scratch/bariskurtkaya/dataset/NIRCAM/1386/models/modelv1/latent_v1/test_374_latent_space.npy'
train_latent_dir = '/data/scratch/bariskurtkaya/dataset/NIRCAM/1386/models/modelv1/latent_v1/train_374_latent_space.npy'

In [84]:
# Read the test and train latent-space arrays from disk.
# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary Python
# objects, which can execute code -- only load files from a trusted source.
test_latent = np.load(test_latent_dir, allow_pickle=True)

train_latent = np.load(train_latent_dir, allow_pickle=True)

In [85]:
import pandas as pd


def get_latent_vector_matrix(latent):
    """Build a DataFrame with one row per latent sample.

    Parameters
    ----------
    latent : array-like
        Indexed as ``latent[idx][0]`` (the encoded vector) and
        ``latent[idx][1]`` (the label); ``latent.shape[0]`` is the number of
        samples. The label object must expose ``.cpu().numpy()`` -- presumably
        a torch tensor; confirm against the code that writes the .npy file.

    Returns
    -------
    pandas.DataFrame
        One ``"Enc. Variable {i}"`` column per latent dimension, holding
        scalar values, plus a ``'label'`` column holding the first element of
        the label converted to numpy.
    """
    encoded_samples = []

    for idx in range(latent.shape[0]):
        # Flatten to 1-D so that every column stores a plain scalar.
        # Reshaping to (-1, 1) made each cell a length-1 array, which produced
        # object-dtype columns instead of numeric ones.
        vector = np.reshape(latent[idx][0], -1)
        encoded_sample = {f"Enc. Variable {i}": enc for i, enc in enumerate(vector)}
        encoded_sample['label'] = latent[idx][1].cpu().numpy()[0]
        encoded_samples.append(encoded_sample)

    return pd.DataFrame(encoded_samples)

In [86]:
# Convert both latent arrays into DataFrames via get_latent_vector_matrix
# (one row per sample: the encoded variables plus a 'label' column).
test_encoded = get_latent_vector_matrix(test_latent)
train_encoded = get_latent_vector_matrix(train_latent)

In [None]:
from sklearn.manifold import TSNE
import plotly.express as px

# Embed the training latent vectors in 2-D with t-SNE and plot them, coloured
# by label. random_state is fixed so the embedding is reproducible from run to
# run; the same seed (0) is used for KMeans and GaussianMixture in this notebook.
tsne = TSNE(n_components=2, random_state=0)
tsne_results = tsne.fit_transform(train_encoded.drop(['label'],axis=1))
# Labels are cast to str so plotly treats them as discrete categories.
fig = px.scatter(tsne_results, x=0, y=1,
                 color=train_encoded.label.astype(str),
                 labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'})
fig.show()

In [None]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
import pandas as pd

In [None]:
def get_latent_vector_with_labels(latent):
    """Split a two-column latent array into samples and labels.

    Parameters
    ----------
    latent : numpy.ndarray
        Array indexed as ``latent[:, 0]`` (encoded vectors) and
        ``latent[:, 1]`` (labels).

    Returns
    -------
    tuple of numpy.ndarray
        ``(samples, labels)``: the per-row vectors gathered into one array,
        and column 1 copied into a new array without any conversion.
    """
    sample_column = np.array(latent[:, 0])
    # Re-wrapping the individual vectors lets numpy stack them into one array.
    stacked_samples = np.array([vector for vector in sample_column])
    label_column = np.array(latent[:, 1])
    return stacked_samples, label_column

In [None]:
# Split each latent array into (samples, labels) with
# get_latent_vector_with_labels; these arrays feed the clustering cells below.
test_encoded_samples, test_labels = get_latent_vector_with_labels(test_latent)
train_encoded_samples, train_labels = get_latent_vector_with_labels(train_latent)

In [None]:
# Display the shapes of the four arrays as a quick sanity check.
test_encoded_samples.shape, test_labels.shape, train_encoded_samples.shape, train_labels.shape

In [None]:
def metric_calculation(true_positive, true_negative, false_positive, false_negative):
    """Compute accuracy, precision, recall and F1 from confusion-matrix counts.

    Parameters
    ----------
    true_positive, true_negative, false_positive, false_negative : int
        The four cells of a binary confusion matrix.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, recall, f1_score)``. Any ratio whose
        denominator is zero (for example precision when nothing was predicted
        positive) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # An undefined ratio is reported as 0.0 rather than aborting the cell.
        return numerator / denominator if denominator else 0.0

    total = true_positive + true_negative + false_positive + false_negative
    accuracy = _ratio(true_positive + true_negative, total)
    precision = _ratio(true_positive, true_positive + false_positive)
    recall = _ratio(true_positive, true_positive + false_negative)
    f1_score = _ratio(2 * precision * recall, precision + recall)
    return accuracy, precision, recall, f1_score

In [None]:
# Fit a two-cluster KMeans model on the training latent vectors;
# random_state=0 fixes the seed.
kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(train_encoded_samples)

In [None]:
# Tally how the KMeans cluster assignments on the test set line up with the
# (inverted) test labels, then derive the summary metrics from the tallies.
class_first_true = class_first_false = 0
class_second_true = class_second_false = 0

predictions = kmeans.predict(test_encoded_samples)

# Needed if prediction labels are reversed
labels = np.abs(test_labels - 1)

for idx, prediction in enumerate(predictions):
    if prediction == 0:
        # Cluster 0 is treated as the "first" class.
        if prediction == labels[idx]:
            class_first_true += 1
        else:
            class_first_false += 1
    elif prediction == 1 and prediction == labels[idx]:
        class_second_true += 1
    else:
        # Remaining case: with two clusters this is a cluster-1 prediction
        # that disagrees with the label.
        class_second_false += 1

# The "first" class plays the role of the positive class.
accuracy, precision, recall, f1_score = metric_calculation(
    true_positive=class_first_true,
    true_negative=class_second_true,
    false_positive=class_first_false,
    false_negative=class_second_false,
)

In [None]:
# Display the current accuracy, precision, recall and f1_score values as one formatted string.
f'accuracy: {accuracy}, precision: {precision}, recall: {recall}, f1_score: {f1_score}'

In [None]:
# Arrange the four tallies as a 2x2 matrix: rows follow the predicted cluster
# (0, 1) and columns follow the label the prediction was compared against (0, 1).
confussion = [[class_first_true, class_first_false], [class_second_false, class_second_true]]

# Name both axes so seaborn labels the heatmap; an unlabelled 2x2 grid does not
# say which axis is the prediction and which is the label.
df_cm = pd.DataFrame(confussion).rename_axis(
    index="Predicted cluster",
    columns="Label (inverted)",
)

plt.figure(figsize = (10,7))
sn.heatmap(df_cm, annot=True, fmt='g')

In [None]:
# Fit a two-component Gaussian mixture on the training latent vectors;
# random_state=0 fixes the seed.
gm = GaussianMixture(n_components=2, random_state=0).fit(train_encoded_samples)

In [None]:
# Tally how the Gaussian-mixture component assignments on the test set line up
# with the (inverted) test labels, then derive the summary metrics.
class_first_true = class_first_false = 0
class_second_true = class_second_false = 0

predictions = gm.predict(test_encoded_samples)

# Needed if prediction labels are reversed
labels = np.abs(test_labels - 1)

for idx, prediction in enumerate(predictions):
    if prediction == 0:
        # Component 0 is treated as the "first" class.
        if prediction == labels[idx]:
            class_first_true += 1
        else:
            class_first_false += 1
    elif prediction == 1 and prediction == labels[idx]:
        class_second_true += 1
    else:
        # Remaining case: with two components this is a component-1
        # prediction that disagrees with the label.
        class_second_false += 1

# The "first" class plays the role of the positive class.
accuracy, precision, recall, f1_score = metric_calculation(
    true_positive=class_first_true,
    true_negative=class_second_true,
    false_positive=class_first_false,
    false_negative=class_second_false,
)

In [None]:
# Display the current accuracy, precision, recall and f1_score values as one formatted string.
f'accuracy: {accuracy}, precision: {precision}, recall: {recall}, f1_score: {f1_score}'

In [None]:
# Arrange the four tallies as a 2x2 matrix: rows follow the predicted component
# (0, 1) and columns follow the label the prediction was compared against (0, 1).
confussion = [[class_first_true, class_first_false], [class_second_false, class_second_true]]

# Name both axes so seaborn labels the heatmap; an unlabelled 2x2 grid does not
# say which axis is the prediction and which is the label.
df_cm = pd.DataFrame(confussion).rename_axis(
    index="Predicted component",
    columns="Label (inverted)",
)

plt.figure(figsize = (10,7))
sn.heatmap(df_cm, annot=True, fmt='g')