# TSNE

In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import os
import torch.nn.functional as F
import torch

from dataset import Dataset
from initialize import initialize_model, load_config
from sklearn.manifold import TSNE
from torch.utils.data import DataLoader

In [2]:
# Dataset settings come from disk; the datasets root is overridden here so the
# relative path resolves from this notebook's location.
dataset_config = dict(load_config("./dataset.cfg"))
dataset_config["datasets_dir"] = "../../../../../datasets"

# Model and run settings are used as loaded.
model_config = load_config("./model.cfg")
run_config = load_config("./run.cfg")

# Device string shared by checkpoint loading and inference.
device = run_config["device"]

In [3]:
# Load the saved train/test split indexes and echo them for a quick sanity check.
train_indexes = np.load("./train_indexes.npy")
test_indexes = np.load("./test_indexes.npy")

for split_name, split_values in (
    ("train_indexes", train_indexes),
    ("test_indexes", test_indexes),
):
    print(f"{split_name} ({len(split_values)}): {split_values}")

train_indexes (240): [ 50 102 134 284  25 252 144 151 242 307 314 198 154 156 177 142 277 231
 207 170  60  79 303 108 173  11 165 220  26 300  56 201 129 114  21 248
 183 163 155 136 159 306 210 313 227 203 181 226 244  54 193 239 276 219
  86  92 130 294 115 139   6 229 111  18 302 272 194  40 288 119  97  43
 271 281  15  51 234 150 103  87  57   9 153 285  59 208 254 310   7 172
 120 240 215 317  46  36 152  76 141 237 273  75  38 184  70 143 223 269
  24  71 291  81 316 216 305  89 233   2 132  10 283  96 218 297 164 188
  14 225 182  49 189 169 251 309 301 308 171 105  72 257  64  67  90 250
 304  78 241 200  33 311 298 299 256  69 107  39 287  13 191 258   1   4
 179  82 185 279 162  31  91  27 278  28 228  41  45 262  93 268 167 101
  65  30 270  98 202 217 124 161 280 290 249 180  66 247 158  37 116 212
 140  16  95 168 253 135  42 109 121 260 197 224 123  61 236 106  63 125
  20 264 266 205 222 199 261  53 209 118 127 149  88 157  62 186 131  84
   8 122 295  19 204  44]
test

In [4]:
# Checkpoint tag to visualise: selects checkpoints/<EPOCH>.pth and names the saved figure.
EPOCH: str = "last"

In [5]:
# Split to embed. Point `indexes` at `test_indexes` instead to visualise the
# held-out samples.
indexes = train_indexes

# One sample per batch, in a fixed order, so the i-th batch corresponds to
# indexes[i].
dataset = Dataset(dataset_config, indexes=indexes)
dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
)

In [8]:
# Make sure the output directory for the saved figures exists.
os.makedirs("tsne", exist_ok=True)

# Restore the trained weights for the selected checkpoint.
checkpoint_path = f"checkpoints/{EPOCH}.pth"
checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))

model = initialize_model(model_config["name"])
print(model)
model.load_state_dict(checkpoint["model_state_dict"])
model.to(device)
model.eval()

# VAE-style models return a tuple whose first element is the tensor we want.
# The check must be made on the configured model *name*: comparing the module
# object itself against strings is never true.
returns_tuple = model_config["name"] in ("vae", "unet_vae")

# Step 1: Extract latent space representations.
latent_space = []  # one numpy array per sample, concatenated after the loop
labels = []        # dataset index of each embedded sample, in loader order

with torch.no_grad():
    # The targets are not needed for the embedding, so they stay on the CPU.
    for index, (video, _target) in enumerate(dataloader):
        output = model(video.to(device))
        if returns_tuple:
            output = output[0]
        output = output.detach().cpu()

        # NOTE(review): the shape printed below shows a single value per sample;
        # L2-normalising a length-1 vector collapses it to +/-1. Confirm this
        # normalisation is intended for single-output models.
        output = F.normalize(output, p=2, dim=-1)

        latent_space.append(output.numpy())
        # Valid because the loader uses batch_size=1 and shuffle=False, so the
        # batch position equals the position in `indexes`.
        labels.append(indexes[index])

latent_space = np.concatenate(latent_space, axis=0)
print(latent_space.shape)

Model(
  (conv1): Conv3d(200, 100, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv1_bn): BatchNorm3d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv3d(100, 50, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv2_bn): BatchNorm3d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv3d(50, 25, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv3_bn): BatchNorm3d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv3d(25, 12, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv4_bn): BatchNorm3d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=49152, out_features=6144, bias=True)
  (fc2): Linear(in_features=6144, out_features=1, bias=True)
  (max_pool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
)
(240,)


In [9]:
# Flatten every sample's features into one row: (num_samples, num_features).
latent_space = np.reshape(latent_space, (latent_space.shape[0], -1))
print(latent_space.shape)

(240, 1)


In [10]:
# Step 2: Reduce dimensionality with t-SNE.
# Two components are required because both embedding columns are plotted below.
# NOTE(review): perplexity=1 is far below the commonly recommended 5-50 range;
# confirm this value is intentional.
tsne = TSNE(n_components=2, perplexity=1, random_state=42)
latent_space_tsne = tsne.fit_transform(latent_space)

# Step 3: Plot the reduced latent space.
plt.figure(figsize=(8, 6))

# Each global index is split into (dataset number, layer within that dataset).
# Presumably 159 is the number of layers per build -- verify against the dataset.
BUILD_LAYERS = 159

# The markers and colours must describe the samples that were actually embedded,
# i.e. `indexes` (the split fed to the dataloader), not a different split.
plotted_indexes = np.asarray(indexes)
dataset_index = plotted_indexes // BUILD_LAYERS
colored_layers = plotted_indexes % BUILD_LAYERS
cmap = plt.get_cmap("viridis")

x = latent_space_tsne[:, 0]
y = latent_space_tsne[:, 1]

# Dataset 0 is drawn with circles ("Spacing"), every other dataset with squares
# ("Velocity").
marker_groups = (
    ("o", "Spacing", dataset_index == 0),
    ("s", "Velocity", dataset_index != 0),
)
for marker, label, mask in marker_groups:
    # A shared vmin/vmax keeps colours comparable across both groups, so the
    # single colorbar is valid for all points.
    plt.scatter(
        x[mask],
        y[mask],
        marker=marker,
        c=colored_layers[mask],
        cmap=cmap,
        vmin=0,
        vmax=BUILD_LAYERS - 1,
        label=label,
    )
plt.colorbar(label='Class')
plt.legend()
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.title(f"t-SNE Plot of Latent Space (Epoch: {EPOCH})")
plt.savefig(f"tsne/{EPOCH}.png")
plt.show()

: 