# TensorBoard clustering
### This notebook creates the embeddings that TensorBoard (TB) needs to compute t-SNE, PCA and UMAP projections
- The code from this notebook will later be integrated into `test_model.ipynb`

In [1]:
import os
import re
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm

import confinement_mode_classifier as cmc

# Project root: the working directory of the notebook kernel.
path = Path.cwd()
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Seed set to 42


Device: cpu


### Load the model

In [6]:
data_dir_path = f'{path}/data/LH_alpha'
# os.listdir returns entries in arbitrary order; sorting makes `shot_numbers`
# (and everything derived from it) reproducible across machines and runs.
file_names = sorted(os.listdir(data_dir_path))

batch_size = 32

# Choose which shots are used in this notebook.
# Entries without a `shot_<number>` token (hidden files, checkpoints, ...) are skipped;
# calling .group() on a failed match would otherwise raise AttributeError.
shot_matches = (re.search(r'shot_(\d+)', file_name) for file_name in file_names)
shot_numbers = [match.group(1) for match in shot_matches if match is not None]

# The removed shots have different dimensions.
removed_shots = ['13182', '20009','20112', '20143', '20145', '20146', '20147', '16987', '20144']
shot_numbers = [valid_shot for valid_shot in shot_numbers if valid_shot not in removed_shots]
assert shot_numbers, f"No usable shot files found in {data_dir_path}"

# Every shot goes into the first returned frame: no shots are reserved for testing or validation.
shot_df, _, _, _ = cmc.load_and_split_dataframes(path, shot_numbers, shots_for_testing=[], shots_for_validation=[])




def build_backbone():
    """Return a ResNet-18 (ImageNet weights) with a 3-way output layer, moved to `device`."""
    backbone = torchvision.models.resnet18(weights='IMAGENET1K_V1')
    # Parameters of newly constructed modules have requires_grad=True by default
    backbone.fc = nn.Linear(backbone.fc.in_features, 3)  # 3 classes: L-mode, H-mode, ELM
    return backbone.to(device)


pretrained_model = build_backbone()
num_ftrs = pretrained_model.fc.in_features

model_path = f'{path}/runs/15-01-2024, 01-03-15 Ensembled_2xris1_models_classifier_training_classifier_training_all_layers'

# To inspect a single-camera model instead, load `model_fc_trained.pt` from `model_path`
# into `pretrained_model` and build the loader without the second-image options.

# Ensembled (two-image) model.
# Each branch gets its OWN backbone instance. Passing the same module object as both
# modelA and modelB makes the branches share parameter tensors (unless TwoImagesModel
# copies its arguments — not visible from this notebook), in which case load_state_dict
# would write the `modelA.*` and `modelB.*` checkpoint entries into the same tensors and
# only one branch's weights would survive.
second_backbone = build_backbone()
ensembled_model = cmc.TwoImagesModel(modelA=pretrained_model, modelB=second_backbone, hidden_units=30)
# The checkpoint is deserialised on the CPU so it also loads on machines without a GPU;
# load_state_dict then copies the values into the model's own tensors.
ensembled_model.load_state_dict(torch.load(f'{model_path}/model.pt', map_location=torch.device('cpu')))
ensembled_model = ensembled_model.to(device).eval()

test_dataloader = cmc.get_dloader(shot_df, path=path, batch_size=batch_size, shuffle=False,
                                  balance_data=True, second_img_opt='RIS1', ris_option='RIS1')

### Remove the classifier to study the higher-dimensional features

In [7]:
# Swap the `classifier` attribute for a pass-through layer, so a forward pass presumably
# returns the features that used to feed the classification head — TODO confirm against
# cmc.TwoImagesModel.forward.
# This mutates the model in place: re-run the model-loading cell to get the classifier back.
ensembled_model.classifier = nn.Identity()

In [8]:
if torch.cuda.is_available():
    # Work with the first GPU (assumes at least one is present, which the guard ensures).
    device = torch.device("cuda:0")

    # Static description of the card.
    gpu_props = torch.cuda.get_device_properties(device)
    print(gpu_props)

    # Memory figures are reported in bytes by torch; show them in GB (1e9 bytes).
    memory_report = {
        "Total GPU memory": gpu_props.total_memory,
        "Allocated GPU memory": torch.cuda.memory_allocated(device),
        "Reserved GPU memory": torch.cuda.memory_reserved(device),
    }
    for caption, n_bytes in memory_report.items():
        print(f"{caption}: {n_bytes / 1e9} GB")

### TensorBoard clustering

In [9]:
def embed_imgs(model, data_loader, max_iter):
    """Encode the first batches of `data_loader` with `model` and collect the results.

    Args:
        model: module called on `batch['img']`; it is switched to eval mode here.
        data_loader: iterable of dict batches providing the keys 'img' and 'label'.
        max_iter: batch budget. The loop stops only once the 1-based batch counter
            exceeds `max_iter`, so up to `max_iter + 1` batches are encoded
            (and always at least one, if the loader is not empty).

    Returns:
        dict with
            'img':    `batch['img'][:, 0]` of every batch resized to 250x320 and
                      concatenated along dim 0 (presumably the first of the two
                      camera images of each sample — TODO confirm against the loader),
            'output': the model outputs concatenated along dim 0, on the CPU,
            'label':  list with one mode name ('L-mode', 'H-mode' or 'ELM') per sample.

    Raises:
        ValueError: if `data_loader` yields no batches.
    """
    label_names = {0: 'L-mode', 1: 'H-mode', 2: 'ELM'}

    # Send the inputs to wherever the model actually lives instead of trusting the
    # notebook-level `device`; that global is only the fallback for parameter-free models.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else device

    # Tell the progress bar how many batches will really be consumed, not the loader length.
    n_batches = max(max_iter + 1, 1)
    try:
        n_batches = min(n_batches, len(data_loader))
    except TypeError:
        pass  # loader without __len__: keep the budget-based estimate

    img_list, embed_list, mode_list = [], [], []
    model.eval()
    with torch.no_grad(), tqdm(data_loader, desc="Encoding images", total=n_batches, leave=False) as progress:
        for batch_number, batch in enumerate(progress, start=1):
            imgs = batch['img']
            z = model(imgs.to(model_device).float())
            img_list.append(F.interpolate(imgs[:, 0], size=(250, 320), mode='bilinear', align_corners=False))
            # Keep the embeddings on the CPU: frees GPU memory batch by batch and
            # leaves them on the same device as the stored images.
            embed_list.append(z.cpu())
            mode_list.append(batch['label'])

            if batch_number > max_iter:
                break

    if not img_list:
        raise ValueError("data_loader yielded no batches, nothing to embed")

    new_list = [label_names[value] for value in torch.cat(mode_list).tolist()]
    return {'img': torch.cat(img_list, dim=0), 'output': torch.cat(embed_list, dim=0), 'label': new_list}

# max_iter=19 ends the loop after the 20th batch (the check is `batch_number > max_iter`).
# Assuming the loader honours batch_size = 32, that is up to 640 samples — enough for the
# 625 images that are written to TensorBoard.
img_embeds = embed_imgs(ensembled_model, test_dataloader, max_iter=19)

Encoding images:   0%|          | 0/1084 [00:00<?, ?it/s]

In [None]:

NUM_IMGS = 625  # 25 x 25 thumbnails

# Per-channel statistics used to map the stored images back towards displayable values.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Convert the statistics to torch tensors explicitly (shape (3, 1, 1), broadcast over H and W)
# instead of relying on implicit tensor * ndarray arithmetic.
std_t = torch.as_tensor(std)[:, None, None]
mean_t = torch.as_tensor(mean)[:, None, None]

# Twice the mean is added on purpose (plain de-normalisation would add it once):
# TensorBoard renders dim thumbnails poorly, so the images are brightened.
label_imgs = img_embeds['img'][:NUM_IMGS] * std_t + 2 * mean_t

# The context manager closes the writer even if add_embedding raises.
with SummaryWriter(f"{model_path}/tsne clustering") as writer:
    writer.add_embedding(img_embeds['output'][:NUM_IMGS],          # Encodings per image
                         metadata=img_embeds['label'][:NUM_IMGS],  # Mode label per image
                         label_img=label_imgs)                     # Thumbnail per image