## In this notebook an ensembled model will be created and trained
Two ensembling options are supported:
- Combine two RIS1 models, where the second model receives an image from the preceding time
- Combine a RIS1 model and a RIS2 model.

In [1]:
import copy
import os
import re
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from torch import cuda
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

import confinement_mode_classifier as cmc

# Working directory of the notebook; data files and run directories are resolved relative to it.
path = Path.cwd()
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# --- Ensemble configuration ---
# Image option passed to the dataloaders for the first and for the second image.
ris_option, second_img_opt = 'RIS1', 'RIS2'

# --- Data loading ---
batch_size = 16
num_workers = 32

# --- Training schedule ---
# Epochs for the head-only stage and for the all-layers stage.
num_epochs_for_fc = 10
num_epochs_for_all_layers = 10

# --- Learning rates ---
# learning_rate_min is the initial Adam lr; learning_rate_max is the OneCycleLR peak.
learning_rate_min = 0.001
learning_rate_max = 0.01

# Suffix appended to the timestamp in run-directory names.
# NOTE(review): `{...}` formats the Ellipsis object, so the name ends in "Ellipsis";
# this looks like a leftover placeholder - confirm the intended suffix.
comment_for_model_name = f'{ris_option}x{second_img_opt}{...}'

# Checkpoints of the two individually trained single-image models.
first_model_path = f'{path}/runs/24-02-22, 14-36-03 RIS1, onecycleLR, extended dset_all_layers/model.pt'
second_model_path = f'{path}/runs/24-02-22, 18-07-52 RIS2, onecycleLR, extended dset_all_layers/model.pt'

In [3]:
# Keep only the shots that are flagged as usable for both RIS1 and RIS2.
shot_usage = pd.read_csv(f'{path}/data/shot_usage.csv')
usable_for_both = shot_usage['used_for_ris1'] & shot_usage['used_for_ris2']
shot_for_ris = shot_usage[usable_for_both]
shot_numbers = shot_for_ris['shot']

# Shots reserved for testing and validation, as marked in the 'used_as' column.
shots_for_testing = shot_for_ris.loc[shot_for_ris['used_as'] == 'test', 'shot']
shots_for_validation = shot_for_ris.loc[shot_for_ris['used_as'] == 'val', 'shot']

shot_df, test_df, val_df, train_df = cmc.load_and_split_dataframes(
    path, shot_numbers, shots_for_testing, shots_for_validation, use_ELMS=False
)

# Options shared by all three dataloaders.
# second_img_opt='RIS1' would indicate that two RIS1 models are ensembled.
# NOTE(review): shuffle=False is also used for training; presumably balance_data=True
# handles the sampling order inside cmc.get_dloader - confirm.
loader_options = dict(
    path=path,
    batch_size=batch_size,
    shuffle=False,
    balance_data=True,
    ris_option=ris_option,
    second_img_opt=second_img_opt,
    num_workers=num_workers,
)

test_dataloader = cmc.get_dloader(test_df, **loader_options)
val_dataloader = cmc.get_dloader(val_df, **loader_options)
train_dataloader = cmc.get_dloader(train_df, **loader_options)

# Only the train and validation loaders are handed to the training loop.
dataloaders = {'train': train_dataloader, 'val': val_dataloader}
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

In [4]:
# Build a ResNet-18 with ImageNet weights and replace its head with a 2-logit layer.
# Both single-image checkpoints are loaded into this architecture.
pretrained_model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = pretrained_model.fc.in_features
# Parameters of newly constructed modules have requires_grad=True by default.
# 2 output logits (data is loaded with use_ELMS=False, so presumably L-mode vs H-mode - confirm).
pretrained_model.fc = nn.Linear(num_ftrs, 2)
pretrained_model = pretrained_model.to(device)

# Load the first single-image model (RIS1 in this case).
# map_location=device lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine, which the device fallback defined earlier in the notebook allows.
pretrained_model.load_state_dict(torch.load(first_model_path, map_location=device))

# The second model shares the architecture: copy it, then overwrite the weights
# with the second checkpoint (RIS2 in this case).
ris2_model = copy.deepcopy(pretrained_model)
ris2_model.load_state_dict(torch.load(second_model_path, map_location=device))

# Combine both single-image models into the two-image ensemble.
untrained_ensembled_model = cmc.TwoImagesModel(modelA=pretrained_model, modelB=ris2_model, hidden_units=30).to(device)

### Freeze all weights except the fully connected heads (`modelA.fc`, `modelB.fc`) and the ensemble classifier

In [5]:


# Name fragments of the layers that stay trainable; everything else is frozen.
trainable_tags = ('classifier', 'fc', 'last_fully_connected')

for name, param in untrained_ensembled_model.named_parameters():
    param.requires_grad = any(tag in name for tag in trainable_tags)

# List the parameters that are still trainable after freezing.
for name, param in untrained_ensembled_model.named_parameters():
    if not param.requires_grad:
        continue
    print(f"{name}: requires_grad = {param.requires_grad}")

modelA.fc.weight: requires_grad = True
modelA.fc.bias: requires_grad = True
modelB.fc.weight: requires_grad = True
modelB.fc.bias: requires_grad = True
classifier.0.weight: requires_grad = True
classifier.0.bias: requires_grad = True
classifier.2.weight: requires_grad = True
classifier.2.bias: requires_grad = True


In [6]:
# Run identifier: local wall-clock time followed by the configured model comment.
run_started = datetime.now()
timestamp = run_started.strftime("%y-%m-%d, %H-%M-%S ") + comment_for_model_name
# TensorBoard writer for the head-only training stage.
writer = SummaryWriter(f'runs/{timestamp}_classifier_training')

In [7]:
# Take one training batch and log the model graph to TensorBoard with it.
first_batch = next(iter(train_dataloader))
sample_input = first_batch['img'].to(device).float()
writer.add_graph(untrained_ensembled_model, sample_input)

### Train the classifier

In [8]:
# Loss for the classification task.
criterion = nn.CrossEntropyLoss()

# All parameters are handed to Adam; the ones frozen above receive no gradient.
optimizer = torch.optim.Adam(params=untrained_ensembled_model.parameters(), lr=learning_rate_min)

# One-cycle learning-rate policy peaking at learning_rate_max.
# NOTE(review): total_steps=50 is hard-coded and independent of num_epochs_for_fc;
# confirm it matches how cmc.train_model steps the scheduler.
exp_lr_scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, max_lr=learning_rate_max, total_steps=50)

# Final weights go to model.pt, intermediate checkpoints to model_chkpt.pt in the same directory.
model_path = Path(f'{path}/runs/{timestamp}_classifier_training/model.pt')
classifier_chkpt_path = model_path.with_name(f'{model_path.stem}_chkpt{model_path.suffix}')

ensembled_model = cmc.train_model(
    untrained_ensembled_model, criterion, optimizer, exp_lr_scheduler,
    dataloaders, writer, dataset_sizes,
    num_epochs=num_epochs_for_fc,
    chkpt_path=classifier_chkpt_path,
)

torch.save(ensembled_model.state_dict(), model_path)

Epoch 1/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.02it/s]


train Loss: 0.0468 Acc: 0.9866


100%|██████████| 490/490 [01:04<00:00,  7.62it/s]


val Loss: 1.3270 Acc: 0.7987
Epoch 2/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.01it/s]


train Loss: 0.0308 Acc: 0.9884


100%|██████████| 490/490 [01:04<00:00,  7.61it/s]


val Loss: 1.2196 Acc: 0.7980
Epoch 3/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.01it/s]


train Loss: 0.0269 Acc: 0.9898


100%|██████████| 490/490 [01:04<00:00,  7.63it/s]


val Loss: 1.1463 Acc: 0.8041
Epoch 4/10
----------


100%|██████████| 2562/2562 [05:20<00:00,  7.99it/s]


train Loss: 0.0290 Acc: 0.9890


100%|██████████| 490/490 [01:04<00:00,  7.62it/s]


val Loss: 1.6495 Acc: 0.7890
Epoch 5/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.01it/s]


train Loss: 0.0276 Acc: 0.9894


100%|██████████| 490/490 [01:04<00:00,  7.61it/s]


val Loss: 0.5761 Acc: 0.8066
Epoch 6/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.02it/s]


train Loss: 0.0289 Acc: 0.9895


100%|██████████| 490/490 [01:04<00:00,  7.62it/s]


val Loss: 1.3576 Acc: 0.8034
Epoch 7/10
----------


100%|██████████| 2562/2562 [05:19<00:00,  8.02it/s]


train Loss: 0.0311 Acc: 0.9886


100%|██████████| 490/490 [01:04<00:00,  7.61it/s]


val Loss: 2.0158 Acc: 0.7886
Epoch 8/10
----------


100%|██████████| 2562/2562 [05:20<00:00,  7.99it/s]


train Loss: 0.0423 Acc: 0.9875


100%|██████████| 490/490 [01:04<00:00,  7.62it/s]


val Loss: 6.4999 Acc: 0.8165
Epoch 9/10
----------


100%|██████████| 2562/2562 [05:20<00:00,  7.99it/s]


train Loss: 0.0397 Acc: 0.9871


100%|██████████| 490/490 [01:04<00:00,  7.63it/s]


val Loss: 0.7669 Acc: 0.8103
Epoch 10/10
----------


  0%|          | 0/2562 [00:00<?, ?it/s]

## Train all the weights

In [None]:
# Clear the CUDA cache and reset peak-memory statistics before the second stage.
if cuda.is_available():
    # A single call releases the unoccupied cached memory held by the allocator.
    cuda.empty_cache()
    for i in range(cuda.device_count()):
        # reset_max_memory_allocated is deprecated; reset_peak_memory_stats replaces it.
        cuda.reset_peak_memory_stats(i)

# Close the writer of the previous stage so its events are flushed to disk,
# then open a separate log directory for the all-layers stage.
writer.close()
writer = SummaryWriter(f'runs/{timestamp}_all_layers')

# Unfreeze all layers.
for param in ensembled_model.parameters():
    param.requires_grad = True

# Check that every parameter tensor is trainable; print a one-line summary
# instead of one line per parameter.
param_flags = [param.requires_grad for param in ensembled_model.parameters()]
assert all(param_flags), "some parameters are still frozen"
print(f"{sum(param_flags)}/{len(param_flags)} parameter tensors have requires_grad = True")

# Final weights go to model.pt, intermediate checkpoints to model_chkpt.pt in the same directory.
model_path = Path(f'{path}/runs/{timestamp}_all_layers/model.pt')
all_layers_chkpt_path = model_path.with_name(f'{model_path.stem}_chkpt{model_path.suffix}')

# NOTE(review): `optimizer` and `exp_lr_scheduler` are the objects created for the
# head-only stage, so the learning-rate schedule continues from where that stage
# stopped - confirm this is intended rather than a fresh schedule.
ensembled_model = cmc.train_model(
    ensembled_model, criterion, optimizer, exp_lr_scheduler,
    dataloaders, writer, dataset_sizes,
    num_epochs=num_epochs_for_all_layers,
    chkpt_path=all_layers_chkpt_path,
)

torch.save(ensembled_model.state_dict(), model_path)



Epoch 1/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.1129 Acc: 0.9773


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.3720 Acc: 0.8913
Epoch 2/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.0470 Acc: 0.9870


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.8909 Acc: 0.7913
Epoch 3/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.0215 Acc: 0.9932


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.3452 Acc: 0.8666
Epoch 4/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.0267 Acc: 0.9924


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.4024 Acc: 0.8079
Epoch 5/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.0305 Acc: 0.9917


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.2661 Acc: 0.9095
Epoch 6/6
----------


  0%|          | 0/960 [00:00<?, ?it/s]

train Loss: 0.0262 Acc: 0.9914


  0%|          | 0/515 [00:00<?, ?it/s]

val Loss: 0.3210 Acc: 0.8900
Training complete in 93m 34s
Best val Acc: 0.909478


In [None]:
# Evaluate the fine-tuned ensemble on the test dataloader, pointing test_model at the all-layers run directory.
# NOTE(review): the meaning of max_batch=0 is defined in cmc.test_model - confirm it does not limit the evaluation.
test_run_dir = f'{path}/runs/{timestamp}_all_layers/'
metrics = cmc.test_model(test_run_dir, ensembled_model, test_dataloader, max_batch=0)