## L-mode H-mode Classifier
- In this notebook, a single-camera model is created and trained.
- First, import a pretrained ResNet model, replace its final fully connected (fc) layer, and train only that layer.
- Then train the whole model.
- The model can be trained on images from either the RIS1 or the RIS2 camera; the resulting models can then be ensembled in `ModelEnsembling.ipynb`.

In [1]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import confinement_mode_classifier as cmc
from datetime import datetime
import time 
import re
import torchvision
import torch
from torch.optim import lr_scheduler
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import pytorch_lightning as pl
# Fix the random seed so that repeated runs are comparable. The call returns the
# seed, which is why this cell's output echoes 42.
pl.seed_everything(42)

Seed set to 42


42

In [2]:
# Working directory of the notebook; the data and run folders are resolved relative to it.
path = Path.cwd()
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Directory holding the CSV spreadsheets with time, mode, img_path and h_alpha columns.
data_dir_path = f'{path}/data/LH_alpha'
# os.listdir returns entries in arbitrary order; sort them so that the shot order
# (and everything derived from it) is identical on every run and machine.
file_names = sorted(os.listdir(data_dir_path))


Choose the shots that will be used for training, validation, and testing.

In [3]:
# Pull the shot number out of every file name containing "shot_<digits>".
# Directory entries that do not match the pattern (e.g. hidden or checkpoint files)
# are skipped instead of crashing with AttributeError on a failed search.
shot_pattern = re.compile(r'shot_(\d+)')
shot_numbers = [
    match.group(1)
    for match in map(shot_pattern.search, file_names)
    if match is not None
]

# Shots that are excluded from every split.
removed_shots = ['19915', '19925', '13182', '20009', '20112', 
                 '20143', '20145', '20146', '20147', '20144', '20098']
shot_numbers = [valid_shot for valid_shot in shot_numbers if valid_shot not in removed_shots]

# Shots held out for the test and validation splits, respectively.
shots_for_testing = ['18130', '16773', '16534', '19094', '18133', '17837', '18128']
shots_for_validation = ['16769', '19379', '18057', '18132', '18261', '18267', '18260']

### Create dataloader
- It is convenient to keep the dataloaders in a dictionary so that it is easy to switch between training and validation.

In [4]:
batch_size = 32

# Load the per-shot data and split it; judging by the returned names, the result is
# the complete frame followed by the test, validation and training subsets.
shot_df, test_df, val_df, train_df = cmc.load_and_split_dataframes(
    path,
    shot_numbers,
    shots_for_testing,
    shots_for_validation,
    use_ELMS=False,
)

# All three splits share the same loader settings (RIS1 camera, balanced data, no shuffling).
# NOTE(review): shuffle=False is also used for the training loader; presumably
# balance_data=True makes get_dloader draw samples through its own sampler — confirm
# in confinement_mode_classifier.
dloader_options = dict(ris_option='RIS1', balance_data=True, shuffle=False)
test_dataloader = cmc.get_dloader(test_df, path, batch_size, **dloader_options)
val_dataloader = cmc.get_dloader(val_df, path, batch_size, **dloader_options)
train_dataloader = cmc.get_dloader(train_df, path, batch_size, **dloader_options)

# Keyed by phase name so the training loop can switch between phases.
dataloaders = {'train': train_dataloader, 'val': val_dataloader}
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

A timestamp, followed by a comment typed in by the user, is added to the name of the model's run folder.

In [5]:
# Run label: the current local time followed by a free-text comment typed by the user.
# NOTE(review): input() waits for the user, so this cell blocks an unattended "Run All".
run_started = datetime.now().strftime("%y-%m-%d, %H-%M-%S ")
timestamp = run_started + input('add comment: ')
# TensorBoard events for the fc-only training stage are written to runs/<timestamp>_last_fc.
writer = SummaryWriter(f'runs/{timestamp}_last_fc')

### Import ResNet pretrained model
All layers are frozen except the last fully connected (fc) layer, which is replaced by a new, trainable one.


In [6]:
# Start from a ResNet-18 initialised with the IMAGENET1K_V1 weights.
pretrained_model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
# Freeze every pretrained parameter in a single call.
pretrained_model.requires_grad_(False)

# Replace the classification head. A newly constructed module has
# requires_grad=True by default, so it is the only trainable part at this point.
num_ftrs = pretrained_model.fc.in_features
pretrained_model.fc = nn.Linear(in_features=num_ftrs, out_features=3)  # 3 classes: L-mode, H-mode, ELM
pretrained_model = pretrained_model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /run/user/1665/cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 96.7MB/s]


### Train the last fc layer of the model

In [7]:
# Classification loss for the fc-only training stage.
criterion = nn.CrossEntropyLoss()

# Every parameter is handed to Adam, but at this stage only the new fc layer has
# requires_grad=True, so only it receives gradients. The same optimizer object is
# reused by the later full-model training cell.
optimizer = torch.optim.Adam(pretrained_model.parameters(), lr=1e-3)

# One-cycle learning-rate policy with a peak of max_lr.
# NOTE(review): total_steps=50 has to cover every scheduler.step() call made across
# all training stages that reuse this scheduler; how often cmc.train_model steps it
# is not visible here — confirm.
exp_lr_scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=1e-2, total_steps=50) #!!!

# The final weights are saved here; the checkpoint file sits next to them
# under the name model_chkpt.pt.
model_path = Path(f'{path}/runs/{timestamp}_last_fc/model.pt')
chkpt_path = model_path.with_name(f'{model_path.stem}_chkpt{model_path.suffix}')

num_epochs = 5
model = cmc.train_model(
    pretrained_model,
    criterion,
    optimizer,
    exp_lr_scheduler,
    dataloaders,
    writer,
    dataset_sizes,
    num_epochs=num_epochs,
    chkpt_path=chkpt_path,
)

torch.save(model.state_dict(), model_path)

Epoch 1/5
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

train Loss: 0.3263 Acc: 0.8699


  0%|          | 0/424 [00:00<?, ?it/s]

val Loss: 0.2636 Acc: 0.8761
Epoch 2/5
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

train Loss: 0.2189 Acc: 0.9073


  0%|          | 0/424 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

val Loss: 0.2384 Acc: 0.8965
Epoch 3/5
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

train Loss: 0.1911 Acc: 0.9186


  0%|          | 0/424 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

val Loss: 0.3315 Acc: 0.8587
Epoch 4/5
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

train Loss: 0.1752 Acc: 0.9259


  0%|          | 0/424 [00:00<?, ?it/s]

val Loss: 0.1946 Acc: 0.9201
Epoch 5/5
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

train Loss: 0.1650 Acc: 0.9321


  0%|          | 0/424 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff9905fcc10>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

val Loss: 0.1754 Acc: 0.9271
Training complete in 21m 10s
Best val Acc: 0.927116


### Train the whole model

In [None]:
# Close the writer of the previous stage before rebinding the name, so that its
# pending TensorBoard events are flushed to disk and its event file is released.
writer.close()
# Separate TensorBoard run for the stage in which all layers are trained.
writer = SummaryWriter(f'runs/{timestamp}_all_layers')

# Unfreeze every parameter so the whole network is updated.
for param in model.parameters():
    param.requires_grad = True

# The final weights are saved here; the checkpoint file sits next to them
# under the name model_chkpt.pt.
model_path = Path(f'{path}/runs/{timestamp}_all_layers/model.pt')
chkpt_path = model_path.with_name(f'{model_path.stem}_chkpt{model_path.suffix}')

# criterion, optimizer and exp_lr_scheduler are the objects created for the fc-only
# stage, so the optimizer state and the learning-rate schedule continue from there.
# NOTE(review): confirm that continuing the same OneCycleLR schedule (rather than
# starting a fresh one) is intended for this stage.
model = cmc.train_model(
    model,
    criterion,
    optimizer,
    exp_lr_scheduler,
    dataloaders,
    writer,
    dataset_sizes,
    num_epochs=14,
    chkpt_path=chkpt_path,
)
torch.save(model.state_dict(), model_path)

Epoch 1/14
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

train Loss: 0.0997 Acc: 0.9688


  0%|          | 0/424 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f41c11faaf0>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f41c11faaf0>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

val Loss: 0.1251 Acc: 0.9593
Epoch 2/14
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

train Loss: 0.0546 Acc: 0.9808


  0%|          | 0/424 [00:00<?, ?it/s]

val Loss: 0.0752 Acc: 0.9742
Epoch 3/14
----------


  0%|          | 0/1535 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f41c11faaf0>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f41c11faaf0>
Traceback (most recent call last):
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/compass/Shared/Users/bogdanov/.venv/lib/python3.8/site-packages/torc

### Test model with all layers trained

In [None]:

# NOTE(review): evaluation on the test set is currently disabled — everything below is
# commented out. Before re-enabling it, note that the writer.add_* lines use names
# (fig_confusion_matrix, accuracy, f1, precision, recall) that are not assigned anywhere
# in the visible part of this notebook, and that their tags say "trained f.c. layer"
# although this section is about the model with all layers trained.
#metrics = cmc.test_model(f'runs/{timestamp}', model, test_dataloader, comment='all_layers')

# writer.add_figure(f'Confusion matrix for the model with trained f.c. layer', fig_confusion_matrix)
# writer.add_scalar(f'Accuracy on test_dataset with trained f.c. layer', accuracy)
# writer.add_scalar(f'F1 metric on test_dataset with trained f.c. layer', f1)
# writer.add_scalar(f'Precision on test_dataset with trained f.c. layer', precision)
# writer.add_scalar(f'Recall on test_dataset with trained f.c. layer', recall)
# writer.close()