In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# from itertools import starmap
from utils import save_model, save_plots
from CNN_execution import plot_roc_curve, ect_train_validate, report_trained_model, find_numpy_files

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Settings needed to define and train the model.
# They stay the same throughout the exercise.

# device
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# loss function
lossfcn = nn.CrossEntropyLoss()

# number of epochs to train the network for; type=int
NUM_EPOCHS = 50
# learning rate for training; type=float
LEARNING_RATE = 0.001

In [3]:
num_dirs = 5
num_thresh = 5

# Every sub-directory of 'data' is treated as one class. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would otherwise
# make the class order (and the random draws below) vary between runs.
classes = sorted(
    i
    for i in os.listdir('data')
    if os.path.isdir(os.path.join('data', i))
)
if not classes:
    raise ValueError("No class directories found in 'data'")

# class name -> collection of .npy file paths found for that class
class_items = {
    i: find_numpy_files(os.path.join('data', i))
    for i in classes
}

# Use the same number of files from every class: at most `max_data_per_class`,
# but never more than the smallest class holds, otherwise sampling without
# replacement below would raise a ValueError.
max_data_per_class = 30
smallest_class_size = min(len(class_items[i]) for i in classes)
num_data_to_use_for_training = min(max_data_per_class, smallest_class_size)
print(f"Using {num_data_to_use_for_training} data for training")

# Seeded generator so the selected subset is the same on every run
# (provided find_numpy_files returns the paths in a stable order — TODO confirm).
rng = np.random.default_rng(0)
class_items = {
    class_name: rng.choice(file_paths, num_data_to_use_for_training, replace=False)
    for class_name, file_paths in class_items.items()
}

Using 30 data for training


In [4]:
# Print the signature and docstring of ect_train_validate for reference.
help(ect_train_validate)

# Example call, kept commented out; uncomment to run it.
# NOTE(review): the example relies on the function's own defaults for
# num_epochs, learning_rate, lossfcn and device instead of passing
# NUM_EPOCHS, LEARNING_RATE, lossfcn and device defined earlier in this
# notebook — confirm that is intended.
# trained_outputs = ect_train_validate(
#     num_dirs=num_dirs,
#     num_thresh=num_thresh,
#     input_path=None,
#     output_ect_path='example_data/outputs',
#     output_model_path='example_data/best_model.pth',
#     log_level='INFO',
#     recompute_ect=False
# )

Help on function ect_train_validate in module CNN_execution:

ect_train_validate(num_dirs, num_thresh, input_path=None, output_ect_path='example_data/ect_output', in_memory=False, output_model_path='outputs/best_model.pth', num_epochs=50, learning_rate=0.001, lossfcn=CrossEntropyLoss(), batch_size=4, valid_split=0.2, num_workers=0, device=device(type='cuda'), recompute_ect=True, log_level='INFO')
    Function to train and validate the CNN model using the ECT dataset.
    Usage:
        ect_train_validate(
            num_dirs, num_thresh, input_path=None,
            output_ect_path="example_data/ect_output", in_memory=False,
            output_model_path="outputs/best_model.pth",
            num_epochs=50, learning_rate=1e-3, lossfcn=nn.CrossEntropyLoss(),
            batch_size=4, valid_split=0.2, num_workers=0,
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
            recompute_ect=True, log_level='INFO'
        )
    Parameters:
        num_dirs: 

In [5]:
# Print the signature and docstring of save_model for reference.
help(save_model)
# Example call, kept commented out; uncomment to run it.
# It reads its arguments from `trained_outputs`, the value assigned by the
# commented-out ect_train_validate example earlier in this notebook.
# save_model(
#     epochs=trained_outputs["num_epochs"],
#     model=trained_outputs["model"],
#     optimizer=trained_outputs["optimizer"],
#     criterion=trained_outputs["lossfcn"],
#     output_model_path='example_data/best_model.pth',
# )


Help on function save_model in module utils:

save_model(epochs, model, optimizer, criterion, output_model_path='outputs/best_model.pth')
    Function to save the trained model.
    Adapted from https://debuggercafe.com/saving-and-loading-the-best-model-in-pytorch/



In [6]:
# Print the signature and docstring of save_plots for reference.
help(save_plots)
# Example call, kept commented out; uncomment to run it.
# It creates one figure with two side-by-side axes and passes them as the
# `loss` and `accuracy` axes, so save_plots draws on them instead of
# creating new figures (per the docstring printed above).
# loss, acc = plt.figure(figsize=(9,5)).subplots(1, 2)
# save_plots(
#     train_acc= trained_outputs["train_acc"],
#     valid_acc= trained_outputs["valid_acc"],
#     train_loss= trained_outputs["train_loss"],
#     valid_loss= trained_outputs["valid_loss"],
#     loss=loss,
#     accuracy=acc,
#     accuracy_path='example_data/accuracy.png',
#     loss_path='example_data/loss.png'
# )


Help on function save_plots in module utils:

save_plots(train_acc, valid_acc, train_loss, valid_loss, accuracy=None, loss=None, fig_size=(10, 7), dpi=300, accuracy_path='outputs/accuracy.png', loss_path='outputs/loss.png')
    Function to save the loss and accuracy plots.
    Usage:
        save_plots(
            train_acc, valid_acc, train_loss,valid_loss,
            accuracy = None, loss = None,
            fig_size=(10, 7), dpi=300,
            accuracy_path = 'outputs/accuracy.png', loss_path = 'outputs/loss.png'
        )
    Parameters:
        train_acc: list of training accuracy values
        valid_acc: list of validation accuracy values
        train_loss: list of training loss values
        valid_loss: list of validation loss values
        accuracy: matplotlib axis to plot accuracy. If None, a new figure is created.
        loss: matplotlib axis to plot loss. If None, a new figure is created.
        fig_size: tuple, size of the figure. Default is (10, 7)
        dpi: i

In [7]:
# Print the signature and docstring of report_trained_model for reference.
help(report_trained_model)
# Example call, kept commented out; uncomment to run it.
# The datasets and loaders are read from `trained_outputs`, the value assigned
# by the commented-out ect_train_validate example earlier in this notebook.
# report_trained_model(
#     num_dirs=num_dirs,
#     num_thresh=num_thresh,
#     train_dataset=trained_outputs["train_dataset"],
#     train_loader=trained_outputs["train_loader"],
#     test_loader=trained_outputs["test_loader"],
#     test_dataset=trained_outputs["test_dataset"],
#     model_path='example_data/best_model.pth',
#     output_cf='example_data/confusion_matrix.png',
#     output_report='example_data/accuracy.txt',
#     log_level='INFO'
# )

Help on function report_trained_model in module CNN_execution:

report_trained_model(num_dirs, num_thresh, train_dataset, train_loader, test_loader, test_dataset, device=device(type='cuda'), model_path='outputs/best_model.pth', output_cf='outputs/confusion_matrix.png', output_report='outputs/outputCLFreport.csv', log_level='INFO')
    Function to report the trained model.
    Usage:
        report_trained_model(
            num_dirs, num_thresh,
            train_dataset, train_loader, test_loader, test_dataset,
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
            model_path= 'outputs/best_model.pth',
            output_cf='outputs/confusion_matrix.png',
            output_report='outputs/outputCLFreport.csv',
            log_level='INFO'
        )
    Parameters:
        num_dirs: int, number of directions for ECT calculation.
        num_thresh: int, number of thresholds for ECT calculation.
        train_dataset: torch.utils.data.Dataset, trai

In [8]:
# Print the signature and docstring of plot_roc_curve for reference.
help(plot_roc_curve)
# Example call, kept commented out; uncomment to run it.
# The model, loader and dataset are read from `trained_outputs`, the value
# assigned by the commented-out ect_train_validate example earlier in this
# notebook.
# plot_roc_curve(
#     model=trained_outputs["model"],
#     test_loader=trained_outputs["test_loader"],
#     test_dataset=trained_outputs["test_dataset"],
#     output_path='example_data/roc_curve.png'
# )

Help on function plot_roc_curve in module CNN_execution:

plot_roc_curve(model, test_loader, test_dataset, device=device(type='cuda'), axis=None, output_path='outputs/roc_curve.png')
    Function to plot the ROC curve for the trained model.
    Usage:
        plot_roc_curve(model, test_loader, test_dataset, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    Parameters:
        model: torch.nn model, trained model.
        test_loader: torch.utils.data.DataLoader, test data loader.
        test_dataset: torch.utils.data.Dataset, test dataset.
        device: torch.device, device to run the model. Optional, default is 'cuda' if available else 'cpu'.



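The following Python code extracts the data from the zip files in the `data` folder into one folder per class. Zip files whose names share a common prefix longer than five characters are treated as members of the same class, and the class folder is named after that shared prefix; a zip file that shares no such prefix with any other file forms a class of its own, named after the file without its `.zip` extension. Files whose names start with `.` or `2` are skipped, and only the `.npy` members of each archive are extracted.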

```python

# Zip archives sitting directly in 'data'. Hidden files and names starting
# with '2' are skipped; anything that is not a .zip file is ignored as well,
# since every entry is later opened as a zip archive. Sorted so the grouping
# does not depend on the arbitrary order returned by os.listdir.
toplevel = sorted(
    i
    for i in os.listdir('data')
    if i.lower().endswith('.zip')
    and not (i.startswith('.') or i.startswith('2'))
    and os.path.isfile(os.path.join('data', i))
)
# archive file name -> name without the extension
stems = {i: os.path.splitext(i)[0] for i in toplevel}

# class name -> list of archive file names belonging to that class
classes = {}
for i in toplevel:
    added = False
    for j in toplevel:
        if i == j:
            continue
        # Two archives belong to the same class when their names (without the
        # extension) share a prefix longer than five characters; the shared
        # prefix becomes the class name.
        common = os.path.commonprefix([stems[i], stems[j]])
        if len(common) > 5:
            added = True
            members = classes.setdefault(common, [])
            # Each pair is visited from both sides, so guard against listing
            # (and later extracting) the same archive more than once.
            if j not in members:
                members.append(j)
    if not added:
        # No sibling archive: the file forms a class of its own.
        members = classes.setdefault(stems[i], [])
        if i not in members:
            members.append(i)

# Create one folder per class under 'data'.
for i in classes:
    os.makedirs(os.path.join('data', i), exist_ok=True)

import zipfile as zp

# Unpack the .npy members of every archive into the folder of its class.
for folder, zipfiles in classes.items():
    target_dir = os.path.join('data', folder)
    for z in zipfiles:
        with zp.ZipFile(os.path.join('data', z), 'r') as archive:
            npy_members = [
                member for member in archive.namelist()
                if member.endswith('.npy')
            ]
            # An explicit (possibly empty) member list restricts extraction
            # to exactly these entries.
            archive.extractall(target_dir, members=npy_members)