In [None]:
%load_ext autoreload
%autoreload

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import sys
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

import torch
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset, random_split, DataLoader, WeightedRandomSampler
from torch.utils.tensorboard import SummaryWriter
from torchsampler import ImbalancedDatasetSampler


sys.path.insert(0, "..")
from valtes_radartoolbox.training.preprocessing import get_dataset_min_max, custom_collate_fn, Normalize, ScalerX, ScalerY
from valtes_radartoolbox.training.model import ModifiedResNet
from valtes_radartoolbox.training.evaluation import create_confusion_matrix, weighted_accuracy
from valtes_radartoolbox.training.train import train_single_epoch, train
from valtes_radartoolbox.data.dataset import ValtesDataset, TLVDataset, TrainDataset
from valtes_radartoolbox.data.frame import FrameLabel

import random

In [None]:

# Hyperparameter sweep: trains one ModifiedResNet per (batch size, window size)
# combination and collects the accuracies returned by train() in `result_acc`.

# Set random seeds for reproducibility
torch.manual_seed(9)
np.random.seed(2)

# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define hyperparameters
NUM_EPOCHS = 60
batch_size = [8, 16, 32, 64]  # Different batch sizes to test
window_sizes = [6, 7, 8, 9, 10, 12]  # Different window sizes to test
num_classes = 4  # Number of output classes

# Data configuration
LABEL_SET = "labels/state_labels.csv"
# Recordings used for training. The order matters: it fixes the index layout
# of the ConcatDataset and therefore the outcome of the seeded random_split.
TRAIN_RECORDINGS = [
    "Config_4_2023-11-15_Artem",
    "20231120_144541",
    "20231120_145729",
    "20231120_150635",
    "20231120_153152",  # Artem labels
]
# Recordings held out for evaluation only (same order as before).
TEST_RECORDINGS = [
    "20231120_152424",  # Artem labels
    "Config_4_2023-11-15_John",
]
SCALER_FACTOR = 1.7   # first argument handed to ScalerX / ScalerY
KEEP_FRACTION = 0.95  # share of each concatenated dataset kept by random_split
SPLIT_SEED = 42       # seed of the generator used for random_split


def _load_recordings(names, window_size):
    """Return one freshly constructed TrainDataset per recording name.

    Every call builds new objects on purpose: transforms are attached per
    dataset instance via ``set_transform``, so the plain and the augmented
    copies of the same recording must not share an instance.

    Args:
        names: Iterable of recording identifiers passed to ``TrainDataset``.
        window_size: Window size forwarded to ``TrainDataset``.

    Returns:
        List of ``TrainDataset`` objects in the same order as ``names``.
    """
    return [
        TrainDataset(name, label_set=LABEL_SET, window_size=window_size)
        for name in names
    ]


result_acc = []  # One list per batch size, each holding one dict per window size

# Iterate over all batch sizes and window sizes
for cur_batch_size in batch_size:
    batch_acc = []  # Results of every window size for the current batch size
    for cur_window_size in window_sizes:
        # Three independent copies of the training recordings: one plain,
        # one for the ScalerX augmentation and one for the ScalerY augmentation.
        # The evaluation-only recordings get no augmented copies, so none are loaded.
        train_plain = _load_recordings(TRAIN_RECORDINGS, cur_window_size)
        train_aug_x = _load_recordings(TRAIN_RECORDINGS, cur_window_size)
        train_aug_y = _load_recordings(TRAIN_RECORDINGS, cur_window_size)

        # Combine training datasets
        datasets_train = train_plain + train_aug_x + train_aug_y
        # Testing datasets
        datasets_test = _load_recordings(TEST_RECORDINGS, cur_window_size)

        # Set experiment name and initialize model
        # NOTE(review): the name encodes the batch size but not the window size;
        # confirm that train() tells the runs apart via the batch/window arguments
        # it receives, otherwise runs of different window sizes share one name.
        EXPERIMENT_NAME = f"2023-11-22_batch_size_{cur_batch_size}_SGD_lr_0.001"
        model = ModifiedResNet(num_classes=num_classes, window_size=cur_window_size, freeze_weights=False).double().to(device)

        # Define loss function, optimizer, and scheduler
        LOSS_FN = nn.CrossEntropyLoss().double()
        OPTIMIZER = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
        SCHEDULER = torch.optim.lr_scheduler.StepLR(OPTIMIZER, step_size=30, gamma=0.1)

        # Concatenate the recordings and keep a seeded random KEEP_FRACTION of
        # each concatenation; the remaining part of both splits is discarded.
        concat_dataset_train = ConcatDataset(datasets_train)
        concat_dataset_test = ConcatDataset(datasets_test)
        train_length = int(KEEP_FRACTION * len(concat_dataset_train))
        train_dataset, _ = random_split(
            concat_dataset_train,
            [train_length, len(concat_dataset_train) - train_length],
            generator=torch.Generator().manual_seed(SPLIT_SEED),
        )
        val_length = int(KEEP_FRACTION * len(concat_dataset_test))
        val_dataset, _ = random_split(
            concat_dataset_test,
            [val_length, len(concat_dataset_test) - val_length],
            generator=torch.Generator().manual_seed(SPLIT_SEED),
        )

        # Create DataLoader for validation
        dataloader_val = DataLoader(val_dataset, batch_size=cur_batch_size, shuffle=True, collate_fn=custom_collate_fn)

        # Normalization parameters are derived from the training recordings only,
        # so that no statistics of the held-out recordings leak into preprocessing.
        # The plain copies suffice: no transform is attached to any copy yet, so
        # the augmented copies would yield the same raw values.
        stats_loader = DataLoader(
            ConcatDataset(train_plain),
            batch_size=cur_batch_size,
            collate_fn=custom_collate_fn,
        )
        min_, max_ = get_dataset_min_max(stats_loader)
        normalize = Normalize(min_, max_)

        # Define the transform pipelines of the three training groups
        scaler_x = ScalerX(SCALER_FACTOR, cur_window_size)
        changesX = [normalize, scaler_x]
        scaler_y = ScalerY(SCALER_FACTOR, cur_window_size)
        changesY = [normalize, scaler_y]

        # Attach the transforms group by group (explicit lists instead of
        # positional slices into the concatenated dataset list).
        for ds in train_plain:
            ds.set_transform(transforms.Compose([normalize]))
        for ds in train_aug_x:
            ds.set_transform(transforms.Compose(changesX))
        for ds in train_aug_y:
            ds.set_transform(transforms.Compose(changesY))
        for ds in datasets_test:
            ds.set_transform(transforms.Compose([normalize]))

        # Create sampler for imbalanced dataset and DataLoader for training
        # NOTE(review): indexing the concatenated frame_labels with the subset
        # indices assumes every dataset exposes exactly one label per item
        # (len(ds.frame_labels) == len(ds)) - TODO confirm for windowed datasets.
        sampler = ImbalancedDatasetSampler(
            train_dataset,
            labels=np.concatenate([ds.frame_labels for ds in datasets_train])[train_dataset.indices],
        )
        dataloader_train = DataLoader(train_dataset, batch_size=cur_batch_size, sampler=sampler, collate_fn=custom_collate_fn)

        # Train the model and record the results
        avg_vacc, avg_wvacc = train(
            EXPERIMENT_NAME, model, OPTIMIZER, SCHEDULER, LOSS_FN,
            dataloader_train, dataloader_val, NUM_EPOCHS,
            cur_batch_size, cur_window_size,
        )

        # Store results for current batch size and window size
        values_acc = {
            "batch_size": cur_batch_size,
            "window_size": cur_window_size,
            "avg_vacc": avg_vacc,
            "avg_wvacc": avg_wvacc,
        }
        batch_acc.append(values_acc)
        print(f"Batch_size - {cur_batch_size}, Window_size - {cur_window_size}")

    result_acc.append(batch_acc)


In [None]:
# Destination file for the serialized sweep results
pickle_file_path = "3cl_augm_scalerX+Y_60epoches_3sets.pkl"

# Serialize `result_acc` and write the bytes to the destination in binary mode
with open(pickle_file_path, mode="wb") as out_handle:
    pickle.dump(result_acc, out_handle)

# Confirm where the results were written
print(f"Variable saved as pickle file: {pickle_file_path}")