In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy
import pandas as pd
from sklearn.model_selection import train_test_split
cudnn.benchmark = True

In [2]:
# Custom dataset class
class CustomTensorDataset(Dataset):
    """Dataset that pairs a feature tensor with a label tensor.

    Both inputs are converted with ``torch.Tensor(...)`` and must have the same
    length along dimension 0. An optional callable is applied to each feature
    sample when it is fetched.
    """

    def __init__(self, dataset, transform_list=None):
        """Convert the inputs to tensors and remember the transform.

        Args:
            dataset: two-element sequence ``[features, labels]``.
            transform_list: optional callable applied to a single feature
                sample in ``__getitem__``. Despite the name it is invoked
                directly rather than iterated over.

        Raises:
            AssertionError: if features and labels differ in length along dim 0.
        """
        features, targets = dataset
        pair = (torch.Tensor(features), torch.Tensor(targets))
        # every stored tensor must agree with the first one on sample count
        assert all(pair[0].size(0) == member.size(0) for member in pair)
        self.tensors = pair
        self.transforms = transform_list

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index``; the sample is transformed if a transform is set."""
        sample = self.tensors[0][index]
        if self.transforms:
            sample = self.transforms(sample)
        target = self.tensors[1][index]
        return sample, target

    def __len__(self):
        """Number of samples, i.e. the size of the feature tensor along dim 0."""
        return self.tensors[0].size(0)

In [3]:
# function to preprocess data and prepare for training
def preprocess_data(data_path, labels_path, batch_size, split_test_size=0.3, split_random_state=42, num_workers=8):
    """Load image data from ``.npy`` files and build train/validation dataloaders.

    Args:
        data_path: path to the ``.npy`` file holding the image data.
        labels_path: path to the ``.npy`` file holding the labels.
        batch_size: batch size used by both dataloaders.
        split_test_size: fraction of samples held out for validation.
        split_random_state: seed passed to ``train_test_split``.
        num_workers: number of worker processes per dataloader (default 8,
            the value that used to be hard-coded).

    Returns:
        ``(dataloaders, dataset_sizes)``: two dicts keyed by ``'train'`` and
        ``'val'`` holding the ``DataLoader`` objects and the sample counts.
    """
    # worker processes used by each DataLoader
    core_count = num_workers
    print('CPU cores: ', core_count)

    # Loading Data
    data = np.load(data_path)
    labels = np.load(labels_path)

    # The second axis of the loaded array is treated as the sample axis; each
    # column is unflattened into a 300x300x3 image.
    # assumes data is stored as (300*300*3, n_samples) — TODO confirm
    reshaped_data = data.reshape((300, 300, 3, data.shape[1]))

    # per-channel statistics over height, width and samples
    stds = reshaped_data.std(axis=(0, 1, 3))
    means = reshaped_data.mean(axis=(0, 1, 3))
    # NOTE(review): these statistics come from the raw array, while Normalize
    # runs after ToPILImage -> ToTensor, which rescales pixel values. If the
    # stored data is in 0-255 rather than 0-1 the statistics will not match
    # the tensors they are applied to — TODO confirm the input value range.

    # (H, W, C, N) -> (N, C, H, W)
    reshaped_data = np.moveaxis(reshaped_data, source=[0, 1, 2, 3], destination=[2, 3, 1, 0])
    labels = np.array(labels, dtype=int)

    data_transforms = {
        # random augmentation is applied to training samples only
        'train': transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(degrees=(0, 360)),
            transforms.RandomAdjustSharpness(sharpness_factor=2),
            transforms.GaussianBlur(kernel_size=(5, 9),
                                    sigma=(0.1, 5)),
            transforms.Normalize(means, stds)
            ]),
        # validation samples are only converted and normalised
        'val': transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize(means, stds)
            ]),
    }

    # split data into training and validation sets
    x_train, x_val, t_train, t_val = train_test_split(
        reshaped_data, labels, test_size=split_test_size, random_state=split_random_state)
    splits = {'train': [x_train, t_train], 'val': [x_val, t_val]}

    # Each split gets its own transform pipeline: the validation set must use
    # the deterministic 'val' transforms so its metrics are not perturbed by
    # random augmentation.
    image_datasets = {
        phase: CustomTensorDataset(dataset=splits[phase], transform_list=data_transforms[phase])
        for phase in ['train', 'val']
    }

    # Only the training loader is shuffled; sample order does not affect the
    # validation metrics, which are accumulated over the whole split.
    dataloaders = {
        phase: torch.utils.data.DataLoader(image_datasets[phase],
                                           batch_size=batch_size,
                                           shuffle=(phase == 'train'),
                                           num_workers=core_count)
        for phase in ['train', 'val']
    }

    dataset_sizes = {phase: len(image_datasets[phase]) for phase in ['train', 'val']}

    return dataloaders, dataset_sizes

In [4]:
# function to describe training process of the neural network
def model_function(model,  criterion, optimizer, scheduler, dataset_sizes, dataloaders, num_epochs=25):
    """Run the train/validate loop and return the model with its best weights.

    Every epoch runs a training phase followed by a validation phase. The
    scheduler is stepped once per epoch after the training phase. Whenever the
    validation accuracy exceeds the best value seen so far, a deep copy of the
    model's ``state_dict`` is kept; those weights are loaded back before
    returning.

    Args:
        model: network to train; it is expected to already live on the device
            selected here (CUDA when available, otherwise CPU).
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        dataset_sizes: dict with the number of samples for ``'train'`` and
            ``'val'``, used to average loss and accuracy.
        dataloaders: dict with the ``'train'`` and ``'val'`` dataloaders
            yielding ``(inputs, labels)`` batches.
        num_epochs: number of epochs to run.

    Returns:
        ``(model, train_dict)`` where ``train_dict[phase]`` is
        ``[losses, accuracies]``, one float per epoch.
    """
    since = time.time()

    # use GPU when available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    train_dict = {'train': [[], []], 'val': [[], []]}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode
            model.train(is_training)

            running_loss = 0.0
            # Kept as a plain int so the accuracy computation below also works
            # when the loader yields no batches.
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                # class indices must be int64 for the loss; cast and move in one step
                labels = labels.to(device=device, dtype=torch.long)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are tracked only while training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics: loss.item() is the batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            train_dict[phase][0].append(epoch_loss)
            train_dict[phase][1].append(epoch_acc)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # four decimals, matching the per-epoch accuracy output
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, train_dict

In [5]:
# Load model and train data
def train_model(dataloaders, epochs, dataset_sizes, gamma, step_size, learning_rate, class_num=10):
    """Build a frozen EfficientNetV2-L with a new linear head and train the head.

    All pretrained parameters are frozen; the classifier is replaced by a
    single ``nn.Linear`` layer with ``class_num`` outputs, and only that layer
    is handed to the Adam optimizer.

    Args:
        dataloaders: dict with the ``'train'`` and ``'val'`` dataloaders.
        epochs: number of training epochs.
        dataset_sizes: dict with the sample counts for ``'train'`` and ``'val'``.
        gamma: multiplicative learning-rate decay factor for ``StepLR``.
        step_size: number of epochs between learning-rate decays.
        learning_rate: initial learning rate for Adam.
        class_num: number of output classes.

    Returns:
        ``(best_model, train_dict_fixed)`` as returned by ``model_function``.
    """
    # use GPU when available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # torch.cuda.get_device_name only accepts CUDA devices, so it must not be
    # called on the CPU fallback path.
    if use_cuda:
        print('GPU: ', torch.cuda.get_device_name(device=device))
    else:
        print('GPU: ', 'not available, training on CPU')

    # load the pretrained model and freeze every existing parameter
    model_conv = torchvision.models.efficientnet_v2_l(weights='EfficientNet_V2_L_Weights.IMAGENET1K_V1')
    for param in model_conv.parameters():
        param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by default,
    # so the replacement head is the only trainable part.
    num_ftrs = model_conv.classifier[1].in_features
    model_conv.classifier = nn.Linear(num_ftrs, class_num)
    model_conv = model_conv.to(device)

    criterion = nn.CrossEntropyLoss()
    # Only parameters of final layer are being optimized
    optimizer_conv = optim.Adam(model_conv.classifier.parameters(), lr=learning_rate)
    # Decay LR by a factor of gamma every step_size epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=step_size, gamma=gamma)

    # train model
    best_model, train_dict_fixed = model_function(model_conv,
                                                  criterion,
                                                  optimizer_conv,
                                                  exp_lr_scheduler,
                                                  dataset_sizes=dataset_sizes,
                                                  num_epochs=epochs,
                                                  dataloaders=dataloaders)
    return best_model, train_dict_fixed

In [6]:
# full train function that saves training progress and best model to disk
def Train(data_path, labels_path, batch_size, epoch_count, gamma, step_size, learning_rate, split_test_size=0.3, split_random_state=42, class_count=10):
    """Run the full pipeline: preprocess, train, and save the results to disk.

    Writes two files into the current working directory:
    ``training_best_model_state_dict.pt`` (state dict of the best model) and
    ``training_accuracy_and_loss.csv`` (per-epoch loss and accuracy for both
    phases).

    Args:
        data_path: path to the ``.npy`` file with the image data.
        labels_path: path to the ``.npy`` file with the labels.
        batch_size: batch size for the dataloaders.
        epoch_count: number of training epochs.
        gamma: learning-rate decay factor.
        step_size: number of epochs between learning-rate decays.
        learning_rate: initial learning rate.
        split_test_size: fraction of samples held out for validation.
        split_random_state: seed for the train/validation split.
        class_count: number of output classes.

    Returns:
        ``(best_model, train_dict_fixed)``: the trained model and the
        per-epoch history dict.
    """
    # current working directory
    wd = os.getcwd()
    print('Working directory: ', wd)

    # start time
    current_time = time.strftime("%H:%M:%S", time.localtime())
    print(current_time)

    # Forward the caller's split settings instead of fixed values so that
    # split_test_size / split_random_state actually take effect.
    dataloaders, dataset_sizes = preprocess_data(data_path=data_path,
                                                 labels_path=labels_path,
                                                 batch_size=batch_size,
                                                 split_test_size=split_test_size,
                                                 split_random_state=split_random_state)

    best_model, train_dict_fixed = train_model(dataloaders=dataloaders,
                                               epochs=epoch_count,
                                               dataset_sizes=dataset_sizes,
                                               gamma=gamma,
                                               step_size=step_size,
                                               learning_rate=learning_rate,
                                               class_num=class_count)

    # save weights state dictionary of best model to disk in working directory
    torch.save(best_model.state_dict(), os.path.join(wd, 'training_best_model_state_dict.pt'))

    # save training epoch data to disk (one row per epoch)
    df = pd.DataFrame({
        'train_loss': train_dict_fixed['train'][0],
        'val_loss': train_dict_fixed['val'][0],
        'train_acc': train_dict_fixed['train'][1],
        'val_acc': train_dict_fixed['val'][1],
    })
    df.to_csv(os.path.join(wd, 'training_accuracy_and_loss.csv'))

    print('Saved model to Disk','\n','Finished Training')

    return best_model, train_dict_fixed

In [7]:
# Launch a single-epoch training run; keyword arguments are grouped by role.
best_model, train_dict_fixed = Train(
    # input files and train/validation split
    data_path='nodup_data.npy',
    labels_path='nodup_labels.npy',
    split_test_size=0.3,
    split_random_state=42,
    # optimisation settings
    batch_size=64,
    epoch_count=1,
    learning_rate=0.01,
    # learning-rate decay settings
    step_size=5,
    gamma=0.1,
)

Working directory:  E:\GIT_REPOS\final-project-rpjc_ml
12:52:32
CPU cores:  8


FileNotFoundError: [Errno 2] No such file or directory: 'nodup_data.npy'