In [1]:
import torch, pandas as pd, numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from PIL import Image
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset
import matplotlib.pyplot as plt
from skimage import transform
from sklearn.metrics import accuracy_score
import pandas as pds

import os
import time
import copy
import torchvision
from torchvision import transforms, utils, models
from tqdm.notebook import tqdm

plt.ion()

In [2]:
batch_size = 48
output_nodes = 1
AVA_NP_REGRESS_FILES = '/media/matt/New Volume/ava/np_regress_files/'
PATH = 'modifiedResNet.pt'

In [3]:
"""
Our modified version with changing the final layer (using CNN as classifier)
"""
class modifiedResNet(nn.Module):
    def __init__(self,num_outputs,existing_model=None,frozen=False):
        super(modifiedResNet, self).__init__()
        self.num_outputs = num_outputs
        self.existing_model = existing_model
        self.resnet = models.resnet50(pretrained=True)
        if(frozen):
            for param in self.resnet.parameters():
                param.requires_grad = False
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_outputs)

    def forward(self, x):
        out = self.resnet(x)
        return out

In [4]:
def get_dataset(data_filepath, idx):
    dataset_list = []
    for i,f in enumerate(os.listdir(data_filepath), idx):
        data = np.load(data_filepath + f)
        if (i - idx) > 1023:
            break
        tensor_x = torch.Tensor(data['x'])
        tensor_x = torch.unsqueeze(tensor_x, 0)
        # print(tensor_x.size())
        tensor_y = torch.Tensor(data['y'])
        tensor_y = torch.unsqueeze(tensor_y, 0)
        # print(tensor_y.size())

        dataset_list.append(TensorDataset(tensor_x,tensor_y))

    dataset = ConcatDataset(dataset_list)
    return dataset

In [5]:
"""
NEW Train method
"""
def train_model(model,save_filepath,training_loader,validation_loader, epochs):

    epochs_list = []
    train_loss_list = []
    val_loss_list = []
    training_len = len(training_loader.dataset)
    validation_len = len(validation_loader.dataset)

    data_loaders = {"train": training_loader, "val": validation_loader}

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_func = nn.MSELoss() #nn.CrossEntropyLoss()

    # training and testing
    for epoch in tqdm(range(epochs), position=0, leave=True):

        train_loss = 0.0
        val_loss = 0.0
        temp_loss = 100000000000000.0
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0
            for i, (x, y) in enumerate(data_loaders[phase]):
                output = model(x)
                o = torch.squeeze(output).type(torch.FloatTensor)
                l = torch.squeeze(y).type(torch.FloatTensor)
#                 if epoch % 10 == 0:
#                     print(o, l)
                loss = loss_func(o, l)
                optimizer.zero_grad()           

                if phase == 'train':
                    loss.backward()
                    optimizer.step()                                      

                running_loss += loss.item()
            
            if phase == 'train':
                train_loss = running_loss
            else:
                val_loss = running_loss
        if epoch % 10 == 0:
            print('[%d, %5d] train loss: %.6f val loss: %.6f' % (epoch + 1, i + 1, train_loss, val_loss))
        if val_loss < temp_loss:
            torch.save(model, save_filepath)
            temp_loss = val_loss
        epochs_list.append(epoch)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
    
    loss_df = pds.DataFrame(
        {
            'epoch': epochs_list,
            'training loss': train_loss_list,
            'validation loss': val_loss_list
        }
    )
    # Writing loss csv, change path to whatever you want to name it
    loss_df.to_csv('loss.csv', index=None)
    return train_loss_list, val_loss_list

In [None]:
epochs = 100
model = modifiedResNet(output_nodes)

training_dataset = get_dataset(AVA_NP_REGRESS_FILES,0)
training_loader = DataLoader(dataset=training_dataset,batch_size=batch_size,shuffle=True)

validation_dataset = get_dataset(AVA_NP_REGRESS_FILES,1024)
validation_loader = DataLoader(dataset=validation_dataset,batch_size=batch_size)

training_loss, validation_loss = train_model(model,PATH,training_loader,validation_loader,epochs)

  0%|          | 0/100 [00:00<?, ?it/s]

[1,    22] train loss: 60.847301 val loss: 2521.596123
[11,    22] train loss: 7.381043 val loss: 8.716837


In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2)
fig.tight_layout()
ax[0,0].plot(range(epochs), validation_loss)
ax[0,0].set_title('Validation Loss')
ax[0,0].set_ylabel('Loss')
ax[0,0].set_xlabel('Epoch')

ax[0,1].plot(range(epochs), training_loss)
ax[0,1].set_title('Training Loss')
ax[0,1].set_ylabel('Loss')
ax[0,1].set_xlabel('Epoch')


# ax[1,0].plot(np.arange(v_labels_list.shape[0]), v_labels_list[:,0], color='blue')
# ax[1,0].plot(np.arange(v_labels_list.shape[0]), v_output_list[:,0], color='red')
# ax[1,0].set_title('Validation Amps per Sample')
# ax[1,0].set_ylabel('Amp')
# ax[1,0].set_xlabel('Sample')

# ax[1,1].plot(np.arange(t_labels_list.shape[0]), t_labels_list[:,0], color='blue')
# ax[1,1].plot(np.arange(t_labels_list.shape[0]), t_output_list[:,0], color='red')
# ax[1,1].set_title('Training Amps per Sample')
# ax[1,1].set_ylabel('Amp')
# ax[1,1].set_xlabel('Sample')

plt.show()