Training a ResNet model

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torchvision
import pickle
import os
import numpy as np

In [None]:
# Number of target categories; used as the output width of the classifier head.
CAT_CNT = 12

In [None]:
def create_model():
    """Build a pretrained ResNet-50 with a new classification head.

    Loads torchvision's pretrained ResNet-50, freezes every pretrained
    parameter, and replaces the final fully connected layer with a new head
    that ends in ``CAT_CNT`` outputs followed by ``LogSoftmax``. The layers of
    the new head are created after the freeze, so they remain trainable.

    Returns:
        The modified ResNet-50 module.
    """
    # load a pretrained resnet model
    res = torchvision.models.resnet50(pretrained=True)

    # Freeze the pretrained weights. The attribute must be spelled
    # ``requires_grad``; any other name just attaches an unused attribute and
    # leaves the backbone fully trainable.
    for param in res.parameters():
        param.requires_grad = False

    # counting in-features for fully connected layer
    n_inputs = res.fc.in_features

    # New head: hidden layer -> ReLU -> CAT_CNT outputs -> ReLU -> log-softmax.
    # NOTE(review): the ReLU right before LogSoftmax clamps negative scores to
    # zero before normalisation -- confirm this is intended.
    res.fc = nn.Sequential(
        nn.Linear(n_inputs, 500),
        nn.ReLU(),
        nn.Linear(500, CAT_CNT),
        nn.ReLU(),
        nn.LogSoftmax(dim=1),
    )

    return res
    

In [None]:
# Build the network once; it is passed to train_model() further down.
model = create_model()
# Uncomment to print a per-layer summary for a (3, 128, 128) input.
#print(summary(model, (3, 128, 128)))

In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


def create_dataloaders():
    """Load the pickled datasets and wrap them in ``DataLoader`` objects.

    Reads ``trainX_128``, ``trainY_128``, ``validX_128`` and ``validY_128``
    from ``./balanced_pickled``, permutes the image arrays with
    ``(0, 3, 2, 1)`` and converts everything to torch tensors.

    Returns:
        A ``(train, valid)`` tuple. Each element is a one-element ``set``
        holding a ``DataLoader``; that wrapping is kept because
        ``train_model`` unwraps it with ``next(iter(...))``. The training
        loader shuffles with batch size 20; the validation loader uses the
        ``DataLoader`` defaults.
    """
    data_path = os.path.join(".", "balanced_pickled")
    trainX = _load_pickle(os.path.join(data_path, "trainX_128"))
    trainY = _load_pickle(os.path.join(data_path, "trainY_128"))
    validX = _load_pickle(os.path.join(data_path, "validX_128"))
    validY = _load_pickle(os.path.join(data_path, "validY_128"))

    # Move the last axis to position 1 (and swap axes 1 and 2).
    # NOTE(review): presumably the arrays are (N, H, W, C); if so this yields
    # (N, C, W, H) rather than (N, C, H, W) -- confirm the swap is intended.
    trainX = np.transpose(trainX, (0, 3, 2, 1))
    validX = np.transpose(validX, (0, 3, 2, 1))

    # Tensors built with from_numpy never require grad, so the deprecated
    # Variable wrapper is unnecessary.
    tensor_trainX = torch.from_numpy(np.asarray(trainX)).float()
    tensor_trainY = torch.from_numpy(np.asarray(trainY)).long()
    train = TensorDataset(tensor_trainX, tensor_trainY)
    trainLoader = {DataLoader(train, batch_size=20, shuffle=True)}

    # Whole-array conversion replaces the per-sample torch.stack; the
    # resulting tensors stay float32, as before.
    tensor_validX = torch.from_numpy(np.asarray(validX)).float()
    tensor_validY = torch.from_numpy(np.asarray(validY)).float()
    valid = TensorDataset(tensor_validX, tensor_validY)
    validLoader = {DataLoader(valid)}

    return (trainLoader, validLoader)


In [None]:
def train_model(model, epochs=3, log_every=2000):
    """Train ``model`` on the training loader from ``create_dataloaders``.

    Optimises with SGD (lr=0.001, momentum=0.9) against a cross-entropy loss
    and prints the mean loss every ``log_every`` mini-batches.

    Args:
        model: The ``nn.Module`` to train. It is moved in place to the GPU
            when CUDA is available; otherwise it stays on the CPU.
        epochs: Number of passes over the training loader.
        log_every: Number of mini-batches between loss printouts.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be at least 1")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Move the model before building the optimizer so the optimizer tracks
    # the parameters that actually live on the target device.
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # create_dataloaders() hands back each DataLoader inside a one-element
    # set, so pull the loader out once instead of on every epoch.
    train_set, _valid_set = create_dataloaders()
    train_loader = next(iter(train_set))

    for epoch in range(epochs):
        running_loss = 0.0

        for step, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # clears the gradients of all optimized tensors
            optimizer.zero_grad()

            # forward + backward + optimize; calling the module (rather than
            # .forward) keeps registered hooks working.
            logits = model(inputs)
            # Index of the largest entry along dim 1 becomes the class target.
            # NOTE(review): assumes labels are (batch, classes) one-hot rows
            # -- confirm against the pickled data.
            loss = criterion(logits, torch.max(labels, 1)[1])
            loss.backward()
            optimizer.step()

            # print statistics (loss is accumulated exactly once per step)
            running_loss += loss.item()
            if step % log_every == log_every - 1:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, step + 1, running_loss / log_every))
                running_loss = 0.0

In [None]:
# Run training on the model created earlier in the notebook.
train_model(model)