In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from dataset_ import Dataset
from torch.utils.data import DataLoader

import os
import cv2
import numpy as np

from time import time
from visdom import Visdom
import copy

# Define important global variables 
# Change 'pretrained' variable to decide to get the pretrained weight or not
# Change 'feature_extract' variable to decide to keep all layers to update or freeze a part of the model
num_classes = 10
batch_size = 256
num_epochs = 15
feature_extract = True
pretrained = True
num_epochs = 1000
input_size = 224

# Use Visdom to visualise the training progress
viz = Visdom()
step_list = [0]
win = viz.line(X=np.array([0]), Y=np.array([1.0]), opts=dict(title='loss'))


Setting up a new session...


In [2]:
# This function is used to tell the optimizer not to update params in the freeze layer 
# Only use in freeze mode
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [3]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs):
    since = time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 1000.0

    count = 0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()  # Set model to training mode
        
        running_loss = 0.0
        running_corrects = 0

        count += 1
        # Iterate over data.
        for step, (inputs, labels) in enumerate(dataloaders):
            inputs = inputs.cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward pass and calculate loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

            # Back prop and update params
            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)

            if step % 1 == 0:
                step_list.append(step_list[-1] + 1)
                viz.line(X=np.array([step_list[-1]]), Y=np.array([loss.item()]), win=win, update='append')
                
            if step % 100 == 0:    
                print('     step:{}, loss:{:.3f}, time:{:.3f} min'
                    .format(step, loss.item(), (time() - since) / 60))

        # deep copy the model
        if loss.item() < best_acc:
            count = 0
            best_acc = loss.item()
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), './model/net{}-{:.3f}.pth'.format(epoch, loss))

        if count == 10:
            break
        print()

    time_elapsed = time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [4]:
BASE_DIR = './data/'
impath = os.listdir(BASE_DIR + 'train')

# Load data
train_ds = Dataset(BASE_DIR)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Init model and customize from the pretrained version in Pytorch
model = models.vgg11_bn(pretrained=pretrained).cuda()
set_parameter_requires_grad(model, feature_extract)
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs,num_classes)
model =  torch.nn.DataParallel(model).cuda()

loss_fn = torch.nn.CrossEntropyLoss()

# Choose params to update 
params_to_update = model.parameters()
if feature_extract:
    params_to_update = []
    for name,param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            
optimizer = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to /home/avitech-pc4/.cache/torch/hub/checkpoints/vgg11_bn-6002323d.pth
100%|██████████| 507M/507M [02:41<00:00, 3.30MB/s] 


Params to learn:


In [5]:
model_ft = train_model(model, train_dl, loss_fn, optimizer, num_epochs=num_epochs)



Epoch 0/999
----------
     step:0, loss:2.336, time:0.009 min
     step:100, loss:1.115, time:0.818 min
     step:200, loss:0.905, time:1.627 min

Epoch 1/999
----------
     step:0, loss:0.880, time:1.915 min
     step:100, loss:0.679, time:2.724 min
     step:200, loss:0.731, time:3.534 min

Epoch 2/999
----------
     step:0, loss:0.693, time:3.824 min
     step:100, loss:0.625, time:4.641 min
     step:200, loss:0.841, time:5.456 min

Epoch 3/999
----------
     step:0, loss:0.627, time:5.749 min
     step:100, loss:0.637, time:6.562 min
     step:200, loss:0.649, time:7.383 min

Epoch 4/999
----------
     step:0, loss:0.738, time:7.676 min
     step:100, loss:0.650, time:8.496 min
     step:200, loss:0.624, time:9.318 min

Epoch 5/999
----------
     step:0, loss:0.627, time:9.604 min
     step:100, loss:0.711, time:10.423 min
     step:200, loss:0.552, time:11.246 min

Epoch 6/999
----------
     step:0, loss:0.512, time:11.529 min
     step:100, loss:0.671, time:12.354 min
   

In [6]:

# test_img = cv2.imread(BASE_DIR + 'train/0/1.jpg')
# test_img = cv2.resize(test_img, (input_size,input_size), interpolation=cv2.INTER_CUBIC)
# test_img = np.moveaxis(test_img,2,0)
# test_img = torch.FloatTensor(test_img).cuda().unsqueeze(0)
# print(test_img.shape)

# outputs = model(test_img)
# print(outputs.shape)
# print(outputs)

Connection to remote host was lost.
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_clos