In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from dataset import Dataset
from torch.utils.data import DataLoader

import os
import cv2
import numpy as np

from time import time
from visdom import Visdom
import copy

model_name = "vgg11"
num_classes = 10
batch_size = 64
feature_extract = False
pretrained = False
num_epochs = 1000
input_size = 224

# 设置visdom
viz = Visdom()
step_list = [0]
win = viz.line(X=np.array([0]), Y=np.array([1.0]), opts=dict(title='loss'))


Setting up a new session...


In [2]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [3]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs):
    since = time()

    val_acc_history = []
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 1000.0

    count = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()  # Set model to training mode
        
        running_loss = 0.0
        running_corrects = 0

        count += 1
        # Iterate over data.
        for step, (inputs, labels) in enumerate(dataloaders):
            inputs = inputs.cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            if step % 1 == 0:
                step_list.append(step_list[-1] + 1)
                viz.line(X=np.array([step_list[-1]]), Y=np.array([loss.item()]), win=win, update='append')
                
            if step % 100 == 0:    
                print('     step:{}, loss:{:.3f}, time:{:.3f} min'
                    .format(step, loss.item(), (time() - since) / 60))

        # deep copy the model
        if loss.item() < best_acc:
            count = 0
            best_acc = loss.item()
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), './model/net_scratch{}-{:.3f}.pth'.format(epoch, loss))

        if count == 10:
            break
        print()

    time_elapsed = time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [4]:
BASE_DIR = './data/'
impath = os.listdir(BASE_DIR + 'train')

train_ds = Dataset(BASE_DIR)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# model = models.squeezenet1_0(pretrained = pretrained).cuda()
# set_parameter_requires_grad(model, feature_extract)
# model.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
# model.num_classes = num_classes

model = models.vgg11_bn(pretrained=pretrained).cuda()
# set_parameter_requires_grad(model, feature_extract)
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs,num_classes)

model =  torch.nn.DataParallel(model).cuda()

loss_fn = torch.nn.CrossEntropyLoss()

# params_to_update = model.parameters()
# print("Params to learn:")
# if feature_extract:
#     params_to_update = []
#     for name,param in model.named_parameters():
#         if param.requires_grad == True:
#             params_to_update.append(param)
# else:
#     for name,param in model.named_parameters():
#         if param.requires_grad == True:
#             print("\t",name)
            
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [5]:
model_ft, hist = train_model(model, train_dl, loss_fn, optimizer, num_epochs=num_epochs)



Epoch 0/999
----------
     step:0, loss:2.450, time:0.009 min
     step:100, loss:0.788, time:0.798 min
     step:200, loss:0.528, time:1.594 min
     step:300, loss:0.527, time:2.386 min
     step:400, loss:0.484, time:3.176 min
     step:500, loss:0.444, time:3.976 min
     step:600, loss:0.299, time:4.771 min
     step:700, loss:0.358, time:5.564 min
     step:800, loss:0.534, time:6.352 min
     step:900, loss:0.278, time:7.133 min

Epoch 1/999
----------
     step:0, loss:0.348, time:7.442 min
     step:100, loss:0.488, time:8.259 min
     step:200, loss:0.534, time:9.037 min
     step:300, loss:0.388, time:9.831 min
     step:400, loss:0.346, time:10.630 min
     step:500, loss:0.212, time:11.434 min
     step:600, loss:0.457, time:12.232 min
     step:700, loss:0.333, time:13.021 min
     step:800, loss:0.318, time:13.819 min
     step:900, loss:0.337, time:14.635 min

Epoch 2/999
----------
     step:0, loss:0.211, time:14.946 min
     step:100, loss:0.240, time:15.749 min
   

In [6]:

# test_img = cv2.imread(BASE_DIR + 'train/0/1.jpg')
# test_img = cv2.resize(test_img, (input_size,input_size), interpolation=cv2.INTER_CUBIC)
# test_img = np.moveaxis(test_img,2,0)
# test_img = torch.FloatTensor(test_img).cuda().unsqueeze(0)
# print(test_img.shape)

# outputs = model(test_img)
# print(outputs.shape)
# print(outputs)

Connection to remote host was lost.
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_close() takes 1 positional argument but 3 were given
[Errno 111] Connection refused
on_clos