In [1]:
import os
import numpy as np
from PIL import Image
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# True when a CUDA device is usable; read later to choose DataLoader options
# and the torch.device that the model and batches are moved to.
cuda = torch.cuda.is_available()
# Bare expression so the notebook displays the flag as the cell output.
cuda

True

In [2]:
# Training split, loaded through torchvision's ImageFolder with every image
# passed through ToTensor.
trainset = torchvision.datasets.ImageFolder(
    root='train_data/medium',
    transform=torchvision.transforms.ToTensor(),
)
# Display (number of samples, number of classes) as the cell output.
len(trainset), len(trainset.classes)

(822154, 2300)

In [3]:
# Validation split, loaded the same way as the training split (ImageFolder +
# ToTensor).
evalset = torchvision.datasets.ImageFolder(
    root='validation_classification/medium',
    transform=torchvision.transforms.ToTensor(),
)
# Display (number of samples, number of classes) as the cell output.
len(evalset), len(evalset.classes)

(4601, 2300)

In [4]:
# Options shared by the training and validation loaders. Worker processes and
# pinned host memory are only requested when a GPU is available.
_common_loader_args = dict(batch_size=128, drop_last=False)
if cuda:
    _common_loader_args.update(num_workers=2, pin_memory=True)

# Training batches are reshuffled every epoch.
train_loader_args = dict(shuffle=True, **_common_loader_args)
train_loader = DataLoader(trainset, **train_loader_args)

# Validation metrics do not depend on sample order, so the validation loader
# iterates in a fixed order (the original shuffled it for no benefit).
eval_loader_args = dict(shuffle=False, **_common_loader_args)
eval_loader = DataLoader(evalset, **eval_loader_args)

In [5]:
class IdentityBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers and a shortcut.

    The main branch is conv(stride) -> BN -> ReLU -> conv -> BN. The shortcut
    is added to the main branch before the final ReLU.

    Args:
        in_channels (int): number of channels of the input feature map.
        out_channels (int): number of channels produced by the block.
        stride (int): stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride):
        super(IdentityBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # A 1x1 projection is required whenever the main branch changes the
        # tensor shape: either the channel count OR the spatial size (stride).
        # Checking only the channel count made equal-channel blocks with
        # stride != 1 fail on the residual addition.
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return relu(main_branch(x) + shortcut(x))."""
        y = F.relu(self.bn1(self.conv1(x)), inplace=True)
        y = self.bn2(self.conv2(y))
        y += self.shortcut(x)
        return F.relu(y, inplace=True)

In [6]:
class CenterLoss(nn.Module):
    """Center loss: mean squared distance between features and their class center.

    One learnable center of size ``feat_dim`` is kept per class.

    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
        device (torch.device): device on which the centers are created.
    """
    def __init__(self, num_classes, feat_dim, device=torch.device('cpu')):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.device = device

        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).to(self.device))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (batch_size).

        Returns:
            Scalar tensor: mean over the batch of the squared distance between
            each feature and the center of its label, clamped to
            [1e-12, 1e+12].
        """
        batch_size = x.size(0)
        # ||x||^2 + ||c||^2 for every (sample, class) pair ...
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        # ... minus 2 * x . c. The keyword form replaces the positional
        # (beta, alpha, mat1, mat2) call, which current PyTorch rejects.
        distmat = torch.addmm(distmat, x, self.centers.t(), beta=1, alpha=-2)

        # Build the class-index row on the labels' device instead of the
        # device recorded at construction time, which goes stale if the
        # module is moved afterwards.
        classes = torch.arange(self.num_classes, device=labels.device).long()
        mask = labels.unsqueeze(1).eq(classes.unsqueeze(0))

        # Boolean indexing selects, row by row, the distance to the labelled
        # class -- the same values the per-sample loop used to collect.
        dist = distmat[mask].clamp(min=1e-12, max=1e+12)  # for numerical stability
        return dist.mean()

In [7]:
class Network(nn.Module):
    """ResNet-style CNN with three heads: center-loss feature, class logits, embedding.

    The trunk is a 3x3 conv stem (3 -> 128 channels) followed by eight
    IdentityBlocks (128, 128, 256, 256, 512, 512, 1024, 1024 channels); the
    blocks entering the 256- and 1024-channel stages use stride 2. The trunk
    output is globally average-pooled to a 1024-vector that feeds three
    linear heads.

    Args:
        feat_dim (int): size of the feature passed to the center loss.
        num_classes (int): number of classification logits. Defaults to 2300,
            the value that used to be hard-coded.
    """

    def __init__(self, feat_dim=2, num_classes=2300):
        super(Network, self).__init__()

        self.layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, padding=1, stride=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            IdentityBlock(in_channels=128, out_channels=128, stride=1),
            IdentityBlock(in_channels=128, out_channels=128, stride=1),

            IdentityBlock(in_channels=128, out_channels=256, stride=2),
            IdentityBlock(in_channels=256, out_channels=256, stride=1),

            IdentityBlock(in_channels=256, out_channels=512, stride=1),
            IdentityBlock(in_channels=512, out_channels=512, stride=1),

            IdentityBlock(in_channels=512, out_channels=1024, stride=2),
            IdentityBlock(in_channels=1024, out_channels=1024, stride=1),
        )

        self.linear_label = nn.Linear(1024, num_classes, bias=True)
        self.embedding = nn.Linear(1024, 2048, bias=True)

        # For creating the embedding to be passed into the Center Loss criterion
        self.linear_closs = nn.Linear(1024, feat_dim, bias=True)
        self.relu_closs = nn.ReLU(inplace=True)

    def forward(self, x, evalMode=False):
        """Run the network on a batch of images.

        Args:
            x: input batch; the stem convolution expects 3 input channels.
            evalMode: accepted for interface compatibility; not used.

        Returns:
            Tuple ``(closs_output, label_output, embedding)``:
            the ReLU-activated ``feat_dim`` feature for the center loss, the
            class logits divided by the L2 norm of the matching classifier
            weight row, and the 2048-dimensional embedding.
        """
        output = self.layers(x)

        # Global average pooling over the full spatial extent, then flatten
        # to (batch, channels).
        output = F.avg_pool2d(output, [output.size(2), output.size(3)], stride=1)
        output = output.reshape(output.shape[0], output.shape[1])

        embedding = self.embedding(output)
        label_output = self.linear_label(output)
        # Scale each logit (bias included) by the norm of its class weight row.
        label_output = label_output/torch.norm(self.linear_label.weight, dim=1)

        # Create the feature embedding for the Center Loss
        closs_output = self.linear_closs(output)
        closs_output = self.relu_closs(closs_output)

        return closs_output, label_output, embedding

def init_weights(m):
    """Xavier-normal initialise the weight of an ``nn.Conv2d`` or ``nn.Linear``.

    Meant to be passed to ``Module.apply``. Modules of any other exact type
    (including subclasses of the two above) are left untouched, and biases are
    never modified.
    """
    if type(m) not in (nn.Conv2d, nn.Linear):
        return
    torch.nn.init.xavier_normal_(m.weight.data)

In [8]:
def train(model, data_loader, test_loader, task='Classification'):
    """Train ``model`` with cross-entropy plus weighted center loss.

    Relies on these notebook-level globals: ``numEpochs``, ``device``,
    ``optimizer_label``, ``optimizer_closs``, ``criterion_label``,
    ``criterion_closs`` and ``closs_weight``.

    Args:
        model: network returning ``(feature, logits, embedding)``.
        data_loader: iterable of ``(images, labels)`` training batches.
        test_loader: validation batches, evaluated after every epoch when
            ``task == 'Classification'``.
        task: any value other than ``'Classification'`` skips validation.
    """
    model.train()

    # Progress is reported against the loader's real length rather than a
    # hard-coded dataset size and batch size.
    try:
        num_batches = len(data_loader)
    except TypeError:
        num_batches = None  # loader without a length: skip the percentage

    for epoch in range(numEpochs):
        avg_loss = 0.0
        for batch_num, (feats, labels) in enumerate(data_loader):
            feats, labels = feats.to(device), labels.to(device)

            optimizer_label.zero_grad()
            optimizer_closs.zero_grad()

            feature, outputs, embedding = model(feats)

            l_loss = criterion_label(outputs, labels.long())
            c_loss = criterion_closs(feature, labels.long())
            loss = l_loss + closs_weight * c_loss

            loss.backward()

            optimizer_label.step()
            # by doing so, weight_cent would not impact on the learning of centers
            for param in criterion_closs.parameters():
                param.grad.data *= (1. / closs_weight)
            optimizer_closs.step()

            if num_batches:
                print('batch completed ' + str((batch_num + 1) / num_batches * 100) + '% ', end='\r')

            avg_loss += loss.item()
            if batch_num % 100 == 99:
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/100))
                avg_loss = 0.0

            # Drop the batch references first so that emptying the cache can
            # actually release their memory.
            del feats
            del labels
            del loss
            torch.cuda.empty_cache()

        if task == 'Classification':
            val_loss, val_acc = test_classify_closs(model, test_loader)
            print('Val Loss: {:.4f}\tVal Accuracy: {:.4f}'.format(val_loss, val_acc))

def test_classify_closs(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and return ``(mean_loss, accuracy)``.

    Relies on the notebook-level globals ``device``, ``criterion_label``,
    ``criterion_closs`` and ``closs_weight``.

    Args:
        model: network returning ``(feature, logits, embedding)``.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        Tuple of the sample-weighted mean of the combined loss and the
        fraction of correctly classified samples.

    The model is switched to eval mode for the pass and put back into
    training mode before returning.
    """
    model.eval()
    test_loss = []
    accuracy = 0
    total = 0

    # No gradients are needed for evaluation; without this guard every
    # validation batch builds and retains an autograd graph.
    with torch.no_grad():
        for batch_num, (feats, labels) in enumerate(test_loader):
            feats, labels = feats.to(device), labels.to(device)
            feature, outputs, embedding = model(feats)

            _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
            pred_labels = pred_labels.view(-1)

            l_loss = criterion_label(outputs, labels.long())
            c_loss = criterion_closs(feature, labels.long())
            loss = l_loss + closs_weight * c_loss

            accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
            total += len(labels)
            # Repeat the batch loss once per sample so the final mean is
            # weighted by batch size (the last batch may be smaller).
            test_loss.extend([loss.item()]*feats.size()[0])
            del feats
            del labels

    model.train()
    return np.mean(test_loss), accuracy/total

In [16]:
# Device that the model and every batch are moved to.
device = torch.device("cuda") if cuda else torch.device("cpu")

# Hyper-parameters (read as globals by the training / evaluation functions).
numEpochs = 5
closs_weight = 0.003   # weight of the center loss in the combined loss
num_feats = 3
learningRate = 3e-4    # Adam learning rate for the network parameters
lr_cent = 0.5          # SGD learning rate for the class centers
feat_dim = 2           # size of the feature fed to the center loss

# Model with Xavier-normal initialised conv / linear weights.
cnn = Network(feat_dim)
cnn.apply(init_weights)

# Loss terms: cross-entropy on the logits plus center loss on the features.
criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(2300, feat_dim, device)

# Separate optimizers: Adam for the network, SGD for the class centers.
optimizer_label = torch.optim.Adam(cnn.parameters(), lr=learningRate)
optimizer_closs = torch.optim.SGD(criterion_closs.parameters(), lr=lr_cent)

In [67]:
# One pass over the training data: train() reads the global numEpochs, so it
# is overridden here before the call.
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.2955 
Epoch: 1	Batch: 200	Avg-Loss: 0.2515
Epoch: 1	Batch: 300	Avg-Loss: 0.2654
Epoch: 1	Batch: 400	Avg-Loss: 0.2614
Epoch: 1	Batch: 500	Avg-Loss: 0.2689
Epoch: 1	Batch: 600	Avg-Loss: 0.2759
Epoch: 1	Batch: 700	Avg-Loss: 0.2948
Epoch: 1	Batch: 800	Avg-Loss: 0.2907
Epoch: 1	Batch: 900	Avg-Loss: 0.2902
Epoch: 1	Batch: 1000	Avg-Loss: 0.3107
Epoch: 1	Batch: 1100	Avg-Loss: 0.3147
Epoch: 1	Batch: 1200	Avg-Loss: 0.3267
Epoch: 1	Batch: 1300	Avg-Loss: 0.3171
Epoch: 1	Batch: 1400	Avg-Loss: 0.3061
Epoch: 1	Batch: 1500	Avg-Loss: 0.3215
Epoch: 1	Batch: 1600	Avg-Loss: 0.3264
Epoch: 1	Batch: 1700	Avg-Loss: 0.3299
Epoch: 1	Batch: 1800	Avg-Loss: 0.3349
Epoch: 1	Batch: 1900	Avg-Loss: 0.3374
Epoch: 1	Batch: 2000	Avg-Loss: 0.3204
Epoch: 1	Batch: 2100	Avg-Loss: 0.3568
Epoch: 1	Batch: 2200	Avg-Loss: 0.3571
Epoch: 1	Batch: 2300	Avg-Loss: 0.3442
Epoch: 1	Batch: 2400	Avg-Loss: 0.3544
Epoch: 1	Batch: 2500	Avg-Loss: 0.3540
Epoch: 1	Batch: 2600	Avg-Loss: 0.3379
Epoch: 1	Batch: 2700

In [76]:
# Another single-epoch pass continuing from the current weights of `cnn`
# (same statements as the previous training cell).
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.2124 
Epoch: 1	Batch: 200	Avg-Loss: 0.1867
Epoch: 1	Batch: 300	Avg-Loss: 0.1820
Epoch: 1	Batch: 400	Avg-Loss: 0.1736
Epoch: 1	Batch: 500	Avg-Loss: 0.1847
Epoch: 1	Batch: 600	Avg-Loss: 0.1923
Epoch: 1	Batch: 700	Avg-Loss: 0.1832
Epoch: 1	Batch: 800	Avg-Loss: 0.1945
Epoch: 1	Batch: 900	Avg-Loss: 0.2063
Epoch: 1	Batch: 1000	Avg-Loss: 0.2024
Epoch: 1	Batch: 1100	Avg-Loss: 0.2164
Epoch: 1	Batch: 1200	Avg-Loss: 0.2238
Epoch: 1	Batch: 1300	Avg-Loss: 0.2338
Epoch: 1	Batch: 1400	Avg-Loss: 0.2288
Epoch: 1	Batch: 1500	Avg-Loss: 0.2468
Epoch: 1	Batch: 1600	Avg-Loss: 0.2446
Epoch: 1	Batch: 1700	Avg-Loss: 0.2385
Epoch: 1	Batch: 1800	Avg-Loss: 0.2517
Epoch: 1	Batch: 1900	Avg-Loss: 0.2449
Epoch: 1	Batch: 2000	Avg-Loss: 0.2495
Epoch: 1	Batch: 2100	Avg-Loss: 0.2525
Epoch: 1	Batch: 2200	Avg-Loss: 0.2684
Epoch: 1	Batch: 2300	Avg-Loss: 0.2456
Epoch: 1	Batch: 2400	Avg-Loss: 0.2682
Epoch: 1	Batch: 2500	Avg-Loss: 0.2504
Epoch: 1	Batch: 2600	Avg-Loss: 0.2623
Epoch: 1	Batch: 2700

In [10]:
# LR=3e-5
# NOTE(review): nothing in this cell changes the optimizer's learning rate;
# presumably it was lowered in a cell that is not shown -- confirm.
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.1412 
Epoch: 1	Batch: 200	Avg-Loss: 0.1242
Epoch: 1	Batch: 300	Avg-Loss: 0.1050
Epoch: 1	Batch: 400	Avg-Loss: 0.1014
Epoch: 1	Batch: 500	Avg-Loss: 0.0939
Epoch: 1	Batch: 600	Avg-Loss: 0.0866
Epoch: 1	Batch: 700	Avg-Loss: 0.0814
Epoch: 1	Batch: 800	Avg-Loss: 0.0773
Epoch: 1	Batch: 900	Avg-Loss: 0.0777
Epoch: 1	Batch: 1000	Avg-Loss: 0.0758
Epoch: 1	Batch: 1100	Avg-Loss: 0.0735
Epoch: 1	Batch: 1200	Avg-Loss: 0.0733
Epoch: 1	Batch: 1300	Avg-Loss: 0.0701
Epoch: 1	Batch: 1400	Avg-Loss: 0.0725
Epoch: 1	Batch: 1500	Avg-Loss: 0.0712
Epoch: 1	Batch: 1600	Avg-Loss: 0.0683
Epoch: 1	Batch: 1700	Avg-Loss: 0.0681
Epoch: 1	Batch: 1800	Avg-Loss: 0.0601
Epoch: 1	Batch: 1900	Avg-Loss: 0.0631
Epoch: 1	Batch: 2000	Avg-Loss: 0.0608
Epoch: 1	Batch: 2100	Avg-Loss: 0.0626
Epoch: 1	Batch: 2200	Avg-Loss: 0.0634
Epoch: 1	Batch: 2300	Avg-Loss: 0.0598
Epoch: 1	Batch: 2400	Avg-Loss: 0.0604
Epoch: 1	Batch: 2500	Avg-Loss: 0.0564
Epoch: 1	Batch: 2600	Avg-Loss: 0.0611
Epoch: 1	Batch: 2700

In [16]:
# LR=3e-6
# NOTE(review): nothing in this cell changes the optimizer's learning rate;
# presumably it was lowered in a cell that is not shown -- confirm.
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.0332 
Epoch: 1	Batch: 200	Avg-Loss: 0.0325
Epoch: 1	Batch: 300	Avg-Loss: 0.0332
Epoch: 1	Batch: 400	Avg-Loss: 0.0312
Epoch: 1	Batch: 500	Avg-Loss: 0.0309
Epoch: 1	Batch: 600	Avg-Loss: 0.0320
Epoch: 1	Batch: 700	Avg-Loss: 0.0295
Epoch: 1	Batch: 800	Avg-Loss: 0.0289
Epoch: 1	Batch: 900	Avg-Loss: 0.0287
Epoch: 1	Batch: 1000	Avg-Loss: 0.0313
Epoch: 1	Batch: 1100	Avg-Loss: 0.0295
Epoch: 1	Batch: 1200	Avg-Loss: 0.0276
Epoch: 1	Batch: 1300	Avg-Loss: 0.0273
Epoch: 1	Batch: 1400	Avg-Loss: 0.0296
Epoch: 1	Batch: 1500	Avg-Loss: 0.0261
Epoch: 1	Batch: 1600	Avg-Loss: 0.0263
Epoch: 1	Batch: 1700	Avg-Loss: 0.0275
Epoch: 1	Batch: 1800	Avg-Loss: 0.0279
Epoch: 1	Batch: 1900	Avg-Loss: 0.0271
Epoch: 1	Batch: 2000	Avg-Loss: 0.0261
Epoch: 1	Batch: 2100	Avg-Loss: 0.0288
Epoch: 1	Batch: 2200	Avg-Loss: 0.0244
Epoch: 1	Batch: 2300	Avg-Loss: 0.0253
Epoch: 1	Batch: 2400	Avg-Loss: 0.0252
Epoch: 1	Batch: 2500	Avg-Loss: 0.0257
Epoch: 1	Batch: 2600	Avg-Loss: 0.0273
Epoch: 1	Batch: 2700

In [10]:
# LR=3e-6 (additional single-epoch pass)
# NOTE(review): the learning rate is not set by any code in this cell --
# confirm where it is applied.
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.0275 
Epoch: 1	Batch: 200	Avg-Loss: 0.0280
Epoch: 1	Batch: 300	Avg-Loss: 0.0272
Epoch: 1	Batch: 400	Avg-Loss: 0.0270
Epoch: 1	Batch: 500	Avg-Loss: 0.0291
Epoch: 1	Batch: 600	Avg-Loss: 0.0299
Epoch: 1	Batch: 700	Avg-Loss: 0.0267
Epoch: 1	Batch: 800	Avg-Loss: 0.0263
Epoch: 1	Batch: 900	Avg-Loss: 0.0271
Epoch: 1	Batch: 1000	Avg-Loss: 0.0262
Epoch: 1	Batch: 1100	Avg-Loss: 0.0248
Epoch: 1	Batch: 1200	Avg-Loss: 0.0248
Epoch: 1	Batch: 1300	Avg-Loss: 0.0284
Epoch: 1	Batch: 1400	Avg-Loss: 0.0239
Epoch: 1	Batch: 1500	Avg-Loss: 0.0238
Epoch: 1	Batch: 1600	Avg-Loss: 0.0236
Epoch: 1	Batch: 1700	Avg-Loss: 0.0248
Epoch: 1	Batch: 1800	Avg-Loss: 0.0248
Epoch: 1	Batch: 1900	Avg-Loss: 0.0240
Epoch: 1	Batch: 2000	Avg-Loss: 0.0237
Epoch: 1	Batch: 2100	Avg-Loss: 0.0232
Epoch: 1	Batch: 2200	Avg-Loss: 0.0230
Epoch: 1	Batch: 2300	Avg-Loss: 0.0255
Epoch: 1	Batch: 2400	Avg-Loss: 0.0226
Epoch: 1	Batch: 2500	Avg-Loss: 0.0229
Epoch: 1	Batch: 2600	Avg-Loss: 0.0243
Epoch: 1	Batch: 2700

In [13]:
# LR=3e-6 (additional single-epoch pass)
# NOTE(review): the learning rate is not set by any code in this cell --
# confirm where it is applied.
numEpochs = 1
cnn.train()
cnn.to(device)
train(cnn, train_loader, eval_loader)

Epoch: 1	Batch: 100	Avg-Loss: 0.0212 
Epoch: 1	Batch: 200	Avg-Loss: 0.0192
Epoch: 1	Batch: 300	Avg-Loss: 0.0204
Epoch: 1	Batch: 400	Avg-Loss: 0.0196
Epoch: 1	Batch: 500	Avg-Loss: 0.0205
Epoch: 1	Batch: 600	Avg-Loss: 0.0200
Epoch: 1	Batch: 700	Avg-Loss: 0.0207
Epoch: 1	Batch: 800	Avg-Loss: 0.0195
Epoch: 1	Batch: 900	Avg-Loss: 0.0199
Epoch: 1	Batch: 1000	Avg-Loss: 0.0206
Epoch: 1	Batch: 1100	Avg-Loss: 0.0205
Epoch: 1	Batch: 1200	Avg-Loss: 0.0213
Epoch: 1	Batch: 1300	Avg-Loss: 0.0202
Epoch: 1	Batch: 1400	Avg-Loss: 0.0211
Epoch: 1	Batch: 1500	Avg-Loss: 0.0205
Epoch: 1	Batch: 1600	Avg-Loss: 0.0189
Epoch: 1	Batch: 1700	Avg-Loss: 0.0215
Epoch: 1	Batch: 1800	Avg-Loss: 0.0199
Epoch: 1	Batch: 1900	Avg-Loss: 0.0199
Epoch: 1	Batch: 2000	Avg-Loss: 0.0207
Epoch: 1	Batch: 2100	Avg-Loss: 0.0214
Epoch: 1	Batch: 2200	Avg-Loss: 0.0220
Epoch: 1	Batch: 2300	Avg-Loss: 0.0208
Epoch: 1	Batch: 2400	Avg-Loss: 0.0211
Epoch: 1	Batch: 2500	Avg-Loss: 0.0196
Epoch: 1	Batch: 2600	Avg-Loss: 0.0213
Epoch: 1	Batch: 2700

In [14]:
class TestDataset(Dataset):
    """Unlabelled image dataset backed by a list of file paths.

    Args:
        file_list: sequence of image file paths; item ``i`` of the dataset is
            the image stored at ``file_list[i]``.
    """
    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Load image ``index`` and return it as a tensor (no label)."""
        # The context manager closes the underlying file handle, which a bare
        # Image.open() leaves open. Converting to RGB mirrors the default
        # ImageFolder loader used for the training data, so every sample has
        # three channels.
        with Image.open(self.file_list[index]) as img:
            return torchvision.transforms.ToTensor()(img.convert('RGB'))

# Numeric ids of the test images, in the same order as the paths returned by
# parse_data(); filled in as a side effect of calling it.
img_sequence=[]
def parse_data(datadir):
    """Collect every ``.jpg`` file under ``datadir`` (recursively).

    For each collected file, the integer formed by the first four characters
    of its file name is appended to the module-level ``img_sequence`` list, so
    that ``img_sequence`` stays aligned with the returned paths.

    Args:
        datadir: root directory to walk.

    Returns:
        List of paths of the ``.jpg`` files found.

    Raises:
        ValueError: if a ``.jpg`` file name does not start with four digits.
    """
    img_list = []
    for root, directories, filenames in os.walk(datadir):
        for filename in filenames:
            # Filter first: parsing the id of a non-image file used to either
            # raise or leave img_sequence longer than img_list.
            if not filename.endswith('.jpg'):
                continue
            img_sequence.append(int(filename[0:4]))
            img_list.append(os.path.join(root, filename))

    print('{}{}'.format('#Images', len(img_list)))
    return img_list

# Gather the test image paths (this also fills img_sequence with their ids).
img_list = parse_data('test_classification/medium')
testset = TestDataset(img_list)

# No shuffling, so predictions come out in the same order as img_list.
test_loader_args = {
    'shuffle': False,
    'batch_size': 256,
    'pin_memory': True,
    'drop_last': False,
}
test_loader = DataLoader(testset, **test_loader_args)

#Images4600


In [15]:
def predict(model, test_loader):
    """Predict a class label for every sample yielded by ``test_loader``.

    Relies on the notebook-level globals ``device`` and ``trainset``: the
    argmax index of the model's logits is translated through
    ``trainset.classes`` (class names, parsed as integers) to get the label.

    Args:
        model: network whose forward pass returns a tuple with the class
            logits at position 1.
        test_loader: iterable of image batches (no labels).

    Returns:
        1-D ``LongTensor`` on ``device`` holding one label per sample, in
        loader order; empty if the loader yields nothing.

    Raises:
        ValueError: if any name in ``trainset.classes`` is not an integer.
    """
    # Lookup table: class index -> integer label. Built once, so each batch
    # is mapped with a single indexing operation instead of one torch.cat
    # (and one device sync) per sample.
    class_ids = torch.tensor([int(name) for name in trainset.classes],
                             dtype=torch.long, device=device)
    batch_predictions = []

    with torch.no_grad():
        model.eval()
        model.to(device)

        for feats in test_loader:
            feats = feats.to(device)
            outputs = model(feats)[1]

            # Softmax is monotonic, so the argmax of the raw logits picks the
            # same class without the extra computation.
            pred_indices = outputs.argmax(dim=1).view(-1)
            batch_predictions.append(class_ids[pred_indices])

    if not batch_predictions:
        return torch.LongTensor().to(device)
    return torch.cat(batch_predictions, dim=0)

# Predict a label for every test image, in test_loader order.
pred = predict(cnn, test_loader)

import pandas as pd
# Pair each image id with its predicted label, then write the rows ordered by
# id. NOTE(review): the output path is an absolute, machine-specific location.
result = pd.DataFrame({'id': img_sequence, 'label': pred.cpu().numpy()})
result_sorted = result.sort_values(by=['id'])
result_sorted.to_csv('/home/ubuntu/zichenli_hw2_classification_21.csv', encoding='utf-8', index=False)

In [19]:
# Saves the whole model object rather than only cnn.state_dict().
# NOTE(review): loading such a file presumably needs the notebook's class
# definitions (Network, IdentityBlock) to be importable -- confirm before
# relying on it outside this notebook.
torch.save(cnn, 'resnetnet_best.pth')

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
