# HW-2
___

* Custom Dataset & DataLoader
* Torchvision ImageFolder Dataset
* Residual Block
* CNN model with Residual Block
* Loss Functions (Center Loss and Triplet Loss)

## Imports

In [1]:
import os
import numpy as np
from PIL import Image

import torch
import torchvision   
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

## Custom DataSet with DataLoader
___
We have used a subset of the data given for the Face Classification and Verification problem in Part 2 of the homework

In [2]:
class ImageDataset(Dataset):
    def __init__(self, file_list, target_list):
        self.file_list = file_list
        self.target_list = target_list
        self.n_class = len(list(set(target_list)))

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        img = Image.open(self.file_list[index])
        img = torchvision.transforms.ToTensor()(img)
        label = self.target_list[index]
        return img, label

#### Parse the given directory to accumulate all the images

In [3]:
def parse_data(datadir):
    img_list = []
    ID_list = []
    for root, directories, filenames in os.walk(datadir):  #root: median/1
        for filename in filenames:
            if filename.endswith('.jpg'):
                filei = os.path.join(root, filename)
                img_list.append(filei)
                ID_list.append(root.split('/')[-1])

    # construct a dictionary, where key and value correspond to ID and target
    uniqueID_list = list(set(ID_list))
    class_n = len(uniqueID_list)
    target_dict = dict(zip(uniqueID_list, range(class_n)))
    label_list = [target_dict[ID_key] for ID_key in ID_list]#print 有问题

    print('{}\t\t{}\n{}\t\t{}'.format('#Images', '#Labels', len(img_list), len(set(label_list))))
    return img_list, label_list, class_n

In [4]:
img_list, label_list, class_n = parse_data('hw2p2dataset/train_data/medium')

#Images		#Labels
822155		2300


In [5]:
print(img_list[1888])

hw2p2dataset/train_data/medium\1001\37.jpg


In [6]:
trainset = ImageDataset(img_list, label_list)

In [7]:
train_data_item, train_data_label = trainset.__getitem__(0)

In [8]:
print('data item shape: {}\t data item label: {}'.format(train_data_item.shape, train_data_label))

data item shape: torch.Size([3, 32, 32])	 data item label: 1957


In [9]:
dataloader = DataLoader(trainset, batch_size=10, shuffle=True, num_workers=1, drop_last=False)

In [11]:
for batch_num, (feats, labels) in enumerate(dataloader):
    print(batch_num,type(feats),type(labels))

BrokenPipeError: [Errno 32] Broken pipe

## Torchvision DataSet and DataLoader

In [None]:
imageFolder_dataset = torchvision.datasets.ImageFolder(root='hw2p2dataset/train_data/medium', 
                                                       transform=torchvision.transforms.ToTensor())

In [None]:
imageFolder_dataloader = DataLoader(imageFolder_dataset, batch_size=10, shuffle=True, num_workers=1)

In [None]:
imageFolder_dataset.__len__(), len(imageFolder_dataset.classes)

In [None]:
# imageFolder_dataset.classes

## Residual Block

Resnet: https://arxiv.org/pdf/1512.03385.pdf

Here is a basic usage of shortcut in Resnet

In [None]:
class BasicBlock(nn.Module):

    def __init__(self, channel_size, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(channel_size, channel_size, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channel_size)
        self.shortcut = nn.Conv2d(channel_size, channel_size, kernel_size=1, stride=stride, bias=False)


    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn1(self.conv1(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

## CNN Model with Residual Block 

In [None]:
# [num_feats] + hidden_sizes + [num_classes]

In [None]:
# hidden_sizes

In [None]:
class Network(nn.Module):
    def __init__(self, num_feats, hidden_sizes, num_classes, feat_dim=10):
        super(Network, self).__init__()
        
        self.hidden_sizes = [num_feats] + hidden_sizes + [num_classes]#[3,32,64,2300]
        
        self.layers = []
        self.layers.append(nn.Conv2d(in_channels=3, 
                                     out_channels=64, 
                                     kernel_size=5, stride=1,padding=2, bias=False))
        self.layers.append(nn.ReLU(inplace=True))
#         self.layers.append(BasicBlock(channel_size = 64))
        self.layers.append(nn.MaxPool2d(kernel_size=3, stride=2,padding=1))
         #hidden layer2
        self.layers.append(nn.Conv2d(in_channels=64, 
                                     out_channels=192, 
                                     kernel_size=5, stride=1,padding=2, bias=False))
        self.layers.append(nn.ReLU(inplace=True))
#         self.layers.append(BasicBlock(channel_size = 192))
        self.layers.append(nn.MaxPool2d(kernel_size=3, stride=2,padding=1))
        
          #hidden layer3
        self.layers.append(nn.Conv2d(in_channels=192, 
                                     out_channels=384, 
                                     kernel_size=5, stride=1,padding=2, bias=False))
        self.layers.append(nn.ReLU(inplace=True))
#         self.layers.append(BasicBlock(channel_size = 384))
        
        #hidden layer4
        self.layers.append(nn.Conv2d(in_channels=384, 
                                     out_channels=256, 
                                     kernel_size=3, stride=1,padding=1, bias=False))
        self.layers.append(nn.ReLU(inplace=True))
#         self.layers.append(BasicBlock(channel_size = 256))
        #hidden layer5
        self.layers.append(nn.Conv2d(in_channels=256, 
                                     out_channels=256, 
                                     kernel_size=3, stride=1,padding=1, bias=False))
        self.layers.append(nn.ReLU(inplace=True))
#         self.layers.append(BasicBlock(channel_size = 256))
        self.layers.append(nn.MaxPool2d(kernel_size=3, stride=2,padding=1))
        
        self.layers = nn.Sequential(*self.layers)
        self.linear_label1 = nn.Linear(4096, 4096, bias=False)
        self.linear_label2 = nn.Linear(4096, 4096, bias=False)
        self.linear_label3 = nn.Linear(4096, 2300, bias=False)
        
        # For creating the embedding to be passed into the Center Loss criterion
        self.linear_closs = nn.Linear(4096, feat_dim, bias=False)
        self.relu_closs = nn.ReLU(inplace=True)
    
    def forward(self, x, evalMode=False):
        output = x
#         print('shape1',output.shape)
        output = self.layers(output)            
#         output = F.MaxPool2d(output, [output.size(2), output.size(3)], stride=1)
#         print('shape',output.shape)
        output = output.view(output.shape[0], -1)
        output=self.linear_label1(output)
        output=self.linear_label2(output)
        label_output=self.linear_label3(output)
        label_output = label_output/torch.norm(self.linear_label3.weight, dim=1)
        
        # Create the feature embedding for the Center Loss
        closs_output = self.linear_closs(output)
        closs_output = self.relu_closs(closs_output)

        return closs_output, label_output

def init_weights(m):
    if type(m) == nn.Conv2d or type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)

In [None]:
# #test linear layer input
# import torch
# model=Network(3,5)
# data_input = model(torch.randn([1,3, 32, 32])) # 这里假设输入图片是96x96
# print (data_input.size())

### Training & Testing Model

In [None]:
def train(model, data_loader, test_loader, task='Classification'):
    model.train()

    for epoch in range(numEpochs):
        avg_loss = 0.0
        for batch_num, (feats, labels) in enumerate(data_loader):
            feats, labels = feats.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(feats)[1]

            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            
            avg_loss += loss.item()

            if batch_num % 50 == 49:
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))
                avg_loss = 0.0    
            
            torch.cuda.empty_cache()
            del feats
            del labels
            del loss
        
        if task == 'Classification':
            val_loss, val_acc = test_classify(model, test_loader)
            train_loss, train_acc = test_classify(model, data_loader)
            print('Train Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
                  format(train_loss, train_acc, val_loss, val_acc))
        else:
            test_verify(model, test_loader)


def test_classify(model, test_loader):
    model.eval()
    test_loss = []
    accuracy = 0
    total = 0

    for batch_num, (feats, labels) in enumerate(test_loader):
        feats, labels = feats.to(device), labels.to(device)
        print('feats',feats)
        outputs = model(feats)[1]
        
        _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
        pred_labels = pred_labels.view(-1)
        
        loss = criterion(outputs, labels.long())
        
        accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)
        test_loss.extend([loss.item()]*feats.size()[0])
        del feats
        del labels

    model.train()
    return np.mean(test_loss), accuracy/total


def test_verify(model, test_loader):
    raise NotImplementedError

#### Dataset, DataLoader and Constant Declarations

In [None]:
train_dataset = torchvision.datasets.ImageFolder(root='hw2p2dataset/train_data/medium', 
                                                 transform=torchvision.transforms.ToTensor())
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=256, 
                                               shuffle=True, num_workers=8)

dev_dataset = torchvision.datasets.ImageFolder(root='hw2p2dataset/validation_classification/medium', 
                                               transform=torchvision.transforms.ToTensor())
dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=256, 
                                             shuffle=True, num_workers=8)

In [None]:
for batch_num, (feats, labels) in enumerate(train_dataloader):
#     feats, labels = feats.to(device), labels.to(device)
    print(batch_num,'feats',feats.shape,'label',labels)
#         outputs = model(feats)[1]

In [None]:
numEpochs = 4
num_feats = 3

learningRate = 1e-2
weightDecay = 5e-5

hidden_sizes = [32, 64]
num_classes = len(train_dataset.classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# network = Network(num_feats, hidden_sizes, num_classes)
network = Network(num_feats,hidden_sizes, num_classes)
# network.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)

In [None]:
network.train()
network.to(device)
train(network, train_dataloader, dev_dataloader)

In [None]:
len(train_dataset)

In [None]:
train_dataset[6][1]

## Center Loss
___
The following piece of code for Center Loss has been pulled and modified based on the code from the GitHub Repo: https://github.com/KaiyangZhou/pytorch-center-loss
    
<b>Reference:</b>
<i>Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.</i>

In [None]:
class CenterLoss(nn.Module):
    """
    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
    """
    def __init__(self, num_classes, feat_dim, device=torch.device('cpu')):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.device = device
        
        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).to(self.device))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (batch_size).
        """
        batch_size = x.size(0)
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        distmat.addmm_(1, -2, x, self.centers.t())

        classes = torch.arange(self.num_classes).long().to(self.device)
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.eq(classes.expand(batch_size, self.num_classes))

        dist = []
        for i in range(batch_size):
            value = distmat[i][mask[i]]
            value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
            dist.append(value)
        dist = torch.cat(dist)
        loss = dist.mean()

        return loss

In [None]:
def train_closs(model, data_loader, test_loader, task='Classification'):
    model.train()

    for epoch in range(numEpochs):
        avg_loss = 0.0
        for batch_num, (feats, labels) in enumerate(data_loader):
            
            feats, labels = feats.to(device), labels.to(device)
            
            optimizer_label.zero_grad()
            optimizer_closs.zero_grad()
            
            feature, outputs = model(feats)

            l_loss = criterion_label(outputs, labels.long())
            c_loss = criterion_closs(feature, labels.long())
            loss = l_loss + closs_weight * c_loss
            
            loss.backward()
            
            optimizer_label.step()
            # by doing so, weight_cent would not impact on the learning of centers
            for param in criterion_closs.parameters():
                param.grad.data *= (1. / closs_weight)
            optimizer_closs.step()
            
            avg_loss += loss.item()

            if batch_num % 50 == 49:
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))
                avg_loss = 0.0    
            
            torch.cuda.empty_cache()
            del feats
            del labels
            del loss
        
        if task == 'Classification':
            val_loss, val_acc = test_classify_closs(model, test_loader)
            train_loss, train_acc = test_classify_closs(model, data_loader)
            print('Train Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
                  format(train_loss, train_acc, val_loss, val_acc))
        else:
            test_verify(model, test_loader)


def test_classify_closs(model, test_loader):
    model.eval()
    test_loss = []
    accuracy = 0
    total = 0

    for batch_num, (feats, labels) in enumerate(test_loader):
        feats, labels = feats.to(device), labels.to(device)
        feature, outputs = model(feats)
        
        _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
        pred_labels = pred_labels.view(-1)
        
        l_loss = criterion_label(outputs, labels.long())
        c_loss = criterion_closs(feature, labels.long())
        loss = l_loss + closs_weight * c_loss
        
        accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)
        test_loss.extend([loss.item()]*feats.size()[0])
        del feats
        del labels

    model.train()
    return np.mean(test_loss), accuracy/total

In [None]:
i=0
for batch_num, (feats, labels) in enumerate(train_dataset):
    print(batch_num, feats.shape, labels)
    i+=1
    if (i==12):
        break        

In [None]:
closs_weight = 1
lr_cent = 0.5
feat_dim = 10

network = Network(num_feats, hidden_sizes, num_classes, feat_dim)
network.apply(init_weights)

criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(num_classes, feat_dim, device)
optimizer_label = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
                 optimizer_closs = torch.optim.SGD(criterion_closs.parameters(), lr=lr_cent)

In [None]:
network.train()
network.to(device)
train_closs(network, train_dataloader, dev_dataloader)