## ML HW2 sample code

#### Import packages

In [1]:
# references: https://clay-atlas.com/us/blog/2021/08/25/pytorch-en-early-stopping/
# references: https://chih-sheng-huang821.medium.com/03-pytorch-dataaug-a712a7a7f55e

import os
import random
import glob
import csv
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

In [2]:
!gdown 1drrS7gnyzUJPPiQcDWcHdIXqzjy2n3yZ
!unzip 'HW2.zip'

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
 extracting: data/train/35561.jpg    
 extracting: data/train/25855.jpg    
 extracting: data/train/19327.jpg    
 extracting: data/train/26948.jpg    
 extracting: data/train/11690.jpg    
 extracting: data/train/16673.jpg    
 extracting: data/train/11974.jpg    
 extracting: data/train/29951.jpg    
 extracting: data/train/19225.jpg    
 extracting: data/train/16735.jpg    
 extracting: data/train/29258.jpg    
 extracting: data/train/34324.jpg    
 extracting: data/train/23429.jpg    
 extracting: data/train/32778.jpg    
 extracting: data/train/18948.jpg    
 extracting: data/train/16266.jpg    
 extracting: data/train/32922.jpg    
 extracting: data/train/19090.jpg    
 extracting: data/train/11971.jpg    
 extracting: data/train/32888.jpg    
 extracting: data/train/23236.jpg    
 extracting: data/train/10155.jpg    
 extracting: data/train/26199.jpg    
 extracting: data/train/28311.jpg    
 extracting: data/train/18732.jpg    
 extracting: da

#### Set arguments and random seed

In [1]:
!nvidia-smi

Mon Oct 24 02:01:48 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [18]:
TRA_PATH = 'data/train/'
TST_PATH = 'data/test/'
LABEL_PATH = 'data/train.csv'
DEVICE_ID = 0
SEED = 5566
NUM_ECPOCH = 100

torch.cuda.set_device(DEVICE_ID)
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
random.seed(SEED)
np.random.seed(SEED)

#### Process data

In [4]:
def load_train_data(img_path, label_path, valid_ratio=0.12):
    train_label = pd.read_csv(label_path)['label'].values.tolist()
    train_image = [f'{img_path}/{i+10000}.jpg' for i in range(len(train_label)-1)]
    
    train_data = list(zip(train_image, train_label))
    random.shuffle(train_data)
    
    split_len = int(len(train_data) * valid_ratio)
    train_set = train_data[split_len:]
    valid_set = train_data[:split_len]
    
    return train_set, valid_set

def load_test_data(img_path):
    test_set = [f'{img_path}/{i}.jpg' for i in range(7000, 10000)]
    return test_set
    
def compute_statistics(dataset):
    data = []
    for (img_path, label) in dataset:
        data.append(np.array(Image.open(img_path)))
    data = np.array(data)
    return data.mean(), data.std()

In [5]:
train_set, valid_set = load_train_data(TRA_PATH, LABEL_PATH)
test_set = load_test_data(TST_PATH)
transform1 = transforms.Compose([transforms.RandomHorizontalFlip(p=0.5)])
transform2 = transforms.Compose([transforms.RandomRotation(10, center=(36,32)), transforms.CenterCrop(54), transforms.Resize((64,64)), transforms.RandomHorizontalFlip(p=0.5)])
transform3 = transforms.Compose([transforms.RandomRotation(10, center=(28,32)), transforms.CenterCrop(54), transforms.Resize((64,64)), transforms.RandomHorizontalFlip(p=0.5)])
transform4 = transforms.Compose([transforms.RandomRotation(10, center=(32,32)), transforms.RandomCrop((59,59)), transforms.Resize((64,64)), transforms.RandomHorizontalFlip(p=0.5)])
transform5 = transforms.Compose([transforms.RandomRotation(10, center=(32,32)), transforms.RandomCrop((54,54)), transforms.Resize((64,64)), transforms.RandomHorizontalFlip(p=0.5)])

#### Customize dataset

In [6]:
class FaceExpressionDataset(Dataset):
    def __init__(self, data, augment=None):
        self.data = data
        self.augment = augment

    def __len__(self):
        return len(self.data)
    
    def normalize(self, data):

      picture = data[0]
      a = torch.max(picture)
      b = torch.min(picture)
      if (a-b != 0):
        data[0] = (picture - b) / (a - b)
      else:
        data[0] = picture - picture
      return data

      
      picture = data[0]
      a = torch.mean(picture)
      b = torch.std(picture)
      if (b != 0):
        data[0] = (picture - a) / b
      else:
        data[0] = picture - picture
      return data


      data[0] = data[0] / 255
      return data

    
    def read_img(self, idx):
        img = Image.open(self.data[idx][0])
        if not self.augment is None:
            img = self.augment(img)
        img = torch.from_numpy(np.array(img)).float()
        img = img.unsqueeze(0).float()
        img = self.normalize(img)
        return img
    
    def __getitem__(self, idx):
        img = self.read_img(idx)
        label = self.data[idx][1]
        return img, label
    
class TestingDataset(Dataset):
    def __init__(self, data, augment=None):
        self.data = data
        self.augment = augment

    def __len__(self):
        return len(self.data)
    
    def normalize(self, data):

      picture = data[0]
      a = torch.max(picture)
      b = torch.min(picture)
      if (a-b != 0):
        data[0] = (picture - b) / (a - b)
      else:
        data[0] = picture - picture
      return data

      picture = data[0]
      a = torch.mean(picture)
      b = torch.std(picture)
      if (b != 0):
        data[0] = (picture - a) / b
      else:
        data[0] = picture - picture
      return data

      data[0] = data[0] / 255
      return data
    
    def read_img(self, idx):
        img = Image.open(self.data[idx])
        if not self.augment is None:
            img = self.augment(img)
        img = torch.from_numpy(np.array(img)).float()
        img = img.unsqueeze(0).float()
        img = self.normalize(img)
        return img, self.data[idx].split('/')[-1][:-4]
        
    def __getitem__(self, idx):
        img, name = self.read_img(idx)
        
        return img, name

In [19]:
train_dataset0 = FaceExpressionDataset(train_set, None)
train_dataset1 = FaceExpressionDataset(train_set, transform1)
train_dataset2 = FaceExpressionDataset(train_set, transform2)
train_dataset3 = FaceExpressionDataset(train_set, transform3)
train_dataset4 = FaceExpressionDataset(train_set, transform4)
train_dataset5 = FaceExpressionDataset(train_set, transform5)
# train_dataset = torch.utils.data.ConcatDataset([train_dataset1, train_dataset4, train_dataset5])
train_dataset = train_dataset0
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
print(len(train_dataset3))
print(len(train_dataset))

valid_dataset = FaceExpressionDataset(valid_set)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)

test_dataset = TestingDataset(test_set)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)  

print(len(train_dataset4.__getitem__(5)[0][0]), len(train_dataset4.__getitem__(5)[0][0][3]))
print(len(train_dataset3.__getitem__(5)[0][0]), len(train_dataset3.__getitem__(5)[0][0][3]))
print(train_dataset0.__getitem__(3)[0][0][0])
print(train_dataset0.__getitem__(3)[0][0][0])
print(train_dataset4.__getitem__(5)[0][0][32])
print(train_dataset5.__getitem__(5)[0][0][32])
print(train_dataset4.__getitem__(5)[0][0][63])
print(train_dataset5.__getitem__(5)[0][0][63])
print(test_dataset.__getitem__(1395)[0][0][32])

22780
22780
64 64
64 64
tensor([0.5000, 0.5714, 0.6270, 0.6151, 0.5714, 0.5595, 0.5913, 0.6230, 0.6190,
        0.6230, 0.6230, 0.6349, 0.6746, 0.7183, 0.7262, 0.7063, 0.7302, 0.7341,
        0.7817, 0.7897, 0.7421, 0.7381, 0.7857, 0.8016, 0.7937, 0.7381, 0.7698,
        0.8333, 0.8016, 0.7579, 0.8175, 0.9008, 0.9008, 0.8452, 0.7857, 0.7579,
        0.7421, 0.7381, 0.7619, 0.8016, 0.8294, 0.8095, 0.7778, 0.7698, 0.7460,
        0.6587, 0.6310, 0.6944, 0.7341, 0.7698, 0.8175, 0.8254, 0.7500, 0.6310,
        0.5437, 0.5159, 0.3730, 0.3492, 0.3968, 0.4405, 0.4484, 0.4960, 0.4841,
        0.3611])
tensor([0.5000, 0.5714, 0.6270, 0.6151, 0.5714, 0.5595, 0.5913, 0.6230, 0.6190,
        0.6230, 0.6230, 0.6349, 0.6746, 0.7183, 0.7262, 0.7063, 0.7302, 0.7341,
        0.7817, 0.7897, 0.7421, 0.7381, 0.7857, 0.8016, 0.7937, 0.7381, 0.7698,
        0.8333, 0.8016, 0.7579, 0.8175, 0.9008, 0.9008, 0.8452, 0.7857, 0.7579,
        0.7421, 0.7381, 0.7619, 0.8016, 0.8294, 0.8095, 0.7778, 0.7698, 0.7460,

#### Define module class

In [9]:
class FaceExpressionNet(nn.Module):
    def __init__(self):
        super(FaceExpressionNet, self).__init__()
        # TODO
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16, eps=1e-05, affine=True),
            nn.LeakyReLU(negative_slope=0.05),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), 
            nn.BatchNorm2d(32, eps=1e-05, affine=True),
            nn.LeakyReLU(negative_slope=0.05),
            nn.MaxPool2d((2, 2)), 
            nn.Conv2d(32, 64, kernel_size=3, padding=1), 
            nn.BatchNorm2d(64, eps=1e-05, affine=True),
            nn.LeakyReLU(negative_slope=0.05),
            nn.MaxPool2d((2, 2)), 
            nn.Conv2d(64, 32, kernel_size=3, padding=1), 
            nn.BatchNorm2d(32, eps=1e-05, affine=True),
            nn.LeakyReLU(negative_slope=0.05),
            nn.MaxPool2d((2, 2)), 
            nn.Conv2d(32, 16, kernel_size=3, padding=1), 
            nn.BatchNorm2d(16, eps=1e-05, affine=True),
            nn.LeakyReLU(negative_slope=0.05),
            nn.MaxPool2d((2, 2)), 
        )
        self.fc = nn.Sequential(
            nn.Linear(16*2*2, 7),
        )

    def forward(self, x):
        #image size (64,64)
        x = self.conv(x) #(32,32)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

#### Define training and testing process

In [10]:
def train(train_loader, model, loss_fn, use_gpu=True):
    model.train()
    train_loss = []
    train_acc = []
    # print(len(train_loader))
    temp = 0
    for (img, label) in train_loader:
        # print(temp)
        temp += 1
        if use_gpu:
            img = img.to(device)
            label = label.to(device)
        optimizer.zero_grad()
        output = model(img)
        loss = loss_fn(output, label)
        loss.backward()            
        optimizer.step()
        with torch.no_grad():
            predict = torch.argmax(output, dim=-1)
            acc = np.mean((label == predict).cpu().numpy())
            train_acc.append(acc)
            train_loss.append(loss.item())
    print("Epoch: {}, train Loss: {:.4f}, train Acc: {:.4f}".format(epoch + 1, np.mean(train_loss), np.mean(train_acc)))
    return (np.mean(train_loss), np.mean(train_acc))
    
def valid(valid_loader, model, loss_fn, use_gpu=True):
    model.eval()
    with torch.no_grad():
        valid_loss = []
        valid_acc = []
        for idx, (img, label) in enumerate(valid_loader):
            if use_gpu:
                img = img.to(device)
                label = label.to(device)
            output = model(img)
            loss = loss_fn(output, label)
            predict = torch.argmax(output, dim=-1)
            acc = (label == predict).cpu().tolist()
            valid_loss.append(loss.item())
            valid_acc += acc
       
        valid_acc = np.mean(valid_acc)
        valid_loss = np.mean(valid_loss)
        print("Epoch: {}, valid Loss: {:.4f}, valid Acc: {:.4f}".format(epoch + 1, valid_loss, valid_acc))
    return (valid_loss, valid_acc)

def save_checkpoint(valid_acc, acc_record, epoch, prefix='model'):
    # you can define the condition to save model :)
    if valid_acc >= np.mean(acc_record[-5:]):    
        checkpoint_path = f'{prefix}.pth'
        torch.save(model.state_dict(), checkpoint_path)
        print('model saved to %s' % checkpoint_path)

def better(acc_record, los_record):
    if max(acc_record) == acc_record[-1]: return 1
    if min(los_record) == los_record[-1]: return 2
    if (los_record[-1] < los_record[-2]): return 2
    return 0

In [20]:
if __name__ == '__main__':
    model = FaceExpressionNet()
    if use_gpu:
        model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.CrossEntropyLoss()
    
    train_acc_record = []
    train_los_record = []
    valid_acc_record = []
    valid_los_record = []
    stop = 10

    for epoch in range(NUM_ECPOCH):
        (train_loss, train_acc) = train(train_loader, model, loss_fn, use_gpu)
        (valid_loss, valid_acc) = valid(valid_loader, model, loss_fn, use_gpu=True)
        train_acc_record.append(train_acc)
        train_los_record.append(train_loss)
        valid_acc_record.append(valid_acc)
        valid_los_record.append(valid_loss)
        
        temp = better(valid_acc_record, valid_los_record)
        stop -= 1
        if (temp == 1):
            save_checkpoint(valid_acc, valid_acc_record, epoch, prefix='model')
            stop = min(10, stop+5)
        if (temp == 2):
            stop = min(10, stop+2)
        # if (stop == 0):
            # break
        
        print('########################################################')

Epoch: 1, train Loss: 1.5514, train Acc: 0.3948
Epoch: 1, valid Loss: 1.5102, valid Acc: 0.4214
model saved to model.pth
########################################################
Epoch: 2, train Loss: 1.2990, train Acc: 0.5031
Epoch: 2, valid Loss: 1.2776, valid Acc: 0.5058
model saved to model.pth
########################################################
Epoch: 3, train Loss: 1.1822, train Acc: 0.5500
Epoch: 3, valid Loss: 1.2103, valid Acc: 0.5415
model saved to model.pth
########################################################
Epoch: 4, train Loss: 1.1003, train Acc: 0.5829
Epoch: 4, valid Loss: 1.2001, valid Acc: 0.5509
model saved to model.pth
########################################################
Epoch: 5, train Loss: 1.0330, train Acc: 0.6103
Epoch: 5, valid Loss: 1.2047, valid Acc: 0.5464
########################################################
Epoch: 6, train Loss: 0.9604, train Acc: 0.6381
Epoch: 6, valid Loss: 1.2119, valid Acc: 0.5522
model saved to model.pth
##############

In [21]:
def confusion_matrix(valid_loader, model):
    model.eval()
    matrix = [[0 for j in range (7)] for i in range (7)]
    with torch.no_grad():
        for idx, (img, label) in enumerate(valid_loader):
            if use_gpu:
                img = img.to(device)
                label = label.to(device)
            output = model(img)
            predict = torch.argmax(output, dim=-1)
            for i in range (len(label)):
                matrix[label[i]][predict[i]] += 1
    print(matrix)
    for i in range (7):
        sum = 0
        for j in range (7):
            sum += matrix[i][j]
        if (sum == 0):
            continue
        for j in range (7):
            matrix[i][j] /= sum
    return matrix


print(train_acc_record)
print(train_los_record)
print(valid_acc_record)
print(valid_los_record)
del model
model = FaceExpressionNet()
model.load_state_dict(torch.load('model.pth'))
model = model.cuda()
print(confusion_matrix(valid_loader, model))
print(confusion_matrix(train_loader, model))

[0.39476061299383836, 0.5030723314606742, 0.5500407756433491, 0.5829416228706052, 0.6102697988401594, 0.6381430658753171, 0.6614857058716926, 0.6878879349401957, 0.7136912717470097, 0.7396418539325843, 0.7610023445994927, 0.7853303959768032, 0.8046492728343603, 0.8213304752627765, 0.8370262663102572, 0.8588214140086988, 0.8686500430409567, 0.8790973858281986, 0.8970131841246828, 0.9049162966654585, 0.9188734595868069, 0.9298375770206597, 0.9281272653135194, 0.9380564176332006, 0.9436687318774918, 0.948946912377673, 0.9585136145342515, 0.9568811729793403, 0.9553081959043132, 0.9560146905581731, 0.9541656283979704, 0.9604405468466837, 0.9661066622870605, 0.970322977075027, 0.9725628058173251, 0.9716807493657121, 0.9684824098405219, 0.9557046257702065, 0.9611527047843421, 0.9685234686480608, 0.9722966314787966, 0.9785319069409206, 0.9739673001993476, 0.9768173477709314, 0.9728714547843421, 0.9652274374773469, 0.961324019119246, 0.9669887187386734, 0.9729592356832185, 0.9793247666727076, 0

In [22]:
def test(test_loader, model, file_name='predict.csv'):
    with torch.no_grad():
        predict_result = []
        predict_name = []
        for img, name in test_loader:
            if use_gpu:
                img = img.to(device)
            output = model(img)
            predict = torch.argmax(output, dim=-1).tolist()
            predict_result += predict
            predict_name += name
        
    with open(file_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['id', 'label'])
        for id, r in zip(predict_name, predict_result):
            writer.writerow([id, r])

    
    # from google.colab import files
    # files.download("predict.csv")  

test(test_loader, model)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>