In [None]:
# some basic things to include
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import pathlib

from PIL import Image
import matplotlib.pyplot as plt

# some preprocessing techniques
from scipy.fftpack import dct

import time
from datetime import datetime
# Google Colab only: mount Google Drive so the dataset folders referenced
# below under /content/drive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
class DCT(object):
    '''
    Block-wise 2-D DCT image preprocessor.

    Usage:
    as image preprocessor by calling DCT() as a transform

    input: H*W*3 PIL image whose height and width are multiples of 8 (e.g. 128*128*3)
    output: 64*n_blocks float32 torch tensor (64*256 for a 128*128 input)

    [128*128] => (crop) => [256 * [8*8]] => (DCT_2d) => [256 * [8 * 8]] => reshape => [256 * 64] => transpose => [64 * 256]
    '''
    def __init__(self):
        self.BLOCK_HEIGHT = 8
        self.BLOCK_WIDTH = 8
        self.BLOCK_SIZE = (self.BLOCK_HEIGHT, self.BLOCK_WIDTH)

    def div_block(self, img, block_size):
        '''
        Cut img into non-overlapping blocks, scanning rows top-to-bottom and,
        inside each row, left-to-right.

        img: object exposing .height, .width and .crop(box) (e.g. a PIL image)
        block_size: (block_height, block_width); must divide the image size exactly
        returns: numpy array stacking the cropped blocks in scan order
        raises: AssertionError when the image size is not a multiple of block_size
        '''
        block_height, block_width = block_size[0], block_size[1]
        assert img.height % block_height == 0, "image height must be a multiple of the block height"
        assert img.width % block_width == 0, "image width must be a multiple of the block width"

        blocks = [
            # crop box order is (left, upper, right, lower)
            np.array(img.crop((left, top, left + block_width, top + block_height)))
            for top in range(0, img.height, block_height)
            for left in range(0, img.width, block_width)
        ]
        return np.array(blocks)

    def dct2(self, array_2d):
        '''Orthonormal 2-D DCT of a 2-D array: first along axis 0, then along axis 1.'''
        return dct(dct(array_2d.T, norm = 'ortho').T, norm = 'ortho')

    def _dct2(self, array_2d):
        '''Orthonormal 2-D DCT of a 2-D array: first along axis 1, then along axis 0.'''
        return dct(dct(array_2d, norm = 'ortho').T, norm = 'ortho').T

    def __call__(self, img):
        '''
        Transform a 3-channel image into its per-block DCT coefficients.

        returns: float32 tensor of shape (block_h*block_w, n_blocks); column k
                 holds the flattened coefficients of the k-th block.
        '''
        blocks = self.div_block(img, self.BLOCK_SIZE)
        # The crops are uint8; adding three uint8 planes wraps around at 256 and
        # silently corrupts the average, so promote to float before summing.
        channels = blocks[..., :3].astype(np.float64)
        # Naive greyscale: plain mean of the three channels. NOTE(review): when
        # this transform follows Ycbcr_convert() the channels are Y/Cb/Cr rather
        # than colour planes -- confirm that averaging them is intended.
        grey_blocks = (channels[..., 0] + channels[..., 1] + channels[..., 2]) / 3
        result = np.array([self._dct2(grey_block) for grey_block in grey_blocks])
        # One row per block, then transpose so coefficients index the first axis.
        # Deriving the row count from the data (instead of a literal 256) keeps
        # the transform usable for image sizes other than 128*128.
        return torch.from_numpy(result.reshape(len(result), -1).T).float()

    def __repr__(self):
        return "Simply DCT. What do you expect?"

class DFT(object):
    '''
    Usage: Same as DCT()

    input: 64*256 torch array (histogram)
    output: 64*256 torch array (frequency histogram)

    The output is the real part of a 1-D discrete Fourier transform taken
    along the last dimension, computed and returned in float64.
    '''
    def __init__(self):
        pass

    def __call__(self, freq):
        '''
        freq: real-valued tensor (or array-like) of shape (rows, length)
        returns: float64 tensor of the same shape holding Re(FFT) of each row
        '''
        # The legacy callable torch.fft(input, signal_ndim) no longer exists in
        # PyTorch >= 1.8 (torch.fft is a module there). Importing the function
        # from the module works on every release that ships torch.fft.fft.
        from torch.fft import fft as _fft

        # float64 matches the precision the former zero-filled complex buffer had.
        signal = torch.as_tensor(freq, dtype=torch.float64)
        # .real is a strided view into the complex result; make it a standalone tensor.
        return _fft(signal, dim=-1).real.contiguous()

    def __repr__(self):
        return "Simply DFT. What do you expect?"

class Ycbcr_convert():
    '''Transform that re-encodes an image in the 'YCbCr' mode via its convert() method.'''

    def __init__(self):
        # Stateless transform: there is nothing to configure.
        pass

    def __call__(self, img):
        '''Return the result of img.convert('YCbCr').'''
        ycbcr_image = img.convert('YCbCr')
        return ycbcr_image

    def __repr__(self):
        description = "Convert a PIL Image from RGB to YCbCr"
        return description

In [None]:
def image_show(np_image):
    """Render ``np_image`` (expected to be a numpy array) in a new 5x5 figure."""
    figure_size = (5, 5)
    plt.figure(figsize=figure_size)
    plt.imshow(np_image)
    plt.show()

# Earlier dataset location, kept for reference:
# mainfolder = 'drive/My Drive/COMP5331 Fall 2020/MM17-WeiboRumorSet/'

# Pre-split dataset roots on the mounted drive. Each root must contain one
# sub-folder per class; ImageFolder raises an error when there are none.
train_path='/content/drive/MyDrive/colab_data/train/'
test_path='/content/drive/MyDrive/colab_data/test/'

image_height = 128
image_width  = 128
mytransform = transforms.Compose([
    # Pass the InterpolationMode enum: an integer constant such as
    # Image.BICUBIC triggers the "should be of type InterpolationMode" warning.
    transforms.Resize((image_height, image_width),
                      interpolation=transforms.InterpolationMode.BICUBIC),
    Ycbcr_convert(),
    DCT(),
    # DFT()
])

trainset = datasets.ImageFolder(train_path, transform=mytransform)
testset = datasets.ImageFolder(test_path, transform=mytransform)

# The overall count covers both splits (it previously reported only the test split).
print('Total no. of images: ', len(trainset) + len(testset))
print('Total no. of train set images: ', len(trainset))
print('Total no. of test set images: ', len(testset))

root=pathlib.Path(train_path)
# Take the class names from the dataset itself so they always line up with the
# integer labels it yields; listing the directory would also pick up stray
# non-class entries that happen to sit next to the class folders.
classes = trainset.classes
print('classes:', classes)

  "Argument interpolation should be of type InterpolationMode instead of int. "


Total no. of images:  3662
Total no. of train set images:  14459
Total no. of test set images:  3662
classes: ['Boredom', 'Confusion', 'Engagement', 'Frustration']


In [None]:
# Sanity check: show which dataset class backs the test split.
print(type(testset))

<class 'torchvision.datasets.folder.ImageFolder'>


In [None]:
batch_size = 32

# Training batches are reshuffled every epoch.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps per-batch
# results reproducible from one run to the next.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# Sanity check: show the loader class wrapping the test split.
print(type(testloader))

<class 'torch.utils.data.dataloader.DataLoader'>


In [None]:
class Frequent_Domain_Subnetwork(nn.Module):
    '''
    1-D convolutional network over block-DCT features.

    input:  B*64*256 float tensor (64 channels, length 256). Three MaxPool1d(2)
            stages reduce the length to 32, so the flattened size is 128*32 = 4096.
    output: B*64 feature tensor by default, or B*num_classes logits when
            num_classes is given.

    num_classes: optional size of a final linear layer (Wc) applied on top of
                 the 64-d features. The default (None) adds no such layer, so
                 parameter names and shapes stay exactly as before.
    '''
    def __init__(self, num_classes=None):
        super(Frequent_Domain_Subnetwork, self).__init__()
        self.backbone = nn.Sequential(nn.Conv1d(64, 32, 3, padding=1),
                             nn.BatchNorm1d(32),
                             nn.ReLU(),
                             nn.MaxPool1d(2),
                             nn.Conv1d(32, 64, 3, padding=1),
                             nn.BatchNorm1d(64),
                             nn.ReLU(),
                             nn.MaxPool1d(2),
                             nn.Conv1d(64, 128, 3, padding=1),
                             nn.BatchNorm1d(128),
                             nn.ReLU(),
                             nn.MaxPool1d(2),
                             nn.Flatten(),
                             nn.Linear(4096, 64),
                             nn.ReLU(),
                             nn.Linear(64, 64))
        # Optional classification layer; None keeps the raw 64-d output.
        self.Wc = nn.Linear(64, num_classes) if num_classes is not None else None

    def forward(self, x):
        # Call the module (not .forward) so hooks registered on it still run.
        out = self.backbone(x)  # Bx64
        if self.Wc is not None:
            out = self.Wc(out)  # Bxnum_classes
        return out

In [None]:
# Hyper-parameters -- each one is defined exactly once, before anything uses it.
MAX_EPOCH = 10            # the later of the two former assignments (5, then 10) was the effective one
learning_rate = 0.0002    # the rate the optimizer was actually built with; kept small to ensure convergence
batch_size = 32
resumetraining = False

print_every = 20
test_n_savemodel_every_epoch = 2
seed_no = 0
stop_at_loss = 0.1    #before it was 0.1. But it is not very stable.
#================================
modelname = 'MVNN_wout_freq'    # Which model to use?
#================================

device = 'cpu'
#device = 'cuda'

# Apply the seed before the model is built so weight initialisation and the
# shuffling order of the training loader are repeatable.
torch.manual_seed(seed_no)
np.random.seed(seed_no)

# NOTE(review): the network's last layer emits 64 values per sample while the
# dataset has 4 classes, so CrossEntropyLoss treats this as a 64-way problem
# and argmax may return an index >= 4 -- confirm whether that is intended.
model = Frequent_Domain_Subnetwork().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#================================

In [None]:
from sklearn.metrics import accuracy_score # normal accuracy
from sklearn.metrics import balanced_accuracy_score # used in case of imbalanced data sets, average of recall, from 0 to 1
from sklearn.metrics import confusion_matrix # division of performance on the multilabels
from sklearn.metrics import cohen_kappa_score # compares model against random prediction, from -1 to 1
from sklearn.metrics import classification_report # for multilabel classification, gives precision, recall, f score, support, more

def print_metrics(y_true, y_pred, target_names):
    """Print a set of classification metrics for the given labels.

    y_true: ground-truth labels
    y_pred: predicted labels
    target_names: display names handed to classification_report
    """
    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)

    balanced = balanced_accuracy_score(y_true, y_pred)
    print("Balanced Accuracy:" , balanced)

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", matrix)

    kappa = cohen_kappa_score(y_true, y_pred)
    print("Cohen Kappa Score:", kappa)

    report = classification_report(y_true, y_pred, target_names=target_names)
    print("Classification Report:\n", report)

# Helper for eyeballing a single image.
def image_show(np_image):
    """Draw ``np_image`` (expected to be a numpy array) on a fresh 5x5 figure."""
    square_inches = (5, 5)
    plt.figure(figsize=square_inches)
    plt.imshow(np_image)
    plt.show()

In [None]:
# Training loop: one optimizer step per mini-batch. Running loss/accuracy are
# averaged over the batches seen since the previous report and then reset.
for epoch in range(MAX_EPOCH):
    # Make sure BatchNorm is in training mode even if an evaluation cell
    # (which calls model.eval()) was executed before this cell is re-run.
    model.train()
    total_loss, total_acc = 0, 0
    cnt = 0
    for i, data in enumerate(trainloader):
        X, y = data[0].float().to(device), data[1].to(device)
        optimizer.zero_grad()

        # forward
        out = model(X)
        loss = criterion(out, y)

        # backward
        loss.backward()
        optimizer.step()

        # stats: per-batch accuracy and loss, accumulated for the report window
        y_pred = torch.argmax(out, dim=1)
        total_acc += (y_pred == y).sum().item() / len(y_pred)
        total_loss += loss.item()

        cnt += 1
        if i % print_every == 0:
            avg_loss = total_loss / cnt
            avg_acc = total_acc / cnt
            total_acc, total_loss = 0, 0
            cnt = 0
            print('[Epoch %d Iter %d] Loss: %5f  Acc: %5f' % (epoch+1, i+1, avg_loss, avg_acc))

# The in-loop validation/checkpoint code that used to sit here as a bare string
# literal was dead code: it unpacked the batch as (Xp, Xf), took the labels from
# data[0], and saved epoch_acc / epoch_loss, which are not defined anywhere in
# the visible notebook. The test split is evaluated by a dedicated cell below.

# save the model somewhere
torch.save(model.state_dict(), "model")

[Epoch 1 Iter 1] Loss: 4.126261  Acc: 0.000000
[Epoch 1 Iter 21] Loss: 1.753894  Acc: 0.496875
[Epoch 1 Iter 41] Loss: 1.018311  Acc: 0.637500
[Epoch 1 Iter 61] Loss: 0.976918  Acc: 0.635938
[Epoch 1 Iter 81] Loss: 0.907277  Acc: 0.648438
[Epoch 1 Iter 101] Loss: 0.944477  Acc: 0.657813
[Epoch 1 Iter 121] Loss: 0.901147  Acc: 0.675000
[Epoch 1 Iter 141] Loss: 0.935786  Acc: 0.639062
[Epoch 1 Iter 161] Loss: 0.924929  Acc: 0.681250
[Epoch 1 Iter 181] Loss: 0.841995  Acc: 0.671875
[Epoch 1 Iter 201] Loss: 0.919518  Acc: 0.656250
[Epoch 1 Iter 221] Loss: 0.810133  Acc: 0.693750
[Epoch 1 Iter 241] Loss: 0.831985  Acc: 0.685937
[Epoch 1 Iter 261] Loss: 0.817299  Acc: 0.684375
[Epoch 1 Iter 281] Loss: 0.847840  Acc: 0.657813
[Epoch 1 Iter 301] Loss: 0.826316  Acc: 0.709375
[Epoch 1 Iter 321] Loss: 0.795262  Acc: 0.715625
[Epoch 1 Iter 341] Loss: 0.784903  Acc: 0.703125
[Epoch 1 Iter 361] Loss: 0.800849  Acc: 0.693750
[Epoch 1 Iter 381] Loss: 0.799522  Acc: 0.695312
[Epoch 1 Iter 401] Loss: 0

In [None]:
# Reload the weights the training cell wrote to the file "model" into the in-memory network.
model.load_state_dict(torch.load("model"))

<All keys matched successfully>

In [None]:
from sklearn.metrics import accuracy_score # normal accuracy
from sklearn.metrics import balanced_accuracy_score # used in case of imbalanced data sets, average of recall, from 0 to 1
from sklearn.metrics import confusion_matrix # division of performance on the multilabels
from sklearn.metrics import cohen_kappa_score # compares model against random prediction, from -1 to 1
from sklearn.metrics import classification_report # for multilabel classification, gives precision, recall, f score, support, more
# Default display names for the classification report.
# NOTE(review): presumably listed in the same order as the dataset's class indices -- confirm.
target_names = ['Bored', 'Confused','Engaged','frustrated']

def print_metrics(y_true, y_pred, target_names=None):
    """Print a set of classification metrics for the given labels.

    y_true: ground-truth labels
    y_pred: predicted labels
    target_names: optional display names for classification_report. When
        omitted, the notebook-level ``target_names`` list is used, so both
        ``print_metrics(y_true, y_pred)`` and the three-argument form accepted
        by the earlier definition of this function work.
    """
    if target_names is None:
        # Looked up at call time so a later reassignment of the notebook-level
        # list is honoured, exactly as with the former global reference.
        target_names = globals()['target_names']
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Balanced Accuracy:" , balanced_accuracy_score(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Cohen Kappa Score:", cohen_kappa_score(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred, target_names=target_names))

In [None]:
# Sanity check: confirm the test loader object exists in this kernel.
print(testloader)

<torch.utils.data.dataloader.DataLoader object at 0x7fd815563510>


In [None]:
# Evaluate the trained model on the test split and print classification metrics.
report_every = 10
cnt = 0
n_batches = len(testloader)
model.eval()  # use BatchNorm running statistics instead of per-batch statistics
y_true = []
y_pred = []
with torch.no_grad():
    for data in testloader:
        X, y = data[0].float().to(device), data[1].to(device)
        # prediction
        out = model(X)
        pred = torch.argmax(out, dim=1)

        # Collect on the CPU: the sklearn metrics cannot read tensors that
        # live on another device.
        y_true.append(y.cpu())
        y_pred.append(pred.cpu())

        cnt += 1

        if cnt % report_every == 0:
            print("[Test] %d / %d batches tested" % (cnt, n_batches))

    print("[Test] %d / %d batches tested" % (cnt, n_batches))
    y_true = torch.cat(y_true, dim=0)
    y_pred = torch.cat(y_pred, dim=0)
    print_metrics(y_true, y_pred)

[Test] 10 / 115 batches tested
[Test] 20 / 115 batches tested
[Test] 30 / 115 batches tested
[Test] 40 / 115 batches tested
[Test] 50 / 115 batches tested
[Test] 60 / 115 batches tested
[Test] 70 / 115 batches tested
[Test] 80 / 115 batches tested
[Test] 90 / 115 batches tested
[Test] 100 / 115 batches tested
[Test] 110 / 115 batches tested
[Test] 115 / 115 batches tested
Accuracy: 0.6947023484434736
Balanced Accuracy: 0.6117553721544172
Confusion Matrix:
 [[ 776  205  345   43]
 [  89  316   62   21]
 [  64  110 1364   21]
 [  56   67   35   88]]
Cohen Kappa Score: 0.5380688100012603
Classification Report:
               precision    recall  f1-score   support

       Bored       0.79      0.57      0.66      1369
    Confused       0.45      0.65      0.53       488
     Engaged       0.76      0.87      0.81      1559
  frustrated       0.51      0.36      0.42       246

    accuracy                           0.69      3662
   macro avg       0.63      0.61      0.61      3662
weig

In [None]:
# Write the model's current weights (state_dict only) to the file "MyModel".
torch.save(model.state_dict(), "MyModel")