# CS4487 - Group Project - Group 22

Name List:  
CHENG Ho Man 56208961, CHENG Hong Wai 56216309, CHONG Chun Yu 56225263

In [None]:
# library installation
# pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
# pip install matplotlib

In [None]:
# import standard PyTorch modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# import torchvision module to handle image manipulation
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

Here we load the dataset and perform data augmentation, for example flipping, rotation and color enhancement.
Then, all transformed data are combined into one dataset, which has a size of 12000*9 = 108000 photos.

The structure of the test data should be the same as the sample dataset provided by TA Team. There should be two folders storing original and manipulated data separately.


    |--- data

        |--- test

            |--- original

                |--- 0001.png

                |--- ...

            |--- manipulated

                |--- DF_0001.png

                |--- ...

        |--- train

            |--- original

                |--- 0.png

                |--- ...

            |--- manipulated

                |--- DF_0.png

                |--- ... 


In [None]:
# Per-channel normalization applied as the last step of every pipeline.
# NOTE(review): these look like the commonly quoted CIFAR-10 statistics --
# confirm they are appropriate for this dataset.
normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],std=[0.2023, 0.1994, 0.2010])

# original data
data_transform = transforms.Compose([transforms.ToTensor(), normalize])

# data augmentation
# RandomHorizontalFlip takes a probability; p=1.0 means "always flip".
# (The previous value of 100 only worked because any random draw is < 100.)
data_transform2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=1.0),
    normalize,
])
data_transform3 = transforms.Compose([
    transforms.ToTensor(),
    transforms.ColorJitter(contrast=1),
    normalize,
])
data_transform4 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.ColorJitter(contrast=0.7),
    normalize,
])
data_transform5 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=(0, 25)),
    normalize,
])
# 335..360 degrees is the same set of rotations as -25..0 degrees.
data_transform6 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=(335, 360)),
    normalize,
])
data_transform7 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=(315, 360)),
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.ColorJitter(contrast=0.7),
    normalize,
])
data_transform8 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=(0, 45)),
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.ColorJitter(contrast=0.7),
    normalize,
])
data_transform9 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomPerspective(distortion_scale=0.5, p=1.0),
    normalize,
])


# import dataset
train_dir = './data/train'


def _load_train_set(transform):
    """Return an ImageFolder over ``train_dir`` that applies ``transform`` to each image."""
    return datasets.ImageFolder(root=train_dir,
                                transform=transform,
                                target_transform=None)


# One view of the same image folder per pipeline; the individual names are
# kept because later cells index some of them directly for previews.
train_datafull = _load_train_set(data_transform)
train_data2 = _load_train_set(data_transform2)
train_data3 = _load_train_set(data_transform3)
train_data4 = _load_train_set(data_transform4)
train_data5 = _load_train_set(data_transform5)
train_data6 = _load_train_set(data_transform6)
train_data7 = _load_train_set(data_transform7)
train_data8 = _load_train_set(data_transform8)
train_data9 = _load_train_set(data_transform9)

# Concatenate the original view and the eight augmented views into one dataset.
conbine = (
    train_datafull,
    train_data2,
    train_data3,
    train_data4,
    train_data5,
    train_data6,
    train_data7,
    train_data8,
    train_data9,
)
train_data = torch.utils.data.ConcatDataset(conbine)

print(len(train_data))

Here are some of the modified training data.

In [None]:
# Preview sample 1 of the contrast-jittered view; move channels last for matplotlib.
sample_img = train_data3[1][0]
plt.imshow(sample_img.permute(1, 2, 0))

In [None]:
# Preview sample 19 of the perspective-distorted view; move channels last for matplotlib.
sample_img = train_data9[19][0]
plt.imshow(sample_img.permute(1, 2, 0))

In [None]:
# Preview sample 1 of the rotated view; move channels last for matplotlib.
sample_img = train_data6[1][0]
plt.imshow(sample_img.permute(1, 2, 0))

## Model

In our experiments, our own CNN model achieved a higher accuracy than the pre-trained EfficientNet and ResNet models available online. Therefore, we chose our CNN as the model for this project.

We implemented the model as a CNN. We tested it with 2, 3 and 4 conv layers and with 2 and 3 pooling layers. After trying different numbers of layers, we arrived at our most efficient CNN model, which has 4 conv layers, 4 pooling layers and 3 linear layers.

In [None]:
# CNN
class Network(nn.Module):
    """Binary image classifier: four conv + max-pool stages, then three linear layers.

    The size notes below assume a 3-channel 299x299 input, which is what the
    flattened feature count expected by ``fc1`` (48*15*15) corresponds to.
    ``forward`` returns raw scores (no softmax) with one column per class.
    """

    def __init__(self):
        super().__init__()

        # stage 1: 3@299*299 -> 6@296*296 -> 6@148*148
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=4)
        self.pool1 = nn.MaxPool2d(2)

        # stage 2: 6@148*148 -> 12@144*144 -> 12@72*72
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.pool2 = nn.MaxPool2d(2)

        # stage 3: 12@72*72 -> 24@68*68 -> 24@34*34
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)
        self.pool3 = nn.MaxPool2d(2)

        # stage 4: 24@34*34 -> 48@30*30 -> 48@15*15
        self.conv4 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=5)
        self.pool4 = nn.MaxPool2d(2)

        # classifier head: 10800 -> 720 -> 48 -> 2
        flat_features = 48 * 15 * 15
        self.fc1 = nn.Linear(flat_features, 48 * 15)
        self.fc2 = nn.Linear(48 * 15, 48)
        self.fc3 = nn.Linear(48, 2)

    def forward(self, t):
        """Run a batch through the conv stages and the head; return the class scores."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
            (self.conv4, self.pool4),
        )
        for conv, pool in stages:
            t = pool(F.relu(conv(t)))

        # keep the batch dimension, collapse channels and spatial dims
        t = t.flatten(start_dim=1)

        for hidden in (self.fc1, self.fc2):
            t = F.relu(hidden(t))
        return self.fc3(t)

### Reminder

We run the model on a CUDA GPU instead of the CPU. Therefore, errors may occur if a different device is used during TA testing.

In [None]:
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device('cpu')

# Ask the CUDA caching allocator to release any unused cached memory.
torch.cuda.empty_cache()

## Training part

Here are the most effective hyper-parameters we have found.

In [None]:
# hyper-parameters
num_epochs = 20        # number of passes of the training loop over the loader
batch_size = 35        # samples per mini-batch in the training DataLoader
learning_rate = 1e-3   # step size passed to the optimizer

In [None]:
# Build the model on the selected device. Using .to(device) rather than
# .cuda() keeps this cell runnable on machines without a CUDA GPU, and keeps
# the model on the same device the batches are moved to below.
model = Network().to(device)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# loss and optimizer
# The loss is computed with F.cross_entropy inside the loop, so no separate
# criterion object is needed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train the model
for epoch in range(num_epochs):
    model.train()
    correct_train = 0   # correctly classified samples seen this epoch
    total_train = 0     # samples seen this epoch
    for img, labels in train_loader:
        img = img.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        preds = model(img)

        loss = F.cross_entropy(preds, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest score in each row.
        predicted_train = preds.argmax(dim=1)
        total_train += labels.size(0)
        # .item() keeps the running count a plain Python number, so the
        # accuracy prints as a number instead of a device tensor repr.
        correct_train += (predicted_train == labels).sum().item()

    # The reported loss is the loss of the last mini-batch of the epoch.
    print ('Train Epoch [{}/{}], Loss: {:.3f}'.format(epoch+1, num_epochs, loss.item()))
    print ('Training accuracy: {} %'.format(100 * correct_train / total_train))

# save the model
torch.save(model.state_dict(), 'group22_cnn.ckpt')

This CNN model has the highest training accuracy among all CNN models we have implemented.

## Testing part

In [None]:
# testing for TA
test_dir = "./data/test" # insert data path here

# transforms
# Redefined here so this cell can run without the training cells; the values
# match the un-augmented training pipeline.
normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],std=[0.2023, 0.1994, 0.2010])
data_transform = transforms.Compose([transforms.ToTensor(),normalize])

test_data = datasets.ImageFolder(root=test_dir,
                                  transform=data_transform,
                                  target_transform=None)
# Order does not affect the aggregated metrics, so no shuffling is needed.
test_loader = DataLoader(dataset=test_data, batch_size=100, shuffle=False)

model_test = Network().to(device)
# map_location lets a checkpoint saved from a GPU run load on whatever device
# is selected here (e.g. a CPU-only machine).
model_test.load_state_dict(torch.load('group22_cnn.ckpt', map_location=device))

model_test.eval()
with torch.no_grad():
    # Per-class counters, shape (1, 2): one column per class.
    target_num = torch.zeros((1, 2))    # samples whose true label is the class
    predict_num = torch.zeros((1, 2))   # samples predicted as the class
    acc_num = torch.zeros((1, 2))       # samples correctly predicted as the class
    for img, labels in test_loader:
        img = img.to(device)
        labels = labels.to(device)

        preds = model_test(img)
        predicted = preds.argmax(dim=1)

        # One-hot masks (on CPU) of the predicted and the true classes.
        pre_mask = torch.zeros(preds.size()).scatter_(1, predicted.cpu().view(-1, 1), 1.)
        predict_num += pre_mask.sum(0)
        tar_mask = torch.zeros(preds.size()).scatter_(1, labels.cpu().view(-1, 1), 1.)
        target_num += tar_mask.sum(0)
        # A sample counts for a class only where prediction and target agree.
        acc_mask = pre_mask * tar_mask
        acc_num += acc_mask.sum(0)

    # The counters hold whole numbers, and a zero denominator implies a zero
    # numerator. Clamping the denominator to 1 therefore reports 0 instead of
    # NaN for a class that is absent or never predicted, and changes nothing else.
    recall = acc_num / target_num.clamp(min=1)
    precision = acc_num / predict_num.clamp(min=1)
    pr_sum = recall + precision
    # F1 is defined as 0 where recall and precision are both 0 (avoids 0/0).
    F1 = torch.where(pr_sum > 0, 2 * recall * precision / pr_sum, torch.zeros_like(pr_sum))
    accuracy = 100. * acc_num.sum(1) / target_num.sum(1)

    print('Test Acc {}, recal {}, precision {}, F1-score {}'.format(accuracy, recall, precision, F1))