# Imports

In [None]:
import torch
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision import transforms
from tensorflow.keras.utils import img_to_array
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader
# from sklearn.model_selection import train_test_split

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


# Settings and Dataset

In [None]:
data_path = './'

# Per-channel statistics expressed on the 0-255 pixel scale.
# ToTensor() rescales pixels to [0, 1] *before* Normalize runs, so the statistics have to be
# rescaled by 255 as well; feeding the raw 0-255 values squeezes every input to roughly -2
# and leaves the network with almost no signal to learn from.
CIFAR10_MEAN_255 = (125.30691805, 122.95039414, 113.86538318)
CIFAR10_STD_255 = (62.99321928, 62.08870764, 66.70489964)

cifar10_transform = transforms.Compose([
    transforms.Resize(224),  # the models in this notebook are sized for 224x224 inputs
    transforms.ToTensor(),
    transforms.Normalize(
        tuple(m / 255.0 for m in CIFAR10_MEAN_255),
        tuple(s / 255.0 for s in CIFAR10_STD_255),
    ),
])

cifar10 = CIFAR10(data_path, train=True, download=True, transform=cifar10_transform)
# The test split is referenced by the class-filtering cells further down, so it must exist.
cifar10_test = CIFAR10(data_path, train=False, download=True, transform=cifar10_transform)

Files already downloaded and verified


In [None]:
# Carve a small train/validation subset out of the training data; the remainder is discarded.
# A seeded generator keeps the split identical across kernel restarts, and the remainder is
# derived from the dataset length instead of being hard-coded.
n_train, n_val = 2700, 300
train_set, valset, _ = torch.utils.data.random_split(
    cifar10,
    [n_train, n_val, len(cifar10) - n_train - n_val],
    generator=torch.Generator().manual_seed(1),
)

In [None]:
# Keep only the samples of the first two classes (labels 0 and 1).
# Labels are read from `.targets` rather than by indexing the dataset: `cifar10[i]` runs the
# whole Resize/ToTensor/Normalize pipeline on every image just to hand back its label.
idx = np.flatnonzero(np.asarray(cifar10.targets) < 2)
idxTest = np.flatnonzero(np.asarray(cifar10_test.targets) < 2)

# Reduce the datapoints from 10k (train) + 2k (test) to 2700 + 300,
# i.e. 90% for training and 10% for testing.
idx = idx[0:2700]
idxTest = idxTest[0:300]


In [None]:
# Materialise the selected samples as NumPy arrays.
# Images must go through the dataset so the transform is applied; labels are taken straight
# from `.targets`, which avoids transforming every selected image a second time.
cifar10_trainX = torch.stack([cifar10[i][0] for i in idx]).numpy()
cifar10_trainY = np.asarray(cifar10.targets)[idx]

cifar10_testX = torch.stack([cifar10_test[i][0] for i in idxTest]).numpy()
cifar10_testY = np.asarray(cifar10_test.targets)[idxTest]

In [None]:
# Sanity check on the stacked training images: (samples, channels, height, width).
np.shape(cifar10_trainX)
# np.shape(cifar10_trainY)

(2700, 3, 224, 224)

In [None]:
# Hyperparameters
# NOTE(review): fit() below does not read these globals; pass values explicitly if they
# are meant to drive training.
random_seed = 1
learning_rate = 0.05
num_epochs = 10
batch_size = 128

# Architecture
num_classes = 1  # one output unit: the models squash it through a sigmoid (binary task)

# Actually apply the seed so weight initialisation and any shuffling are reproducible.
torch.manual_seed(random_seed)
np.random.seed(random_seed)



# Models

## VGG16

Notes from the VGG paper:
- Layers are denoted “conv(receptive field size)-(number of channels)”.
- The convolution stride is fixed to 1.
- The padding is 1 pixel for 3 × 3 conv layers.
- Max-pooling is performed over a 2 × 2 pixel window, with stride 2.
- Not all conv layers are followed by max-pooling.

<img src="VGG_16.png" width=700px>

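In configuration C, the 1 × 1 convolutions are a way to increase the non-linearity of the decision function without affecting the receptive fields of the conv layers. The implementation below uses only 3 × 3 convolutions (configuration D of the paper).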

In [None]:
#@title VGG16

class VGG16(torch.nn.Module):
    """VGG-16 style feature extractor (13 conv layers in 5 blocks) with a single linear head.

    Every convolution is 3x3, stride 1, padding 1 and is followed by a ReLU; each block ends
    with a 2x2 max-pool of stride 2. Instead of the paper's three-layer 4096-wide classifier,
    the flattened features feed one ``Linear`` layer.

    Args:
        num_classes: number of output units of the linear head (default 1, i.e. a single
            logit for binary classification).

    The head is sized for 3x224x224 inputs: five poolings reduce 224 to 7, giving
    7*7*512 features.
    """

    def __init__(self, num_classes=1) -> None:
        super(VGG16, self).__init__()
        # Arguments are (in_channels, out_channels, number of conv layers).
        self.block_1 = self._make_block(3, 64, 2)
        self.block_2 = self._make_block(64, 128, 2)
        self.block_3 = self._make_block(128, 256, 3)
        self.block_4 = self._make_block(256, 512, 3)
        self.block_5 = self._make_block(512, 512, 3)

        # Honour `num_classes` instead of hard-coding a single output unit.
        self.classifier = torch.nn.Linear(in_features=7 * 7 * 512, out_features=num_classes)

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs):
        """Build `num_convs` (3x3 conv + ReLU) pairs followed by one 2x2/stride-2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(
                torch.nn.Conv2d(in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=(3, 3),
                                stride=(1, 1),
                                padding=1)
            )
            layers.append(torch.nn.ReLU())
            in_channels = out_channels  # later convs in the block keep the channel count
        layers.append(torch.nn.MaxPool2d(kernel_size=(2, 2), stride=2))
        return torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(logits, probs)``.

        ``logits`` has shape (batch, num_classes); ``probs`` is the element-wise sigmoid of
        the logits flattened to one dimension.
        """
        for block in (self.block_1, self.block_2, self.block_3, self.block_4, self.block_5):
            x = block(x)
        logits = self.classifier(torch.flatten(x, start_dim=1))
        probs = torch.sigmoid(logits)  # torch.sigmoid replaces the deprecated F.sigmoid

        return logits, probs.reshape(-1)

## AlexNet

AlexNet introduced:
- ReLU as an activation function.
- Local Response Normalization.
- Overlapping max-pooling (stride < kernel size).

<img src="AlexNet.png" width=800px>

In [None]:
#@title alexnet
class AlexNet(torch.nn.Module):
    """AlexNet-style network: five conv layers followed by a small two-layer classifier.

    The classifier is a reduced head (9216 -> 100 -> num_classes) rather than the paper's
    4096-wide fully connected layers.

    Args:
        num_classes: number of output units (default 1, i.e. a single logit for binary
            classification).

    The head is sized for 3x224x224 inputs, which the conv stack reduces to 6x6x256.
    """

    def __init__(self, num_classes=1) -> None:
        super().__init__()
        self.conv = torch.nn.Sequential(
            # Input image: 3x224x224.
            # Output size: floor((224 + 2*2 - 11) / 4) + 1 = 55
            torch.nn.Conv2d(in_channels=3,
                            out_channels=96,
                            kernel_size=(11, 11),
                            stride=(4, 4),
                            padding=2
                        ),  # 55*55*96
            torch.nn.ReLU(),
            # Local Response Normalization over 2 neighbouring channels.
            # NOTE(review): the AlexNet paper uses n=5, k=2, alpha=1e-4, beta=0.75 -
            # confirm that size=2 is intentional.
            torch.nn.LocalResponseNorm(2),
            torch.nn.MaxPool2d(kernel_size=(3, 3), stride=2),  # overlapping pooling -> 27*27*96

            # 27 + 2*2 - 5 + 1 = 27
            torch.nn.Conv2d(in_channels=96,
                            out_channels=256,
                            kernel_size=(5, 5),
                            stride=1,
                            padding=2),  # 27*27*256
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(3, 3), stride=2),  # 13*13*256

            # 13 + 2*1 - 3 + 1 = 13
            torch.nn.Conv2d(in_channels=256,
                            out_channels=384,
                            kernel_size=(3, 3),
                            stride=1,
                            padding=1),  # 13*13*384
            torch.nn.ReLU(),

            torch.nn.Conv2d(in_channels=384,
                            out_channels=384,
                            kernel_size=(3, 3),
                            stride=1,
                            padding=1),  # 13*13*384
            torch.nn.ReLU(),

            torch.nn.Conv2d(in_channels=384,
                            out_channels=256,
                            kernel_size=(3, 3),
                            stride=1,
                            padding=1),  # 13*13*256
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(3, 3), stride=2),  # 6*6*256
        )

        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(in_features=6 * 6 * 256, out_features=100),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=100, out_features=num_classes),
        )

    def forward(self, x):
        """Return ``(logits, probs)``.

        ``logits`` has shape (batch, num_classes); ``probs`` is the element-wise sigmoid of
        the logits flattened to one dimension.
        """
        features = self.conv(x)
        logits = self.classifier(torch.flatten(features, start_dim=1))
        # Sigmoid because the task is binary; torch.sigmoid replaces the deprecated F.sigmoid.
        probs = torch.sigmoid(logits)

        return logits, probs.reshape(-1)


In [None]:
def fit(train_set, model, lr, num_epochs=10, batch_size=14):
    """Train `model` as a binary classifier (label 0 vs. every other label) with AdamW.

    Args:
        train_set: dataset yielding ``(image, label)`` pairs; any non-zero label is treated
            as the positive class.
        model: module whose forward pass returns ``(logits, probs)``. It is trained on
            whatever device its parameters already live on.
        lr: learning rate passed to ``torch.optim.AdamW``.
        num_epochs: number of passes over `train_set` (default 10).
        batch_size: mini-batch size (default 14).

    Prints the epoch index and the mean batch loss after every epoch. Returns ``None``.
    """
    # Follow the model's device instead of assuming CUDA, so CPU-only runs work too.
    device = next(model.parameters()).device
    loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,  # present the batches in a fresh order each epoch
        pin_memory=(device.type == "cuda"),  # pinning only helps host-to-GPU copies
    )
    optimizer = torch.optim.AdamW(model.parameters(), lr)

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in loader:
            # Collapse the labels to {0, 1} in one vectorised step.
            targets = (labels != 0).float().to(device)

            optimizer.zero_grad()
            logits, _ = model(images.to(device))
            # Working on logits is numerically safer than BCE on already-squashed
            # probabilities, which saturate at exactly 0 or 1.
            loss = F.binary_cross_entropy_with_logits(logits.reshape(-1), targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Report the epoch average rather than the (noisy) loss of the final batch.
        print("Epoch: ", epoch)
        print("Loss: ", running_loss / max(num_batches, 1))

def scores(model, dataset=None, batch_size=256):
    """Print accuracy, precision, recall and F1 of `model` on a validation dataset.

    Labels are binarised the same way as during training (0 stays 0, anything else becomes
    1) and a sample is predicted positive when its probability is not below 0.5.

    Args:
        model: module whose forward pass returns ``(logits, probs)``. Inputs are moved to
            the device its parameters live on.
        dataset: dataset of ``(image, label)`` pairs; defaults to the notebook-level
            ``valset``.
        batch_size: evaluation batch size (default 256).

    Returns ``None``; the metrics are printed.
    """
    if dataset is None:
        dataset = valset
    device = next(model.parameters()).device  # do not assume CUDA
    val_loader = DataLoader(dataset, batch_size=batch_size)

    total_preds = []
    total_target = []

    was_training = model.training
    model.eval()
    # no_grad: evaluation needs no autograd graph, which would otherwise be kept alive for
    # every (large) batch.
    with torch.no_grad():
        for images, labels in val_loader:
            _, probs = model(images.to(device))
            total_preds.extend(0 if p < 0.5 else 1 for p in probs.cpu().tolist())
            total_target.extend((labels != 0).long().tolist())
    model.train(was_training)  # leave the model in the mode it was handed over in

    # labels=[0, 1] forces a 2x2 matrix even when only one class shows up, so the
    # four-way unpacking below cannot fail.
    tn, fp, fn, tp = confusion_matrix(total_target, total_preds, labels=[0, 1]).ravel()

    # Guard every ratio against an empty denominator (e.g. no positive predictions).
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    acc = (tp + tn) / total if total else 0.0
    f1_score = 2 * ((precision * recall) / (precision + recall)) if (precision + recall) else 0.0

    print("Accuracy: ", acc)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1 Score: ", f1_score)


In [None]:
# Train AlexNet on the device selected at the top of the notebook rather than
# unconditionally on CUDA.
model1 = AlexNet().to(device)
fit(train_set, model1, 0.001)

Epoch:  0
Loss:  0.32861417531967163
Epoch:  1
Loss:  0.3092641234397888
Epoch:  2
Loss:  0.2881191372871399
Epoch:  3
Loss:  0.2874259948730469
Epoch:  4
Loss:  0.28745657205581665
Epoch:  5
Loss:  0.2874767780303955
Epoch:  6
Loss:  0.2875770628452301
Epoch:  7
Loss:  0.28751140832901
Epoch:  8
Loss:  0.2875679135322571
Epoch:  9
Loss:  0.2875572443008423
Epoch:  10
Loss:  0.2875671088695526
Epoch:  11
Loss:  0.28759217262268066
Epoch:  12
Loss:  0.28761744499206543
Epoch:  13
Loss:  0.2887268662452698
Epoch:  14
Loss:  0.28766483068466187
Epoch:  15
Loss:  0.28764867782592773
Epoch:  16
Loss:  0.28769275546073914
Epoch:  17
Loss:  0.28775009512901306
Epoch:  18
Loss:  0.2877691090106964
Epoch:  19
Loss:  0.28780364990234375
Epoch:  20
Loss:  0.2878483533859253
Epoch:  21
Loss:  0.28784775733947754
Epoch:  22
Loss:  0.2879144549369812
Epoch:  23
Loss:  0.2879222631454468
Epoch:  24
Loss:  0.28798556327819824
Epoch:  25
Loss:  0.28798842430114746
Epoch:  26
Loss:  0.288066029548645
Ep

In [None]:
# Report validation metrics for the trained AlexNet.
scores(model=model1)



Accuracy:  0.9133333333333333
Precision:  0.9133333333333333
Recall:  1.0
F1 Score:  0.9547038327526133


In [None]:
# Train VGG16 on the device selected at the top of the notebook rather than
# unconditionally on CUDA.
model2 = VGG16().to(device)
fit(train_set, model2, 0.0001)

Epoch:  0
Loss:  0.3192116618156433
Epoch:  1
Loss:  0.29941749572753906
Epoch:  2
Loss:  0.2952144145965576
Epoch:  3
Loss:  0.29231566190719604
Epoch:  4
Loss:  0.29005175828933716
Epoch:  5
Loss:  0.2889229655265808
Epoch:  6
Loss:  0.2885769009590149
Epoch:  7
Loss:  0.28842586278915405
Epoch:  8
Loss:  0.2885691821575165
Epoch:  9
Loss:  0.2881740927696228
