<a href="https://colab.research.google.com/github/Mrutyunjay01/AML-specialisation/blob/master/Mrutyunjay/notebooks/DCR_Resizing_Factor_Classification_with_LANCZOS_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q kaggle
!mkdir ~/.kaggle/
!cp kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mrutyunjaybiswal/djpeg-forensics-dataset-in-gray-scale
!unzip -q "/content/djpeg-forensics-dataset-in-gray-scale.zip" -d './dpjeg_forensics'

mkdir: cannot create directory ‘/root/.kaggle/’: File exists
Downloading djpeg-forensics-dataset-in-gray-scale.zip to /content
100% 11.7G/11.7G [04:43<00:00, 30.1MB/s]
100% 11.7G/11.7G [04:43<00:00, 44.5MB/s]
replace ./dpjeg_forensics/test/QF1_50/RF_06/QF2_50/img_0_qf1_50_qf2_50_rf_06.jpeg? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [2]:
import os
import gc
import glob
import math
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [3]:
train_folder = "/content/dpjeg_forensics/train"
test_folder = "/content/dpjeg_forensics/test"
val_folder = "/content/dpjeg_forensics/val"


def list_jpeg_files(folder):
    """Return every ``*.jpeg`` path found recursively under ``folder``.

    ``glob`` yields matches in an arbitrary, filesystem-dependent order, so the
    result is sorted to give the same sequence on every run and machine.
    """
    return sorted(glob.glob(os.path.join(folder, "**/*.jpeg"), recursive=True))


# Pool the three shipped folders into one list; the notebook re-splits it later.
all_files = [
    path
    for folder in (train_folder, test_folder, val_folder)
    for path in list_jpeg_files(folder)
]
print("Total Number of files: ", len(all_files))

Total Number of files:  64399


0

In [4]:
# Fixed seed so the train / val / test membership is identical on every run;
# without it each kernel restart evaluates on a different subset.
SPLIT_SEED = 42

# First hold out 15% of all files for testing, then 15% of the remainder for validation.
train_files, test_files = train_test_split(
    all_files, test_size=0.15, shuffle=True, random_state=SPLIT_SEED
)
train_files, val_files = train_test_split(
    train_files, test_size=0.15, shuffle=True, random_state=SPLIT_SEED
)
print("Number of files in train: ", len(train_files))
print("Number of files in test: ", len(test_files))
print("Number of files in val: ", len(val_files))

Number of files in train:  46528
Number of files in test:  9660
Number of files in val:  8211


In [5]:
# Training configuration.
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 32
EPOCHS = 30

# Label code (as it appears in the file names) -> class index, in listing order.
class_dict = dict(
    zip(
        ("06", "07", "08", "09", "095", "105", "11", "12", "13", "14"),
        range(10),
    )
)

In [6]:
class DatasetLoader(Dataset):
    """Map-style dataset yielding a centre-cropped image tensor and its class index.

    The label code is parsed from the file name: the text after the last
    underscore and before the first dot of the base name
    (``img_0_qf1_50_qf2_50_rf_06.jpeg`` -> ``"06"``), then looked up in
    ``class_dict``.

    Args:
        filenames: sequence of image paths. It is copied, so the caller's list
            is never reordered by the optional shuffle.
        target_size: ``(height, width)`` of the centre crop.
        batch_size: stored on the instance; not otherwise used by this class.
        shuffle: when True, the private copy of ``filenames`` is shuffled once
            at construction (and again on every ``on_epoch_end`` call).
        class_dict: mapping from label code to integer class index.
    """

    def __init__(self, filenames, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE, shuffle=True, class_dict=class_dict):
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Copy: np.random.shuffle works in place and must not reorder the caller's list.
        self.filenames = list(filenames)
        self.target_size = target_size
        self.class_dict = class_dict
        self.on_epoch_end()

    @staticmethod
    def read_label(path):
        """Return the label code embedded in the file name of ``path``."""
        return os.path.basename(path).split("_")[-1].split(".")[0]

    def map_label(self, label):
        """Translate a label code into its class index (KeyError if unknown)."""
        return self.class_dict[label]

    @staticmethod
    def read_image(path):
        """Load the image at ``path`` into a numpy array, closing the file afterwards."""
        with Image.open(path) as img:
            return np.array(img)

    @staticmethod
    def crop_img(img, target_size=(256, 256)):
        """Centre-crop a 2-D image and append a trailing channel axis.

        Args:
            img: 2-D array of shape ``(H, W)``.
            target_size: ``(height, width)`` of the crop.

        Returns:
            Array of shape ``(height, width, 1)``.

        Raises:
            ValueError: if the image is smaller than the requested crop; a
                negative slice start would otherwise silently return the
                wrong region.
        """
        img = np.expand_dims(img, 2)
        h_, w_, _ = img.shape
        crop_h, crop_w = target_size
        if h_ < crop_h or w_ < crop_w:
            raise ValueError(
                "image of size {}x{} is smaller than the crop {}x{}".format(h_, w_, crop_h, crop_w)
            )
        top = (h_ - crop_h) // 2
        left = (w_ - crop_w) // 2
        return img[top:top + crop_h, left:left + crop_w, :]

    def on_epoch_end(self):
        """Reshuffle the file list in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.filenames)

    def __getitem__(self, index):
        """Return ``(image_tensor, class_index)`` for the file at ``index``."""
        path = self.filenames[index]
        label = self.read_label(path)
        image = self.crop_img(self.read_image(path), self.target_size)
        return torch.tensor(image), self.map_label(label)

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.filenames)

In [7]:
class DJNet(nn.Module):
    """Convolutional classifier for single-channel 256x256 inputs.

    Five convolutional stages are followed by three fully connected layers.
    ``fc1`` expects ``12 * 12 * 128`` flattened features, which is what the
    stages produce for a 256x256 input. The three linear layers are applied
    back to back with no activation in between, and ``layer3`` has no
    activation either.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        # Stage 1: plain 5x5 convolution, no normalisation or activation.
        self.layer1 = nn.Sequential(nn.Conv2d(1, 5, kernel_size=5))

        # Stage 2: strided 7x7 convolution, then BN / tanh / max-pool.
        self.layer2 = nn.Sequential(
            nn.Conv2d(5, 96, kernel_size=7, stride=2),
            nn.BatchNorm2d(96),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Stage 3: 5x5 convolution with batch norm only.
        self.layer3 = nn.Sequential(
            nn.Conv2d(96, 64, kernel_size=5),
            nn.BatchNorm2d(64),
        )

        # Stage 4: 5x5 convolution, then BN / tanh / max-pool.
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=5),
            nn.BatchNorm2d(64),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Stage 5: 1x1 convolution, then BN / tanh / average-pool.
        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=1),
            nn.BatchNorm2d(128),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=3, stride=2),
        )

        flat_features = 12 * 12 * 128
        hidden_units = 200
        self.fc1 = nn.Linear(flat_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, num_classes)`` logits."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)

        # Collapse everything except the batch axis before the dense head.
        logits = features.reshape(features.size(0), -1)
        for dense in (self.fc1, self.fc2, self.fc3):
            logits = dense(logits)
        return logits

def ConvConst(filter_weights):
    """Re-normalise convolution filters in place so they act as constrained filters.

    For every filter, using input channel 0 as the reference:
      1. the centre tap is zeroed,
      2. the whole filter is divided by minus the sum of channel 0, so the
         remaining taps of channel 0 sum to -1,
      3. the centre tap is set to 1.

    Works for any number of filters and any kernel size; the centre is taken
    as ``(kh // 2, kw // 2)``. Everything stays on the tensor's own device, so
    there is no per-filter host round trip.

    Args:
        filter_weights: tensor of shape ``(out_channels, in_channels, kh, kw)``.
            Modified in place; call under ``torch.no_grad()`` when it is a
            parameter that requires grad.

    Returns:
        The same tensor object, after modification.

    Note:
        There is no guard against a filter whose off-centre taps sum to zero;
        the division then yields inf/nan, exactly as it would for a plain division.
    """
    kernel_h, kernel_w = filter_weights.shape[-2:]
    centre_y, centre_x = kernel_h // 2, kernel_w // 2

    # Exclude the centre tap from the normalising sum.
    filter_weights[:, 0, centre_y, centre_x] = 0

    # One (negated) sum per filter, broadcast over channel and spatial axes.
    denom = -filter_weights[:, 0].sum(dim=(1, 2))
    filter_weights.div_(denom.view(-1, 1, 1, 1))

    filter_weights[:, 0, centre_y, centre_x] = 1
    return filter_weights

In [8]:
# Network with 10 output logits, moved to the selected device.
model = DJNet(10).to(device)

# Loss on raw logits.
criterion = nn.CrossEntropyLoss()

# Adam optimiser; the scheduler multiplies the learning rate by 0.2 once the
# monitored value (mode='max', i.e. higher is better) has stalled for 5 steps.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)
scheduler = ReduceLROnPlateau(
    optimizer=optimizer,
    mode='max',
    factor=0.2,
    patience=5,
    verbose=True,
)

In [12]:
# Only the training data needs a random order. The evaluation sets are read in
# a fixed order so their per-epoch metrics are computed over identical batches.
train_data_gen = DatasetLoader(train_files)
test_data_gen = DatasetLoader(test_files, shuffle=False)
val_data_gen = DatasetLoader(val_files, shuffle=False)

# Batch sizes derive from BATCH_SIZE instead of repeating literals; the
# evaluation loaders use twice the training batch size.
train_set = DataLoader(train_data_gen, batch_size=BATCH_SIZE, shuffle=True)
test_set = DataLoader(test_data_gen, batch_size=2 * BATCH_SIZE, shuffle=False)
val_set = DataLoader(val_data_gen, batch_size=2 * BATCH_SIZE, shuffle=False)

In [None]:
# Per-epoch metric histories, one list per split.
acc_hist = {'train': [], "val": [], "test": []}
loss_hist = {'train': [], "val": [], "test": []}

max_patience = 5          # epochs without validation improvement tolerated before stopping
patience = max_patience   # remaining budget; reset whenever validation accuracy improves
best_val = 0.0            # best validation accuracy (%) seen so far -- must persist across epochs


def prepare_batch(x, y):
    """Permute a channels-last image batch to channels-first and move it and its labels to `device`."""
    x = x.permute(0, 3, 1, 2).to(device, dtype=torch.float)
    y = y.to(device, dtype=torch.long)
    return x, y


def evaluate(loader):
    """Return ``(mean loss, accuracy in %)`` of `model` over `loader`.

    Both values are weighted by the number of samples in each batch, so a
    shorter final batch does not bias the result. Gradients are disabled;
    the caller is responsible for putting the model in eval mode.
    """
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for x, y in tqdm(loader):
            x, y = prepare_batch(x, y)
            logits = model(x)
            batch_n = y.size(0)
            loss_sum += criterion(logits, y).item() * batch_n
            # argmax of the logits equals argmax of their (log-)softmax.
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            n_seen += batch_n
    n_seen = max(n_seen, 1)  # avoid dividing by zero on an empty loader
    return loss_sum / n_seen, 100.0 * n_correct / n_seen


for epoch in range(EPOCHS):
    model_path = f"./best_model_epoch_{epoch}.pth"
    print("[INFO]Training Model...")
    model.train()

    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    for x, y in tqdm(train_set):
        x, y = prepare_batch(x, y)
        optimizer.zero_grad()
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

        # Re-impose the filter constraint on the first conv layer after every update.
        with torch.no_grad():
            model.layer1[0].weight = ConvConst(model.layer1[0].weight)

        batch_n = y.size(0)
        train_loss_sum += loss.item() * batch_n
        train_correct += (pred.argmax(dim=1) == y).sum().item()
        train_seen += batch_n

    epoch_loss = train_loss_sum / max(train_seen, 1)
    epoch_acc = 100.0 * train_correct / max(train_seen, 1)
    loss_hist["train"].append(epoch_loss)
    acc_hist["train"].append(epoch_acc)

    model.eval()

    print("[INFO]Validating Model...")
    val_epoch_loss, val_epoch_acc = evaluate(val_set)
    loss_hist['val'].append(val_epoch_loss)
    acc_hist['val'].append(val_epoch_acc)

    print("[INFO]Testing Model...")
    test_epoch_loss, test_epoch_acc = evaluate(test_set)
    loss_hist['test'].append(test_epoch_loss)
    acc_hist['test'].append(test_epoch_acc)

    print('Epoch {:03}: acc: {:.3f} | loss: {:.3f} | val_acc: {:.3f} | val_loss: {:.3f} | test_acc: {:.3f} | test_loss: {:.3f}'.format(
        epoch + 1,
        epoch_acc,
        epoch_loss,
        val_epoch_acc,
        val_epoch_loss,
        test_epoch_acc,
        test_epoch_loss))

    scheduler.step(val_epoch_acc)

    # Checkpoint / early stopping on the whole-epoch validation accuracy
    # (not on the accuracy of the last validation batch).
    if val_epoch_acc > best_val:
        best_val = val_epoch_acc
        patience = max_patience
        torch.save(model, model_path)
    else:
        patience -= 1
        if patience == 0:
            print('Early stopping. Best Val accuracy: {:.3f}'.format(best_val))
            break

[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 001: acc: 36.941 | loss: 1.665 | val_acc: 54.405 | val_loss: 1.182 | test_acc: 53.840 | test_loss: 1.195
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 002: acc: 84.111 | loss: 0.473 | val_acc: 88.662 | val_loss: 0.336 | test_acc: 87.258 | test_loss: 0.356
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 003: acc: 93.911 | loss: 0.194 | val_acc: 96.584 | val_loss: 0.108 | test_acc: 96.594 | test_loss: 0.110
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 004: acc: 95.927 | loss: 0.130 | val_acc: 92.913 | val_loss: 0.214 | test_acc: 92.744 | test_loss: 0.218
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 005: acc: 96.748 | loss: 0.107 | val_acc: 97.323 | val_loss: 0.091 | test_acc: 96.832 | test_loss: 0.096
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 006: acc: 97.707 | loss: 0.077 | val_acc: 95.708 | val_loss: 0.126 | test_acc: 95.910 | test_loss: 0.125
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 007: acc: 98.083 | loss: 0.066 | val_acc: 98.704 | val_loss: 0.036 | test_acc: 98.778 | test_loss: 0.036
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 008: acc: 98.109 | loss: 0.062 | val_acc: 98.450 | val_loss: 0.046 | test_acc: 98.531 | test_loss: 0.045
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 009: acc: 98.352 | loss: 0.056 | val_acc: 97.977 | val_loss: 0.070 | test_acc: 98.146 | test_loss: 0.068
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))


[INFO]Validating Model...


HBox(children=(FloatProgress(value=0.0, max=129.0), HTML(value='')))


[INFO]Testing Model...


HBox(children=(FloatProgress(value=0.0, max=151.0), HTML(value='')))


Epoch 010: acc: 98.751 | loss: 0.042 | val_acc: 97.977 | val_loss: 0.060 | test_acc: 98.086 | test_loss: 0.062
[INFO]Training Model...


HBox(children=(FloatProgress(value=0.0, max=1454.0), HTML(value='')))

In [None]:
# Accuracy (left) and loss (right) per epoch, one distinguishable curve per split.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))

# A distinct marker and colour per split; previously every curve was drawn
# with the same red-square style and could not be told apart.
curve_styles = {"train": "--sr", "test": "--^g", "val": "--ob"}

for split, style in curve_styles.items():
    ax[0].plot(np.arange(len(acc_hist[split])), acc_hist[split], style, label=split)
    ax[1].plot(np.arange(len(loss_hist[split])), loss_hist[split], style, label=split)

ax[0].set(title="Accuracy per epoch", xlabel="epoch", ylabel="accuracy (%)")
ax[1].set(title="Loss per epoch", xlabel="epoch", ylabel="loss")
for axis in ax:
    axis.legend()

# plt.show() rather than fig.show(): the latter warns on the non-GUI inline backend.
plt.show()

EOF