In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, datasets
from tqdm import tqdm
import numpy as np
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

In [2]:
from PIL import Image

In [3]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
import time

In [4]:
from dataset import CustomDatasetLoader

In [5]:
torch.cuda.is_available()

True

In [6]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os

# Define the transformations (resize, normalization, etc.)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize all images to 128x128
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize (ImageNet stats)
])

# Define the paths to the dataset directories
train_dir = 'CNN_dataset/train'  # Path to the train folder
test_dir = 'CNN_dataset/test'    # Path to the test folder

# Load the dataset using ImageFolder
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Check the structure of the dataset
print(f'Train dataset size: {len(train_dataset)}')
print(f'Test dataset size: {len(test_dataset)}')

# Example of iterating through the DataLoader
for images, labels in train_loader:
    print(f'Batch of images shape: {images.shape}')  # Should be (batch_size, 3, 128, 128)
    print(f'Batch of labels shape: {labels.shape}')  # Should be (batch_size,)
    break  # Just show the first batch


Train dataset size: 1632
Test dataset size: 409
Batch of images shape: torch.Size([32, 3, 128, 128])
Batch of labels shape: torch.Size([32])


In [26]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from albumentations import Compose, Resize, HorizontalFlip, Rotate, RandomBrightnessContrast
from albumentations.pytorch import ToTensorV2
from PIL import Image
import numpy as np
import os
from sklearn.model_selection import train_test_split

# Define the Albumentations transformations
def albumentations_transform(image):
    # Apply random horizontal flip, random crop, and random brightness/contrast
    transform = Compose([
        HorizontalFlip(p=0.5),  # Flip the image horizontally with 50% probability
        # RandomCrop(width=120, height=120, p=1),  # Crop the image randomly to 120x120
        Rotate(limit=30, p=0.5),  # Rotate the image randomly by up to 30 degrees
        RandomBrightnessContrast(p=0.2),  # Randomly change brightness/contrast
        ToTensorV2()    # Convert image to PyTorch tensor
    ])
    
    # Apply the transformations to the image (albumentations works on NumPy arrays)
    image = np.array(image)  # Convert PIL image to NumPy array
    augmented = transform(image=image)
    return augmented['image']

# Define the paths to the dataset directories
train_dir = 'CNN_dataset/train'  # Path to the train folder
test_dir = 'CNN_dataset/test'    # Path to the test folder

# Define the custom dataset class with Albumentations transform
class CustomImageFolderWithAugmentations(Dataset):
    def __init__(self, image_folder, transform=None):
        """
        Custom dataset class for loading images from a folder with augmentations.
        
        :param image_folder: Path to the dataset folder containing subfolders for each class.
        :param transform: Albumentations transformation pipeline to apply to images.
        """
        self.image_folder = image_folder
        self.transform = transform
        self.images = []
        self.labels = []

        # Load image paths and labels
        for label, folder_name in enumerate(os.listdir(image_folder)):
            folder_path = os.path.join(image_folder, folder_name)
            if os.path.isdir(folder_path):
                for img_name in os.listdir(folder_path):
                    img_path = os.path.join(folder_path, img_name)
                    self.images.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]

        # Load image using PIL
        image = Image.open(img_path).convert("RGB")
        
        # Convert image to numpy array for Albumentations
        image_np = np.array(image)

        # Apply transformations (augmentation) using Albumentations if provided
        if self.transform:
            augmented = self.transform(image=image_np)
            image = augmented['image']  # This will be a PyTorch tensor

        # Ensure the image is of type float32 and normalize to [0, 1]
        # Convert from uint8 to float32
        image = image.float()  # Convert tensor to float32

        # Normalize pixel values to [0, 1] by dividing by 255.0
        image /= 255.0

        return image, label


In [27]:
images.dtype

torch.float32

In [52]:

# Define the augmentation pipeline
augmentation_pipeline = Compose([
    Resize(224, 224),
    # HorizontalFlip(p=0.5),
    # VerticalFlip(p=0.5),
    # RandomCrop(width=120, height=120, p=1),
    # Rotate(limit=30, p=0.5),
    # RandomBrightnessContrast(p=0.2),
    ToTensorV2()  # Converts to a PyTorch tensor, but does NOT normalize to [0, 1] yet
])

# Path to the dataset folder containing subfolders 'real' and 'fake' (or any other class names)
dataset_folder = 'dataset'

# Create the dataset
train_dataset = CustomImageFolderWithAugmentations(image_folder=dataset_folder, transform=augmentation_pipeline)

# Create the DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Iterate over the DataLoader
for images, labels in train_loader:
    print(f'Batch size: {images.shape[0]}')  # Should print batch size (32)
    print(f'Images dtype: {images.dtype}')   # Should be torch.float32
    print(f'Images range: {images.min()}, {images.max()}')  # Should be in the range [0, 1]
    break  # Just print information about the first batch

Batch size: 32
Images dtype: torch.float32
Images range: 0.0, 1.0


In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VGG16Custom(nn.Module):
    def __init__(self, num_classes=1):
        super(VGG16Custom, self).__init__()
        
        # VGG16-like Convolutional layers with max pooling
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)  # 224x224 -> 224x224
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)  # 224x224 -> 224x224
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 224x224 -> 112x112

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # 112x112 -> 112x112
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)  # 112x112 -> 112x112
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 112x112 -> 56x56

        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # 56x56 -> 56x56
        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # 56x56 -> 56x56
        self.conv7 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # 56x56 -> 56x56
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 56x56 -> 28x28

        self.conv8 = nn.Conv2d(256, 512, kernel_size=3, padding=1)  # 28x28 -> 28x28
        self.conv9 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # 28x28 -> 28x28
        self.conv10 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # 28x28 -> 28x28
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 28x28 -> 14x14

        self.conv11 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # 14x14 -> 14x14
        self.conv12 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # 14x14 -> 14x14
        self.conv13 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # 14x14 -> 14x14
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)  # 14x14 -> 7x7

        # Fully connected layers (FC1: 4096 units, FC2: 4096 units, FC3: num_classes)
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)  # Adjusted for 224x224 input (512 * 7 * 7 = 25088)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        
        # Dropout (optional)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Convolutional layers with ReLU activations and max pooling
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = self.pool3(x)
        
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = self.pool4(x)
        
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = self.pool5(x)

        # Flatten the tensor
        x = x.view(-1, 512 * 7 * 7)  # For input size of 224x224

        # Fully connected layers with ReLU activation and Dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)

        # Sigmoid activation for binary classification
        x = torch.sigmoid(self.fc3(x))

        # Reshape to (batch_size,) for binary classification target matching
        x = x.view(-1)  # Flatten to (batch_size,)
        
        return x

# Instantiate the model for binary classification (1 output unit)
model = VGG16Custom(num_classes=1)

# Print the model architecture
print(model)


VGG16Custom(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv8): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv9): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv10): Conv2d(512, 

In [54]:
# Set up the criterion (loss function) and optimizer
# Loss function for binary classification
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


print(device)

cuda


In [55]:
torch.cuda.is_available()

True

In [56]:
images.shape, labels.shape

(torch.Size([32, 3, 224, 224]), torch.Size([32]))

In [57]:
torch.cuda.empty_cache()


In [58]:

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0

    # Wrap the train_loader with tqdm for the training loop progress bar
    with tqdm(train_loader, unit="batch", desc=f"Epoch {epoch+1}/{num_epochs} (Train)") as tepoch:
        for inputs, labels in tepoch:
#     for inputs, labels in train_loader:  
            inputs, labels = inputs.to(device), labels.to(device)
            # optimizer.zero_grad()  # Zero the gradients

            # Forward pass
            outputs = model(inputs)  # Logits: (batch_size, 1) for binary classification
            # print(inputs.shape, labels.shape)

            # break
#             # Calculate the loss (using BCEWithLogitsLoss for binary classification)
            # print(outputs.shape, labels.shape)
            loss = criterion(outputs, labels.float())
            # Backpropagation
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            
            # Calculate the number of correct predictions
            predicted = (outputs > 0.5).float()  # Apply threshold of 0.5 to get binary predictions (0 or 1)
            correct_preds += (predicted.squeeze() == labels).sum().item()  # Compare predictions with true labels
            total_preds += labels.size(0)

            # Update the tqdm progress bar description with loss and accuracy
            tepoch.set_postfix(loss=running_loss / len(tepoch), accuracy=100 * correct_preds / total_preds)
#     Calculate average loss and accuracy for the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct_preds / total_preds

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")
    torch.cuda.empty_cache()
torch.save(model.state_dict(), 'vgg16_custom_model.pth')


Epoch 1/20 (Train): 100%|██████████| 64/64 [03:58<00:00,  3.72s/batch, accuracy=49.1, loss=18.7] 


Epoch 1/20 - Loss: 18.6874, Accuracy: 49.14%


Epoch 2/20 (Train): 100%|██████████| 64/64 [03:08<00:00,  2.95s/batch, accuracy=53, loss=47]    


Epoch 2/20 - Loss: 47.0117, Accuracy: 52.96%


Epoch 3/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.03s/batch, accuracy=53, loss=47]    


Epoch 3/20 - Loss: 46.9844, Accuracy: 52.96%


Epoch 4/20 (Train): 100%|██████████| 64/64 [03:15<00:00,  3.06s/batch, accuracy=53, loss=47]    


Epoch 4/20 - Loss: 47.0391, Accuracy: 52.96%


Epoch 5/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47]    


Epoch 5/20 - Loss: 46.9844, Accuracy: 52.96%


Epoch 6/20 (Train): 100%|██████████| 64/64 [03:11<00:00,  3.00s/batch, accuracy=53, loss=47.1]  


Epoch 6/20 - Loss: 47.0527, Accuracy: 52.96%


Epoch 7/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47]    


Epoch 7/20 - Loss: 47.0391, Accuracy: 52.96%


Epoch 8/20 (Train): 100%|██████████| 64/64 [03:15<00:00,  3.05s/batch, accuracy=53, loss=47]    


Epoch 8/20 - Loss: 47.0391, Accuracy: 52.96%


Epoch 9/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47]    


Epoch 9/20 - Loss: 47.0254, Accuracy: 52.96%


Epoch 10/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47]    


Epoch 10/20 - Loss: 47.0254, Accuracy: 52.96%


Epoch 11/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.03s/batch, accuracy=53, loss=47]    


Epoch 11/20 - Loss: 46.9570, Accuracy: 52.96%


Epoch 12/20 (Train): 100%|██████████| 64/64 [03:09<00:00,  2.96s/batch, accuracy=53, loss=47]    


Epoch 12/20 - Loss: 46.9980, Accuracy: 52.96%


Epoch 13/20 (Train): 100%|██████████| 64/64 [02:51<00:00,  2.67s/batch, accuracy=53, loss=47.1]  


Epoch 13/20 - Loss: 47.0527, Accuracy: 52.96%


Epoch 14/20 (Train): 100%|██████████| 64/64 [02:51<00:00,  2.68s/batch, accuracy=53, loss=47]    


Epoch 14/20 - Loss: 47.0254, Accuracy: 52.96%


Epoch 15/20 (Train): 100%|██████████| 64/64 [03:13<00:00,  3.02s/batch, accuracy=53, loss=47]    


Epoch 15/20 - Loss: 47.0391, Accuracy: 52.96%


Epoch 16/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47.1]  


Epoch 16/20 - Loss: 47.0801, Accuracy: 52.96%


Epoch 17/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47]    


Epoch 17/20 - Loss: 46.9844, Accuracy: 52.96%


Epoch 18/20 (Train): 100%|██████████| 64/64 [03:14<00:00,  3.04s/batch, accuracy=53, loss=47.1]  


Epoch 18/20 - Loss: 47.0664, Accuracy: 52.96%


Epoch 19/20 (Train): 100%|██████████| 64/64 [03:13<00:00,  3.03s/batch, accuracy=53, loss=47.1]  


Epoch 19/20 - Loss: 47.0527, Accuracy: 52.96%


Epoch 20/20 (Train): 100%|██████████| 64/64 [03:07<00:00,  2.92s/batch, accuracy=53, loss=47]    


Epoch 20/20 - Loss: 47.0254, Accuracy: 52.96%
