In [None]:
import torch
import torch.nn as nn
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import zipfile
import math

In [None]:
# Define the base model configuration (using a simplified version)
# Each row describes one stage and is unpacked by EfficientNet.create_features
# in this order: [expand_ratio, channels, repeats, stride, kernel_size].
base_model = [
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
]

# Per-variant settings, unpacked by EfficientNet.calculate_factors in this
# order: (phi, resolution, drop_rate).
#   phi        - exponent applied to the depth/width scaling bases
#   resolution - read by calculate_factors but not used by the model code here
#   drop_rate  - dropout probability used in the classifier head
phi_values = {
    "b0": (0, 32, 0.2),
    "b1": (1, 32, 0.2),
    "b2": (2, 32, 0.3),
    "b3": (3, 32, 0.3),
    "b4": (4, 32, 0.4),
}

class CNNBlock(nn.Module):
    """Convolution followed by batch normalisation and a SiLU activation.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding applied by the convolution.
        groups: Number of convolution groups (defaults to 1, i.e. an
            ordinary convolution).
    """

    def __init__(self, in_channel, out_channel, kernel_size, stride, padding, groups=1):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable.
        self.cnn = nn.Conv2d(
            in_channels=in_channel,
            out_channels=out_channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channel)
        self.silu = nn.SiLU()

    def forward(self, x):
        """Return ``SiLU(BatchNorm(Conv(x)))``."""
        convolved = self.cnn(x)
        normalised = self.bn(convolved)
        return self.silu(normalised)

class SqueezeExcitation(nn.Module):
    """Channel gate: rescales each channel of the input by a learned weight.

    The gate pools the input to 1x1 per channel, passes it through two 1x1
    convolutions (``in_channel -> reduced_dim -> in_channel``) with a SiLU in
    between, and squashes the result with a sigmoid.

    Args:
        in_channel: Number of channels of the input (and of the gate output).
        reduced_dim: Number of channels in the bottleneck between the two
            1x1 convolutions.
    """

    def __init__(self, in_channel, reduced_dim):
        super().__init__()
        # Layer order fixes the state_dict keys (se.1.*, se.3.*); keep it.
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channel, reduced_dim, kernel_size=1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channel, kernel_size=1),
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Multiply ``x`` by its per-channel gate values (broadcast over H, W)."""
        channel_weights = self.se(x)
        return x * channel_weights

class InvertedResidualBlock(nn.Module):
    """Expand -> depthwise conv -> squeeze-excitation -> 1x1 projection.

    A skip connection (with stochastic depth during training) is added only
    when the block keeps both the channel count and the spatial size, i.e.
    ``in_channel == out_channel`` and ``stride == 1``.

    Args:
        in_channel: Channels of the input feature map.
        out_channel: Channels of the output feature map.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        expand_ratio: Multiplier giving the hidden width
            (``in_channel * expand_ratio``).
        reduction: Divisor giving the squeeze-excitation bottleneck width
            (``int(in_channel / reduction)``).
        survival_prob: Probability of keeping the residual branch for a
            sample while training.
    """

    def __init__(self, in_channel, out_channel, kernel_size, stride, padding, expand_ratio, reduction=4, survival_prob=0.8):
        super().__init__()
        hidden_dim = in_channel * expand_ratio
        squeeze_dim = int(in_channel / reduction)

        self.survival_prob = survival_prob
        self.use_residual = stride == 1 and in_channel == out_channel
        # No expansion layer is needed when the hidden width equals the input width.
        self.expand = hidden_dim != in_channel

        if self.expand:
            # NOTE(review): MBConv-style blocks conventionally expand with a 1x1
            # conv; this one uses 3x3. Left as-is because changing it would alter
            # weight shapes of any existing checkpoint - confirm intent.
            self.expand_conv = CNNBlock(in_channel, hidden_dim, kernel_size=3, stride=1, padding=1)

        depthwise = CNNBlock(hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim)
        attention = SqueezeExcitation(hidden_dim, squeeze_dim)
        project = nn.Conv2d(hidden_dim, out_channel, 1, bias=False)
        project_bn = nn.BatchNorm2d(out_channel)
        self.conv = nn.Sequential(depthwise, attention, project, project_bn)

    def stochastic_depth(self, x):
        """Randomly zero whole samples of ``x`` while training.

        In eval mode ``x`` is returned untouched. In training mode each sample
        is kept with probability ``survival_prob`` and kept samples are divided
        by ``survival_prob``.
        """
        if not self.training:
            return x
        # One keep/drop decision per sample, broadcast over C, H, W.
        keep_mask = (torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob).float()
        return x / self.survival_prob * keep_mask

    def forward(self, inputs):
        """Run the block; add the skip connection when ``use_residual`` is set."""
        hidden = inputs
        if self.expand:
            hidden = self.expand_conv(inputs)

        out = self.conv(hidden)
        if not self.use_residual:
            return out
        return self.stochastic_depth(out) + inputs

class EfficientNet(nn.Module):
    """Scaled convolutional backbone with a single-probability classifier head.

    The backbone is built from ``base_model``; width, depth and dropout are
    derived from the ``phi_values`` entry selected by ``version``. The head
    ends in ``Linear(128, 1)`` + ``Sigmoid``, so ``forward`` returns one value
    in (0, 1) per input sample.

    Args:
        version: Key into ``phi_values`` (e.g. ``"b0"``).
    """

    def __init__(self, version):
        super().__init__()
        width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
        last_channels = math.ceil(1280 * width_factor)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(width_factor, depth_factor, last_channels)

        head_layers = [
            nn.Linear(last_channels, 512),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 128),
            nn.ReLU6(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        ]
        self.fc_layers = nn.Sequential(*head_layers)

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        """Return ``(width_factor, depth_factor, drop_rate)`` for ``version``.

        ``depth_factor`` is ``alpha ** phi`` and ``width_factor`` is
        ``beta ** phi``; the resolution stored in ``phi_values`` is not used.
        """
        phi, _resolution, drop_rate = phi_values[version]
        return beta ** phi, alpha ** phi, drop_rate

    def create_features(self, width_factor, depth_factor, last_channels):
        """Assemble the backbone: stem conv, scaled stages, final 1x1 conv."""
        stem_channels = int(32 * width_factor)
        blocks = [CNNBlock(3, stem_channels, 3, stride=2, padding=1)]
        prev_channels = stem_channels

        for expand_ratio, stage_channels, repeats, stage_stride, kernel_size in base_model:
            # Scale the stage width, then round it up to a multiple of 4.
            scaled_width = int(stage_channels * width_factor)
            out_channels = 4 * math.ceil(scaled_width / 4)
            num_layers = math.ceil(repeats * depth_factor)

            for layer_idx in range(num_layers):
                # Only the first block of a stage applies the stage stride.
                block_stride = stage_stride if layer_idx == 0 else 1
                blocks.append(
                    InvertedResidualBlock(
                        prev_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        stride=block_stride,
                        kernel_size=kernel_size,
                        padding=kernel_size // 2,
                    )
                )
                prev_channels = out_channels

        blocks.append(CNNBlock(prev_channels, last_channels, kernel_size=1, stride=1, padding=0))
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Backbone -> global average pool -> flatten -> classifier head."""
        pooled = self.pool(self.features(x))
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc_layers(flat)


In [None]:
# Mount Google Drive at /content/drive; the dataset archive read later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import zipfile
import random
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# Path to the zip file and the destination folder for extraction
zip_file_path = '/content/drive/MyDrive/web_scraped.zip'
extraction_path = '/content/web_scraped_extracted'

# Extract the zip file (runs on every execution of this cell)
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extraction_path)

# Define the transform (the same transform is reused for the test set later)
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize to 32x32, the resolution listed for every variant in phi_values
    transforms.ToTensor(),
    # Per-channel normalisation; these are the widely used ImageNet mean/std values.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the full dataset
dataset_path = '/content/web_scraped_extracted/train'  # 'train' folder inside the extracted archive
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Define function to get class indices
def get_class_indices(dataset, class_name):
    """Return the positions in ``dataset.samples`` labelled ``class_name``.

    Args:
        dataset: Object exposing ``class_to_idx`` (name -> label id) and
            ``samples`` (sequence of ``(item, label_id)`` pairs).
        class_name: Class name to look up in ``dataset.class_to_idx``.

    Returns:
        List of integer positions, in ascending order.

    Raises:
        KeyError: If ``class_name`` is not a key of ``dataset.class_to_idx``.
    """
    wanted_label = dataset.class_to_idx[class_name]
    matches = []
    for position, (_item, label) in enumerate(dataset.samples):
        if label == wanted_label:
            matches.append(position)
    return matches

# Get indices for the "REAL" and "FAKE" classes in the full dataset
real_indices = get_class_indices(full_dataset, 'REAL')
fake_indices = get_class_indices(full_dataset, 'FAKE')

# Check the number of images in each class
num_real_images = len(real_indices)
num_fake_images = len(fake_indices)

print(f"Number of REAL images: {num_real_images}")
print(f"Number of FAKE images: {num_fake_images}")

# Split configuration: images used per class, and how many of those are held
# out for validation. The remainder is the per-class training budget.
IMAGES_PER_CLASS = 45000
VAL_IMAGES_PER_CLASS = 5000
TRAIN_IMAGES_PER_CLASS = IMAGES_PER_CLASS - VAL_IMAGES_PER_CLASS
SPLIT_SEED = 42

# Fail loudly: without the loaders defined below, the training cell cannot run.
if num_real_images < IMAGES_PER_CLASS or num_fake_images < IMAGES_PER_CLASS:
    raise ValueError(
        "Not enough images in one or both classes. "
        f"Please ensure each class has at least {IMAGES_PER_CLASS:,} images."
    )

# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching the global `random` state.
split_rng = random.Random(SPLIT_SEED)

# Randomly select the validation indices from each class
real_val_selected = split_rng.sample(real_indices, VAL_IMAGES_PER_CLASS)
fake_val_selected = split_rng.sample(fake_indices, VAL_IMAGES_PER_CLASS)

# Combine the selected indices for the validation subset
val_selected_indices = real_val_selected + fake_val_selected

# Use the remaining indices for the training set. Membership is tested against
# sets so the filter is linear rather than len(indices) * len(val) comparisons;
# the original index order is preserved before truncating to the budget.
real_val_lookup = set(real_val_selected)
fake_val_lookup = set(fake_val_selected)
real_train_selected = [i for i in real_indices if i not in real_val_lookup][:TRAIN_IMAGES_PER_CLASS]
fake_train_selected = [i for i in fake_indices if i not in fake_val_lookup][:TRAIN_IMAGES_PER_CLASS]
train_selected_indices = real_train_selected + fake_train_selected

# Create the validation and training subsets
val_subset = Subset(full_dataset, val_selected_indices)
train_subset = Subset(full_dataset, train_selected_indices)

# Create DataLoaders
train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

# Print sizes of subsets
print(f"Train dataset size: {len(train_subset)}")
print(f"Validation dataset size: {len(val_subset)}")


Number of REAL images: 50000
Number of FAKE images: 50000
Train dataset size: 80000
Validation dataset size: 10000


In [None]:
def run_epoch(model, loader, criterion, device, optimizer=None, desc=""):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network returning one probability per sample, shape (batch, 1).
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss taking ``(outputs, labels)`` and returning a batch mean.
        device: Device the batches are moved to.
        optimizer: If given, the model is put in train mode and updated after
            every batch. If ``None``, the model is put in eval mode and no
            gradients are computed.
        desc: Label shown on the progress bar.

    Returns:
        Tuple of the sample-weighted mean loss and the fraction of samples
        whose thresholded (> 0.5) prediction equals the label.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in tqdm(loader, desc=desc):
            inputs = inputs.to(device)
            # BCELoss needs float targets shaped like the (batch, 1) outputs.
            labels = labels.to(device).float().unsqueeze(1)

            if is_training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # criterion returns a batch mean; re-weight by batch size so the
            # last (possibly smaller) batch is not over-counted.
            loss_sum += loss.item() * inputs.size(0)
            predicted = (outputs > 0.5).float()
            num_correct += (predicted == labels).sum().item()
            num_seen += labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EfficientNet(version="b0").to(device)
# model.load_state_dict(torch.load("/content/drive/MyDrive/model (2).pth", map_location=device))

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 20

# NOTE(review): in the recorded run the validation loss is lowest at epoch 7
# and drifts upward afterwards while the training loss keeps falling; consider
# checkpointing on best validation loss instead of keeping the final epoch.
for epoch in range(num_epochs):
    epoch_loss, epoch_acc = run_epoch(
        model, train_loader, criterion, device,
        optimizer=optimizer,
        desc=f"Training Epoch {epoch+1}/{num_epochs}",
    )
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.4f}")

    val_epoch_loss, val_epoch_acc = run_epoch(
        model, val_loader, criterion, device,
        desc=f"Validation Epoch {epoch+1}/{num_epochs}",
    )
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.4f}")


Training Epoch 1/20: 100%|██████████| 2500/2500 [01:56<00:00, 21.55it/s]


Epoch 1/20, Training Loss: 0.3145, Training Accuracy: 0.8665


Validation Epoch 1/20: 100%|██████████| 313/313 [00:07<00:00, 39.37it/s]


Epoch 1/20, Validation Loss: 0.1972, Validation Accuracy: 0.9233


Training Epoch 2/20: 100%|██████████| 2500/2500 [01:51<00:00, 22.35it/s]


Epoch 2/20, Training Loss: 0.2039, Training Accuracy: 0.9201


Validation Epoch 2/20: 100%|██████████| 313/313 [00:08<00:00, 37.39it/s]


Epoch 2/20, Validation Loss: 0.1631, Validation Accuracy: 0.9416


Training Epoch 3/20: 100%|██████████| 2500/2500 [01:51<00:00, 22.50it/s]


Epoch 3/20, Training Loss: 0.1707, Training Accuracy: 0.9362


Validation Epoch 3/20: 100%|██████████| 313/313 [00:08<00:00, 37.92it/s]


Epoch 3/20, Validation Loss: 0.1550, Validation Accuracy: 0.9357


Training Epoch 4/20: 100%|██████████| 2500/2500 [01:54<00:00, 21.93it/s]


Epoch 4/20, Training Loss: 0.1506, Training Accuracy: 0.9435


Validation Epoch 4/20: 100%|██████████| 313/313 [00:07<00:00, 43.77it/s]


Epoch 4/20, Validation Loss: 0.1430, Validation Accuracy: 0.9440


Training Epoch 5/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.88it/s]


Epoch 5/20, Training Loss: 0.1336, Training Accuracy: 0.9497


Validation Epoch 5/20: 100%|██████████| 313/313 [00:07<00:00, 39.14it/s]


Epoch 5/20, Validation Loss: 0.1337, Validation Accuracy: 0.9476


Training Epoch 6/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.88it/s]


Epoch 6/20, Training Loss: 0.1203, Training Accuracy: 0.9552


Validation Epoch 6/20: 100%|██████████| 313/313 [00:08<00:00, 38.82it/s]


Epoch 6/20, Validation Loss: 0.1230, Validation Accuracy: 0.9544


Training Epoch 7/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.81it/s]


Epoch 7/20, Training Loss: 0.1072, Training Accuracy: 0.9599


Validation Epoch 7/20: 100%|██████████| 313/313 [00:07<00:00, 40.24it/s]


Epoch 7/20, Validation Loss: 0.1185, Validation Accuracy: 0.9532


Training Epoch 8/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.78it/s]


Epoch 8/20, Training Loss: 0.0982, Training Accuracy: 0.9634


Validation Epoch 8/20: 100%|██████████| 313/313 [00:08<00:00, 38.64it/s]


Epoch 8/20, Validation Loss: 0.1241, Validation Accuracy: 0.9499


Training Epoch 9/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.74it/s]


Epoch 9/20, Training Loss: 0.0843, Training Accuracy: 0.9694


Validation Epoch 9/20: 100%|██████████| 313/313 [00:07<00:00, 44.26it/s]


Epoch 9/20, Validation Loss: 0.1354, Validation Accuracy: 0.9504


Training Epoch 10/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.90it/s]


Epoch 10/20, Training Loss: 0.0759, Training Accuracy: 0.9724


Validation Epoch 10/20: 100%|██████████| 313/313 [00:08<00:00, 38.65it/s]


Epoch 10/20, Validation Loss: 0.1874, Validation Accuracy: 0.9286


Training Epoch 11/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.88it/s]


Epoch 11/20, Training Loss: 0.0658, Training Accuracy: 0.9756


Validation Epoch 11/20: 100%|██████████| 313/313 [00:07<00:00, 44.62it/s]


Epoch 11/20, Validation Loss: 0.1578, Validation Accuracy: 0.9440


Training Epoch 12/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.94it/s]


Epoch 12/20, Training Loss: 0.0616, Training Accuracy: 0.9768


Validation Epoch 12/20: 100%|██████████| 313/313 [00:08<00:00, 38.67it/s]


Epoch 12/20, Validation Loss: 0.1616, Validation Accuracy: 0.9552


Training Epoch 13/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.75it/s]


Epoch 13/20, Training Loss: 0.0538, Training Accuracy: 0.9804


Validation Epoch 13/20: 100%|██████████| 313/313 [00:07<00:00, 41.05it/s]


Epoch 13/20, Validation Loss: 0.1540, Validation Accuracy: 0.9487


Training Epoch 14/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.84it/s]


Epoch 14/20, Training Loss: 0.0499, Training Accuracy: 0.9818


Validation Epoch 14/20: 100%|██████████| 313/313 [00:07<00:00, 42.13it/s]


Epoch 14/20, Validation Loss: 0.1693, Validation Accuracy: 0.9501


Training Epoch 15/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.91it/s]


Epoch 15/20, Training Loss: 0.0430, Training Accuracy: 0.9839


Validation Epoch 15/20: 100%|██████████| 313/313 [00:08<00:00, 38.79it/s]


Epoch 15/20, Validation Loss: 0.1498, Validation Accuracy: 0.9545


Training Epoch 16/20: 100%|██████████| 2500/2500 [01:47<00:00, 23.19it/s]


Epoch 16/20, Training Loss: 0.0425, Training Accuracy: 0.9842


Validation Epoch 16/20: 100%|██████████| 313/313 [00:06<00:00, 44.77it/s]


Epoch 16/20, Validation Loss: 0.1725, Validation Accuracy: 0.9428


Training Epoch 17/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.86it/s]


Epoch 17/20, Training Loss: 0.0387, Training Accuracy: 0.9860


Validation Epoch 17/20: 100%|██████████| 313/313 [00:08<00:00, 38.71it/s]


Epoch 17/20, Validation Loss: 0.1675, Validation Accuracy: 0.9515


Training Epoch 18/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.86it/s]


Epoch 18/20, Training Loss: 0.0356, Training Accuracy: 0.9875


Validation Epoch 18/20: 100%|██████████| 313/313 [00:07<00:00, 43.76it/s]


Epoch 18/20, Validation Loss: 0.1522, Validation Accuracy: 0.9519


Training Epoch 19/20: 100%|██████████| 2500/2500 [01:49<00:00, 22.74it/s]


Epoch 19/20, Training Loss: 0.0338, Training Accuracy: 0.9875


Validation Epoch 19/20: 100%|██████████| 313/313 [00:08<00:00, 37.86it/s]


Epoch 19/20, Validation Loss: 0.1594, Validation Accuracy: 0.9531


Training Epoch 20/20: 100%|██████████| 2500/2500 [01:48<00:00, 23.08it/s]


Epoch 20/20, Training Loss: 0.0302, Training Accuracy: 0.9890


Validation Epoch 20/20: 100%|██████████| 313/313 [00:07<00:00, 42.49it/s]


Epoch 20/20, Validation Loss: 0.1860, Validation Accuracy: 0.9533


In [None]:
# Destination for the trained weights.
# NOTE(review): this path is outside the mounted Drive folder (/content/drive);
# confirm the file is copied somewhere persistent if it must outlive the session.
model_save_path = '/content/model_checkpoint.pth'

# Save the model (weights only, via state_dict - not the pickled module)
torch.save(model.state_dict(), model_save_path)

In [None]:
#Model testing

# Switch the model to evaluation mode
model.eval()

# Variables to keep track of test performance
test_loss = 0.0
correct_test = 0
total_test = 0

# Flat lists of per-sample predictions and labels (plain 0.0 / 1.0 floats)
all_preds = []
all_labels = []

# Use the same loss function
criterion = nn.BCELoss()

test_dataset = datasets.ImageFolder(root='/content/web_scraped_extracted/test', transform=transform)

# ImageFolder assigns label ids from the class folder names it finds, so a test
# folder with different or extra class directories would silently shift the
# labels relative to training. Refuse to report metrics in that case.
if test_dataset.class_to_idx != full_dataset.class_to_idx:
    raise ValueError(
        "Test set class mapping "
        f"{test_dataset.class_to_idx} does not match training mapping "
        f"{full_dataset.class_to_idx}"
    )

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Testing"):
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)

        # Calculate loss (re-weighted by batch size; criterion returns a batch mean)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)

        # Calculate accuracy
        predicted = (outputs > 0.5).float()
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

        # Store predictions and labels as flat Python scalars (one per sample)
        # rather than 1-element arrays, so they can be compared or passed to
        # metric functions directly.
        all_preds.extend(predicted.view(-1).cpu().tolist())
        all_labels.extend(labels.view(-1).cpu().tolist())

# Calculate average loss and accuracy
test_epoch_loss = test_loss / len(test_loader.dataset)
test_epoch_acc = correct_test / total_test

print(f"Test Loss: {test_epoch_loss:.4f}, Test Accuracy: {test_epoch_acc:.4f}")


Testing: 100%|██████████| 625/625 [00:15<00:00, 39.41it/s]


Test Loss: 0.2084, Test Accuracy: 0.9471


In [None]:
# Count the test samples whose stored prediction differs from its label.
num_incorrect = sum(
    1
    for position, prediction in enumerate(all_preds)
    if prediction != all_labels[position]
)

print(f"Number of incorrect predictions: {num_incorrect}")

Number of incorrect predictions: 1058


In [None]:
# Sanity check: number of test predictions collected by the testing cell.
len(all_preds)

20000