In [3]:
# Basic imports for data handling, training, and visualization
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from PIL import Image

# Load EfficientNet model from timm (PyTorch Image Models)
import timm

# Set device to GPU if available, else use CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

Using device: cpu


In [4]:
# Path to the new dataset directory for fine-tuning
DATASET_DIR = 'Dataset-New'

# Hyperparameters
BATCH_SIZE    = 32         # Number of images per batch
NUM_EPOCHS    = 5          # Number of fine-tuning epochs
LEARNING_RATE = 1e-4       # Learning rate for optimizer
IMG_SIZE      = 224        # Input image size for EfficientNet-B0

# Proportions for splitting the dataset
train_ratio = 0.8
val_ratio   = 0.1
test_ratio  = 0.1

In [5]:
# Data augmentation and normalization for training set
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),        # Resize to model input size
    transforms.RandomHorizontalFlip(),              # Augment with horizontal flips
    transforms.RandomRotation(10),                  # Augment with small rotations
    transforms.ToTensor(),                          # Convert to PyTorch tensor
    transforms.Normalize([0.485, 0.456, 0.406],      # Normalize using ImageNet means
                         [0.229, 0.224, 0.225])      # and standard deviations
])

# Only resize and normalize validation and test sets (no augmentation)
val_test_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [None]:
# Load dataset containing 'fake' and 'real' subfolders
full_dataset = datasets.ImageFolder(DATASET_DIR, transform=None)

# Split dataset into train, validation, and test subsets
num_samples = len(full_dataset)
train_size  = int(train_ratio * num_samples)
val_size    = int(val_ratio * num_samples)
test_size   = num_samples - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

# Apply transforms to each split
train_dataset.dataset.transform = train_transforms
val_dataset.dataset.transform   = val_test_transforms
test_dataset.dataset.transform  = val_test_transforms

print("Classes:", full_dataset.classes)


Total images  : 36000
Train samples : 28800
Val samples   : 3600
Test samples  : 3600
Classes: ['fake', 'real']


In [7]:
# Create data loaders for training, validation, and testing
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [9]:
# Initialize EfficientNet-B0 model (no pretrained weights for fine-tuning)
model = timm.create_model('efficientnet_b0', pretrained=False)

# Modify the final classifier layer for binary classification (2 output classes)
num_features = model.classifier.in_features
model.classifier = nn.Linear(num_features, 2)

# Load weights from previously trained model
checkpoint_path = 'efficientnet_b0_real_vs_fake.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))

# Move model to the appropriate device (GPU or CPU)
model = model.to(DEVICE)

print("Loaded pretrained model from:", checkpoint_path)

Loaded pretrained model from: efficientnet_b0_real_vs_fake.pth


In [10]:
# Freeze all layers except the classifier for partial fine-tuning
for name, param in model.named_parameters():
    if "classifier" not in name:
        param.requires_grad = False

print("Froze all layers except the classifier.")

Froze all layers except the classifier.


In [11]:
# Define loss function (cross-entropy for classification)
criterion = nn.CrossEntropyLoss()

# Get parameters that require gradients (trainable parameters)
trainable_params = [p for p in model.parameters() if p.requires_grad]

# Initialize optimizer (Adam) for only the trainable parameters
optimizer = optim.Adam(trainable_params, lr=LEARNING_RATE)

In [None]:
from tqdm import tqdm

def train_one_epoch(model, dataloader, optimizer, criterion, device=DEVICE):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct      = 0
    total        = 0
    
    # Iterate through the training data
    for images, labels in tqdm(dataloader, desc="Training", leave=False):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()  # Reset gradients
        outputs = model(images)  # Forward pass
        loss    = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagate the error
        optimizer.step()  # Update weights
        
        running_loss += loss.item() * images.size(0)  # Accumulate loss
        _, predicted = torch.max(outputs, 1)  # Get predicted class
        correct      += (predicted == labels).sum().item()  # Count correct predictions
        total        += labels.size(0)  # Total number of samples
    
    # Compute average loss and accuracy for the epoch
    epoch_loss = running_loss / total if total else 0.0
    epoch_acc  = correct / total if total else 0.0
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device=DEVICE):
    model.eval()  # Set model to evaluation mode
    running_loss = 0.0
    correct      = 0
    total        = 0
    
    with torch.no_grad():  # Disable gradient computation for validation
        # Iterate through the validation data
        for images, labels in tqdm(dataloader, desc="Validating", leave=False):
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)  # Forward pass
            loss    = criterion(outputs, labels)  # Compute loss
            
            running_loss += loss.item() * images.size(0)  # Accumulate loss
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            correct      += (predicted == labels).sum().item()  # Count correct predictions
            total        += labels.size(0)  # Total number of samples
    
    # Compute average loss and accuracy for the epoch
    epoch_loss = running_loss / total if total else 0.0
    epoch_acc  = correct / total if total else 0.0
    return epoch_loss, epoch_acc

In [None]:
# Track best validation accuracy for saving the best model
best_val_acc = 0.0

# Main training loop for multiple epochs
for epoch in range(NUM_EPOCHS):
    # Train for one epoch and validate
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    val_loss,   val_acc   = validate_one_epoch(model, val_loader, criterion, DEVICE)
    
    # Print training and validation results
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
    print(f"  Train Loss: {train_loss:.4f}  | Train Acc: {train_acc:.4f}")
    print(f"  Val   Loss: {val_loss:.4f}    | Val Acc:   {val_acc:.4f}")
    
    # Save model if validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "efficientnet_b0_finetuned_newdata.pth")  # Save best model weights
        print(f"==> Best model saved with val_acc = {best_val_acc:.4f}")


                                                             

Epoch [1/5]
  Train Loss: 2.6258  | Train Acc: 0.5362
  Val   Loss: 0.6697    | Val Acc:   0.5967
==> Best model saved with val_acc = 0.5967


                                                             

Epoch [2/5]
  Train Loss: 0.6512  | Train Acc: 0.6218
  Val   Loss: 0.6491    | Val Acc:   0.6222
==> Best model saved with val_acc = 0.6222


                                                             

Epoch [3/5]
  Train Loss: 0.6356  | Train Acc: 0.6408
  Val   Loss: 0.6395    | Val Acc:   0.6419
==> Best model saved with val_acc = 0.6419


                                                             

Epoch [4/5]
  Train Loss: 0.6243  | Train Acc: 0.6503
  Val   Loss: 0.6318    | Val Acc:   0.6497
==> Best model saved with val_acc = 0.6497


                                                             

Epoch [5/5]
  Train Loss: 0.6163  | Train Acc: 0.6644
  Val   Loss: 0.6271    | Val Acc:   0.6569
==> Best model saved with val_acc = 0.6569




In [15]:
# Additional Training with Learning Rate Scheduler and Early Stopping

import copy
from tqdm import tqdm

# Hyperparameters for fine-tuning
additional_epochs = 5  # Number of additional epochs you want to run
patience = 3           # Number of epochs to wait for improvement before early stopping
best_val_loss = float('inf')
patience_counter = 0

# Define a learning rate scheduler that monitors validation loss
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.1, 
    patience=3, 
    verbose=True
)

# Save the best model weights
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(additional_epochs):
    print(f"Epoch {epoch+1}/{additional_epochs}")
    
    # Training phase
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    # Validation phase
    val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, DEVICE)
    
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val   Loss: {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
    
    # Step the scheduler with the validation loss value
    scheduler.step(val_loss)
    
    # Check if validation loss improved, save the model and reset patience counter
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        patience_counter = 0
        torch.save(model.state_dict(), "efficientnet_b0_finetuned_newdata_best.pth")
        print(f"==> Best model saved with val_loss = {best_val_loss:.4f}")
    else:
        patience_counter += 1
        print(f"No improvement for {patience_counter} epoch(s).")
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# Load the best model weights for further evaluation or inference
model.load_state_dict(best_model_wts)
print("Resumed model set to best weights achieved during additional training.")




Epoch 1/5


                                                             

  Train Loss: 0.5848 | Train Acc: 0.6909
  Val   Loss: 0.5957 | Val Acc:   0.6897
==> Best model saved with val_loss = 0.5957
Epoch 2/5


                                                             

  Train Loss: 0.5825 | Train Acc: 0.6904
  Val   Loss: 0.6120 | Val Acc:   0.6881
No improvement for 1 epoch(s).
Epoch 3/5


                                                             

  Train Loss: 0.5823 | Train Acc: 0.6944
  Val   Loss: 0.5959 | Val Acc:   0.6889
No improvement for 2 epoch(s).
Epoch 4/5


                                                             

  Train Loss: 0.5816 | Train Acc: 0.6923
  Val   Loss: 0.5948 | Val Acc:   0.6900
==> Best model saved with val_loss = 0.5948
Epoch 5/5


                                                             

  Train Loss: 0.5812 | Train Acc: 0.6937
  Val   Loss: 0.5928 | Val Acc:   0.6944
==> Best model saved with val_loss = 0.5928
Resumed model set to best weights achieved during additional training.




In [None]:
import copy
from tqdm import tqdm

# Hyperparameters for fine-tuning with early stopping and learning rate scheduler
additional_epochs = 5  # Number of additional epochs for training
patience = 3           # Patience for early stopping (epochs without improvement)
best_val_loss = float('inf')
patience_counter = 0

# Learning rate scheduler: reduces LR if validation loss plateaus
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.1,   # Reduce LR by a factor of 0.1
    patience=3,   # Wait for 3 epochs without improvement
    verbose=True
)

# Save the best model weights
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(additional_epochs):
    print(f"Epoch {epoch+1}/{additional_epochs}")
    
    # Training phase
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    # Validation phase
    val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, DEVICE)
    
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val   Loss: {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
    
    # Step the scheduler with the validation loss value
    scheduler.step(val_loss)
    
    # Save the best model weights and reset patience counter if loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        patience_counter = 0
        torch.save(model.state_dict(), "efficientnet_b0_finetuned_newdata_best.pth")
        print(f"==> Best model saved with val_loss = {best_val_loss:.4f}")
    else:
        patience_counter += 1
        print(f"No improvement for {patience_counter} epoch(s).")
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# Load the best model weights after training is complete
model.load_state_dict(best_model_wts)
print("Resumed model set to best weights achieved during additional training.")

Learning rate reduced further for extra training.
Extra Epoch 1/3


                                                             

  Train Loss: 0.5761 | Train Acc: 0.6969
  Val   Loss: 0.5920 | Val Acc:   0.6925
==> Best model updated with val_loss = 0.5920
Extra Epoch 2/3


                                                             

  Train Loss: 0.5769 | Train Acc: 0.6947
  Val   Loss: 0.5902 | Val Acc:   0.6939
==> Best model updated with val_loss = 0.5902
Extra Epoch 3/3


                                                             

  Train Loss: 0.5758 | Train Acc: 0.6938
  Val   Loss: 0.5897 | Val Acc:   0.6933
==> Best model updated with val_loss = 0.5897
Extra training complete. Model set to best weights from extra training.


