

# Image Classification with Dual Path Network (DPN)

**Training, Validation, and Testing with Dual Path Network (DPN) Model on Custom Dataset for Prediction**

## Initializing and Importing Libraries

In [None]:
!pip install timm
!pip install tqdm
!pip install torch
!pip install torchvision
!pip install scikit-image
!pip install scikit-learn
!pip install numpy
!pip install pandas

In [None]:
import timm
from pprint import pprint
import os
import numpy as np
import torch
import torch.nn as nn
from torchvision.transforms import transforms as T
from skimage.feature import local_binary_pattern
from skimage.color import rgb2gray
from skimage.io import imread
from skimage.transform import resize
from tqdm import tqdm
from PIL import Image
import zipfile
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from torch.utils.data import random_split
import shutil
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import pandas as pd
import imghdr  # For detecting image file types

In [None]:
# Parameters
# Hyperparameters shared by the data loaders, the optimizer and the model below.
batch_size = 32  # samples per mini-batch handed to the DataLoaders
num_epochs = 100  # upper bound on training epochs; early stopping may end training sooner

learning_rate = 1e-3  # initial learning rate given to the optimizer
num_classes = 2  # two classes: Real and Fake

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Custom Dataset Preprocessing

### Custom Dataset Definition

In [None]:
# Custom Dataset with LBP preprocessing
class LBPImageDataset(Dataset):
    """
    Dataset that wraps an ``ImageFolder`` and augments every image with texture
    (Local Binary Pattern) and frequency (log-magnitude FFT) channels.

    Each sample is returned as a float32 array of shape (384, 384, 5): the three
    RGB channels followed by one LBP channel and one FFT channel. Any ``transform``
    supplied must therefore accept five-channel input.

    Args:
        root (str): Path to the root directory of the dataset (one sub-folder per class).
        transform (callable, optional): Optional transformation applied to the
            preprocessed five-channel array.
    """
    def __init__(self, root, transform=None):
        """
        Initializes the LBPImageDataset.

        Args:
            root (str): Root directory for the dataset.
            transform (callable, optional): Transformations to apply to the preprocessed images.
        """
        # No transform is passed to ImageFolder: images are preprocessed here first.
        self.dataset = ImageFolder(root=root)
        self.transform = transform

    def lbp_preprocessing(self, image):
        """
        Builds the five-channel representation (RGB + LBP + FFT) of an image.

        Steps:
        1. Resize the image to (384, 384).
        2. Convert to 3-channel RGB if grayscale or remove the alpha channel if present.
        3. Compute the grayscale version of the image.
        4. Compute the Local Binary Pattern (LBP) on the grayscale image.
        5. Compute the log-magnitude FFT of the grayscale image.
        6. Concatenate the RGB, LBP, and FFT channels along the last axis.

        Args:
            image (numpy.ndarray): Input image as a NumPy array, (H, W), (H, W, 3) or (H, W, 4).

        Returns:
            numpy.ndarray: float32 array of shape (384, 384, 5).
        """
        image = resize(image, (384, 384), anti_aliasing=True)
        if image.ndim == 2:
            # Grayscale input: replicate the single channel three times.
            image = np.stack([image] * 3, axis=-1)
        elif image.shape[2] == 4:
            # RGBA input: drop the alpha channel.
            image = image[:, :, :3]
        image_rgb = image.astype("float32")

        image_gray = rgb2gray(image_rgb)
        P, R = 8, 1  # 8 neighbours sampled on a circle of radius 1
        lbp = local_binary_pattern(image_gray, P, R, method="uniform")
        # +1 keeps the logarithm finite where the spectrum magnitude is zero.
        fft = np.log(np.abs(np.fft.fft2(image_gray)) + 1)

        lbp_expanded = np.expand_dims(lbp, axis=-1)
        fft_expanded = np.expand_dims(fft, axis=-1)
        concatenated = np.concatenate((image_rgb, lbp_expanded, fft_expanded), axis=-1)

        # The array is returned as-is. The previous code rebound this name to an
        # nn.Conv2d module, which made the following .astype() call fail. A learned
        # 5->3 channel projection belongs in the model, not in the dataset.
        return concatenated.astype("float32")

    def __len__(self):
        """
        Returns the total number of samples in the dataset.

        Returns:
            int: Number of samples in the dataset.
        """
        return len(self.dataset)

    def __getitem__(self, idx):
        """
        Retrieves an image and its corresponding label at the specified index,
        applies LBP preprocessing, and optionally applies additional transformations.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            tuple: A tuple containing the preprocessed image and its label.
        """
        image, label = self.dataset[idx]
        image_np = np.array(image)  # Convert PIL image to NumPy
        processed_image = self.lbp_preprocessing(image_np)
        if self.transform:
            processed_image = self.transform(processed_image)
        return processed_image, label

In [None]:
class DoubleLayerGaussianFilter(nn.Module):
    """
    A PyTorch module that blurs an input tensor by applying the same Gaussian
    kernel twice, channel by channel (depthwise convolution).

    Args:
        channels (int): Number of input channels.
        kernel_size (int, optional): Size of the Gaussian kernel. Non-integer values
            are truncated to int. The kernel spans ``-k..k`` with ``k = kernel_size // 2``,
            so an even size yields an effective kernel of ``kernel_size + 1``. Default is 3.
        sigma (float, optional): Standard deviation for the Gaussian kernel. Default is 1.0.
    """
    def __init__(self, channels, kernel_size=3, sigma=1.0):
        """
        Initializes the DoubleLayerGaussianFilter.

        Args:
            channels (int): Number of input channels.
            kernel_size (int, optional): Kernel size for the Gaussian filter. Default is 3.
            sigma (float, optional): Standard deviation of the Gaussian filter. Default is 1.0.
        """
        super().__init__()
        self.channels = int(channels)
        # conv2d requires an integer padding; a float kernel size (e.g. 3.0)
        # would otherwise propagate into the padding and raise at call time.
        self.kernel_size = int(kernel_size)
        self.sigma = float(sigma)
        self.gaussian_filter = self.create_gaussian_filter()

    def create_gaussian_filter(self):
        """
        Creates a Gaussian filter kernel.

        Returns:
            nn.Parameter: A normalized, non-trainable Gaussian kernel with shape
                          (channels, 1, 2 * (kernel_size // 2) + 1, 2 * (kernel_size // 2) + 1).
        """
        k = self.kernel_size // 2
        coords = torch.arange(-k, k + 1, dtype=torch.float32)
        # Explicit indexing avoids the deprecation warning; the kernel is
        # symmetric, so "ij" and "xy" give the same result.
        xx, yy = torch.meshgrid(coords, coords, indexing="ij")
        kernel = torch.exp(-(xx**2 + yy**2) / (2 * self.sigma**2))
        kernel = kernel / kernel.sum()  # Normalize so the weights sum to 1
        # expand() returns a stride-0 view; materialize it so the parameter owns
        # real storage (needed for in-place copies such as load_state_dict).
        kernel = kernel.expand(self.channels, 1, -1, -1).contiguous()
        return nn.Parameter(kernel, requires_grad=False)

    def forward(self, x):
        """
        Applies the double layer Gaussian filter to the input tensor.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, channels, height, width).

        Returns:
            torch.Tensor: Filtered tensor after applying the Gaussian filter twice;
                same spatial size as the input (zero padding at the borders).
        """
        padding = self.kernel_size // 2
        # groups=channels -> every channel is filtered independently.
        for _ in range(2):
            x = nn.functional.conv2d(x, self.gaussian_filter, padding=padding, groups=self.channels)
        return x


class AddGaussianNoise:
    """
    A transformation class that adds Gaussian noise to a tensor.

    Args:
        mean (float, optional): Mean of the Gaussian noise. Default is 0.
        std (float, optional): Standard deviation of the Gaussian noise. Default is 0.1.
    """
    def __init__(self, mean=0., std=0.1):
        """
        Initializes the AddGaussianNoise transformation.

        Args:
            mean (float, optional): Mean of the Gaussian noise. Default is 0.
            std (float, optional): Standard deviation of the Gaussian noise. Default is 0.1.
        """
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Adds Gaussian noise to the input tensor.

        Args:
            tensor (torch.Tensor): Input floating-point tensor.

        Returns:
            torch.Tensor: ``tensor + N(mean, std**2)`` noise, same shape as the input.
        """
        # randn_like samples on the input's device and dtype; torch.randn(size)
        # always sampled on the CPU and failed for tensors living elsewhere.
        noise = torch.randn_like(tensor)
        return tensor + noise * self.std + self.mean

In [None]:
class ApplyDoubleGaussianFilter:
    """
    A transformation class that applies a double-layer Gaussian filter to a
    single (unbatched) image tensor.

    Args:
        kernel_size (int or float, optional): Size of the Gaussian kernel; truncated
            to an integer before use. Default is 3.0.
        sigma (float, optional): Standard deviation for the Gaussian kernel. Default is 1.0.
        channels (int, optional): Number of channels of the tensors that will be
            filtered. Default is 3.
    """
    def __init__(self, kernel_size=3.0, sigma=1.0, channels=3):
        """
        Initializes the ApplyDoubleGaussianFilter transformation.

        Args:
            kernel_size (int or float, optional): Kernel size for the Gaussian filter. Default is 3.0.
            sigma (float, optional): Standard deviation of the Gaussian filter. Default is 1.0.
            channels (int, optional): Channel count of the input tensors. Default is 3.
        """
        # The kernel size ends up as the convolution padding, which must be an
        # integer; coerce here so the float default (3.0) works.
        self.filter = DoubleLayerGaussianFilter(
            channels=channels, kernel_size=int(kernel_size), sigma=sigma
        )

    def __call__(self, tensor):
        """
        Applies the double-layer Gaussian filter to the input tensor.

        Args:
            tensor (torch.Tensor): Input tensor of shape (channels, height, width).

        Returns:
            torch.Tensor: Tensor after applying the Gaussian filter, same shape as the input.
        """
        # The filter works on batches: add a batch axis, filter, then drop it again.
        batched = tensor.unsqueeze(0)
        filtered = self.filter(batched)
        return filtered.squeeze(0)


class AddCustomNoise:
    """
    Transform that perturbs a tensor with zero-mean Gaussian noise.

    Args:
        noise_level (float, optional): Scale (standard deviation) of the noise. Default is 0.1.
    """
    def __init__(self, noise_level=0.1):
        """
        Stores the noise scale used by later calls.

        Args:
            noise_level (float, optional): Scale of the Gaussian noise to add. Default is 0.1.
        """
        self.noise_level = noise_level

    def __call__(self, tensor):
        """
        Returns the input with scaled standard-normal noise added element-wise.

        Args:
            tensor (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: New tensor equal to the input plus the sampled noise.
        """
        # Sample noise matching the input's shape, dtype and device, then scale it.
        perturbation = self.noise_level * torch.randn_like(tensor)
        return tensor + perturbation

### Data Augmentation and Transforms

In [None]:
# Transforms
# Training pipeline: tensor conversion, resize and normalization, followed by two
# independent augmentation groups. Each RandomApply runs its whole group with
# probability 0.3.
# NOTE(review): this pipeline is random; datasets used for validation/testing
# should use a deterministic variant instead.
transform = T.Compose([
    T.ToTensor(),
    T.Resize((224, 224)),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    # Geometric group: random flips plus random rescaling between 0.8x and 1.2x.
    T.RandomApply([
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        # T.RandomRotation(degrees=30),
        T.RandomAffine(degrees=0, scale=(0.8, 1.2)),
    ], p=0.3),
    # Degradation group: double Gaussian blur followed by additive Gaussian noise.
    T.RandomApply([
        ApplyDoubleGaussianFilter(kernel_size=3, sigma=1.0),
        AddCustomNoise(noise_level=0.1)
    ], p=0.3)
])

## Data Preparation and Model Definition

### Data Preparation

In [None]:
import zipfile
import os

# Archive holding the dataset and the folder it gets unpacked into
zip_file_path = 'corrected_dataset.zip'
extract_dir = 'custom_dataset'

# Ensure the target folder is present (no error if it already exists)
os.makedirs(extract_dir, exist_ok=True)

# Unpack every member of the archive into the target folder
with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(path=extract_dir)

print(f"File extracted to {extract_dir}")

In [None]:
# Deterministic preprocessing for validation/test data: same tensor conversion,
# resize and normalization as training, but without the random augmentations
# (augmenting evaluation data makes the reported metrics noisy and pessimistic).
eval_transform = T.Compose([
    T.ToTensor(),
    T.Resize((224, 224)),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Two views of the same training folder: one augmented (for training) and one
# deterministic (for validation). ImageFolder enumerates classes and files in
# sorted order, so the same index refers to the same image in both views.
full_train_dataset = datasets.ImageFolder(root="custom_dataset/train", transform=transform)
full_eval_dataset = datasets.ImageFolder(root="custom_dataset/train", transform=eval_transform)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# Seeded shuffle -> the 80/20 split is reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(len(full_train_dataset), generator=split_generator).tolist()
train_subset = Subset(full_train_dataset, shuffled_indices[:train_size])
val_subset = Subset(full_eval_dataset, shuffled_indices[train_size:])

test_dataset = datasets.ImageFolder(root="custom_dataset/test", transform=eval_transform)

batch_size = 32

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Model Definition

In [None]:
# Model
# Build the timm 'dpn68b.mx_in1k' network with pretrained weights and a
# classification head sized for num_classes, then move it to the chosen device.
model = timm.create_model('dpn68b.mx_in1k', pretrained=True, num_classes=num_classes)
model = model.to(device)

## Model Training and Validation

### Early stopping implementation

In [None]:
class EarlyStopping:
    """
    Tracks a validation loss across calls and raises a stop flag once it stalls.

    Args:
        patience (int, optional): Number of consecutive non-improving calls tolerated
            before ``early_stop`` is set to True. Default is 5.
        delta (float, optional): Margin by which the loss must fall below the best
            value seen so far to count as an improvement. Default is 0.0.
    """
    def __init__(self, patience=5, delta=0.0):
        """
        Sets up the tracker with no recorded loss and a zeroed stall counter.

        Args:
            patience (int, optional): Non-improving calls allowed before stopping. Default is 5.
            delta (float, optional): Required improvement margin. Default is 0.0.
        """
        self.early_stop = False
        self.counter = 0
        self.best_loss = None
        self.delta = delta
        self.patience = patience

    def __call__(self, val_loss, model):
        """
        Records one validation loss and updates the stop flag.

        Args:
            val_loss (float): Current validation loss.
            model (torch.nn.Module): The model being trained. Accepted for
                interface compatibility; it is not used here.

        Updates:
            self.best_loss, self.counter and, once the counter reaches
            ``patience``, self.early_stop.
        """
        # The very first observation simply becomes the reference value.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        stalled = val_loss > self.best_loss - self.delta
        if not stalled:
            # Improvement: remember the new best loss and restart the count.
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

### Loss and Optimizer

In [None]:
# Cross-entropy over the class logits; AdamW updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.1 every 5 epochs. The previous setting
# (gamma=1e-4 every 3 epochs) shrank the rate by four orders of magnitude after
# three epochs, which effectively froze training.
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

### Train the Model

In [None]:
# Training Loop
def _validation_loss(model, data_loader, device):
    """Return the sample-weighted mean of the module-level ``criterion`` over ``data_loader``."""
    model.eval()
    loss_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device).long()
            batch_len = labels.size(0)
            # criterion returns the batch mean; re-weight by batch size so a
            # smaller final batch does not skew the average.
            loss_sum += criterion(model(images), labels).item() * batch_len
            sample_count += batch_len
    # max(..., 1) guards against an empty loader.
    return loss_sum / max(sample_count, 1)


def train_model(model, train_loader, val_loader, num_epochs, device='cuda'):
    """
    Trains ``model`` with per-epoch validation, checkpointing and early stopping.

    Relies on the module-level ``optimizer``, ``criterion`` and ``scheduler``
    objects as well as ``evaluate_model`` and ``EarlyStopping``.

    Per epoch it:
      1. runs one optimization pass over ``train_loader``;
      2. evaluates on ``val_loader``;
      3. when validation accuracy improves, writes "Validation_Results.csv" and
         saves the weights to "dpn68b.mx_in1k.pth";
      4. steps the learning-rate scheduler;
      5. stops early once the validation loss has stalled.

    Args:
        model (torch.nn.Module): timm model exposing ``forward_features`` / ``forward_head``.
        train_loader (DataLoader): Batches of (images, labels) for training.
        val_loader (DataLoader): Batches of (images, labels) for validation.
        num_epochs (int): Maximum number of epochs.
        device (str or torch.device, optional): Device the batches are moved to. Default is 'cuda'.

    Returns:
        None
    """
    best_val_acc = 0.0
    # NOTE(review): delta=0.1 demands a 0.1 drop in validation loss to count as
    # an improvement, which is strict; confirm this threshold is intended.
    early_stopping = EarlyStopping(patience=5, delta=0.1)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)
            labels = labels.long()

            # Ensure tensors and model are on the same device
            assert images.device == labels.device == next(model.parameters()).device, \
                "Tensors and model are not on the same device."

            optimizer.zero_grad()
            features = model.forward_features(images)  # Extract features
            outputs = model.forward_head(features, pre_logits=False)  # Get logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_acc = correct / total
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss/len(train_loader):.3f}, Train Acc: {train_acc:.3f}")

        # Validation
        val_acc, _, _, f1, cm, _, _, results_df = evaluate_model(model, val_loader, device)
        # Early stopping must watch the loss on the validation data. The previous
        # code divided the accumulated *training* loss by the number of validation
        # batches, so it never measured validation performance.
        val_loss = _validation_loss(model, val_loader, device)

        print(f"Validation Acc: {val_acc:.3f}, F1: {f1:.3f}")
        print("Confusion Matrix:")
        print(cm)

        # Save the best model based on accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc

            # Save the results CSV
            results_csv_path = "Validation_Results.csv"
            results_df.to_csv(results_csv_path, index=False)
            print(f"Validation results saved to {results_csv_path}")

            torch.save(model.state_dict(), "dpn68b.mx_in1k.pth")
            print("Best model saved!")

        # Step the scheduler
        scheduler.step()
        print(f"Learning Rate after Epoch {epoch+1}: {scheduler.get_last_lr()}")

        # Check for early stopping
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered. Training stopped.")
            break


# Define the evaluation function
def evaluate_model(model, data_loader, device=None):
    """
    Runs ``model`` over ``data_loader`` and computes binary classification metrics.

    Args:
        model (torch.nn.Module): Model to evaluate; it is switched to eval mode.
        data_loader (iterable): Yields (images, labels) batches.
        device (str or torch.device, optional): Device the batches are moved to.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters), so the call also works on
            machines without CUDA.

    Returns:
        tuple: (accuracy, precision, recall, f1, confusion_matrix,
                all_labels, all_preds, results_df) where ``results_df`` has the
                columns "True Labels" and "Predicted Labels".
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)  # Get the class predictions
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Per-sample results, returned so the caller can persist them
    results_df = pd.DataFrame({
        "True Labels": all_labels,
        "Predicted Labels": all_preds
    })

    # Calculate metrics
    accuracy = (np.array(all_preds) == np.array(all_labels)).mean()
    precision = precision_score(all_labels, all_preds, average="binary", zero_division=1)
    # zero_division=0 yields the same value scikit-learn falls back to by
    # default, but without emitting an undefined-metric warning on every call.
    recall = recall_score(all_labels, all_preds, average="binary", zero_division=0)
    f1 = f1_score(all_labels, all_preds, average="binary", zero_division=0)
    cm = confusion_matrix(all_labels, all_preds)

    return accuracy, precision, recall, f1, cm, all_labels, all_preds, results_df


In [None]:
# Training
# Launch training with the loaders, epoch budget and device defined above.
train_model(model, train_loader, val_loader, num_epochs, device)

### Evaluate the Model on the Test Set and Generate Metrics

In [None]:
# Testing
# Pass the device explicitly: evaluate_model's own default targets CUDA, which
# fails on CPU-only machines.
test_acc, test_precision, test_recall, test_f1, test_cm, test_labels, test_preds, result_df = evaluate_model(model, test_loader, device)

# Print metrics
print(f"Test Accuracy: {test_acc:.3f}")
print(f"Precision: {test_precision:.3f}")
print(f"Recall: {test_recall:.3f}")
print(f"F1 Score: {test_f1:.3f}")
print("Confusion Matrix:")
print(test_cm)

In [None]:
from collections import Counter

# Tally how often each class index occurs among the ground-truth labels and
# among the model's predictions, e.g. Counter({0: 150, 1: 150}).
true_label_counts = Counter(test_labels)
pred_label_counts = Counter(test_preds)

# Report both tallies, ground truth first.
for heading, tally in (
    ("\nTrue Label Counts:", true_label_counts),
    ("\nPredicted Label Counts:", pred_label_counts),
):
    print(heading)
    print(tally)

## Model Testing on New Dataset

### Custom Dataset Definition

In [None]:
# Custom Dataset for loading test images without labels
class CustomTestDataset(Dataset):
    """
    Custom dataset class for loading unlabeled PNG test images from a flat directory.

    Files are selected by content (PNG signature), not by extension, and are
    listed in sorted order so predictions are reproducible across runs.

    Args:
        test_dir (str): Path to the directory containing test images.
        transform (callable, optional): Transformations to be applied to each image. Default is None.
    """

    # First eight bytes of every PNG file.
    _PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"

    def __init__(self, test_dir, transform=None):
        """
        Initializes the dataset with the directory path and transformations.

        Args:
            test_dir (str): Path to the directory containing test images.
            transform (callable, optional): Transformations to be applied to each image. Default is None.
        """
        self.test_dir = test_dir
        self.transform = transform
        # sorted(): os.listdir returns entries in arbitrary order.
        candidates = (
            os.path.join(test_dir, filename)
            for filename in sorted(os.listdir(test_dir))
        )
        self.image_paths = [path for path in candidates if self._is_png(path)]

    @staticmethod
    def _is_png(path):
        """Return True when ``path`` is a readable regular file starting with the PNG signature."""
        # Sub-directories and unreadable entries are skipped instead of raising.
        if not os.path.isfile(path):
            return False
        try:
            with open(path, "rb") as handle:
                return handle.read(8) == CustomTestDataset._PNG_SIGNATURE
        except OSError:
            return False

    def __len__(self):
        """
        Returns the total number of images in the dataset.

        Returns:
            int: Number of images in the dataset.
        """
        return len(self.image_paths)

    def __getitem__(self, idx):
        """
        Retrieves an image and its file path at the specified index.

        Args:
            idx (int): Index of the image to retrieve.

        Returns:
            tuple: A tuple containing:
                - image (PIL.Image or torch.Tensor): The processed image.
                - image_path (str): The file path of the image.
        """
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')  # Ensure image is in RGB mode
        if self.transform:
            image = self.transform(image)
        return image, image_path


# Define transformations (adjust this as per your model's requirements)
# Deterministic inference pipeline: resize, convert to tensor, normalize.
# NOTE: this rebinds the module-level name `transform` that was assigned earlier
# in the notebook to the training pipeline.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

### Model Definition

In [None]:
# Load the model
# pretrained=False: every weight is overwritten by the checkpoint below, so
# downloading the ImageNet weights first would be wasted work.
model = timm.create_model('dpn68b.mx_in1k', pretrained=False, num_classes=num_classes)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
model.load_state_dict(torch.load("dpn68b.mx_in1k.pth", map_location=device))
model = model.to(device)
model.eval()  # Set model to evaluation mode

### Load Test Dataset

In [None]:
# Load the test dataset
# Placeholder path: replace it with the folder holding the unlabeled images.
test_dir = '/path/to/new_test_dataset'  # define the path for the new dataset on which you need the model to provide predictions
test_dataset = CustomTestDataset(test_dir=test_dir, transform=transform)
# shuffle=False keeps predictions in the dataset's file order.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

### Evaluate the Model

In [None]:
# Function to run inference and save results to CSV
def run_inference(model, data_loader, device=None, output_path='test_predictions_dpn.csv'):
    """
    Predicts a class for every image in ``data_loader`` and writes the results to CSV.

    Args:
        model (torch.nn.Module): Trained model; it is switched to eval mode.
        data_loader (iterable): Yields (images, paths) batches.
        device (str or torch.device, optional): Device the batches are moved to.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters).
        output_path (str, optional): Destination CSV file.
            Default is 'test_predictions_dpn.csv'.

    Returns:
        pandas.DataFrame: One row per image with the columns "Image Path" and "Prediction".
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds = []
    all_paths = []

    with torch.no_grad():
        for images, paths in data_loader:
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)  # Get the class predictions

            # Collect predictions and image paths
            all_preds.extend(preds.cpu().tolist())
            all_paths.extend(paths)

    test_results_df = pd.DataFrame({
        "Image Path": all_paths,
        "Prediction": all_preds
    })

    # Save to CSV file and report the file that was actually written
    test_results_df.to_csv(output_path, index=False)
    print(f"Predictions saved to '{output_path}'.")
    return test_results_df

# Run inference and save predictions
# Pass the device explicitly so the batches follow the model onto the same
# device, including on CPU-only machines.
run_inference(model, test_loader, device)