In [229]:
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import os
import pandas as pd
from PIL import Image
import torch.optim as optim
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np 
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torchvision.transforms as T
# from pytorchcv.model_provider import get_model

In [230]:
# pip install pytorchcv

In [231]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [232]:
class PatchExtraction(nn.Module):
    def __init__(self):
        super(PatchExtraction, self).__init__()
        # First separable convolution (depthwise + pointwise)
        self.depthwise_conv1 = nn.Conv2d(320, 320, kernel_size=4, stride=4, padding=1, groups=320)
        self.pointwise_conv1 = nn.Conv2d(320, 256, kernel_size=1, stride=1, padding=0)
        
        # Second separable convolution (depthwise + pointwise)
        self.depthwise_conv2 = nn.Conv2d(256, 256, kernel_size=2, stride=2, padding=0, groups=256)
        self.pointwise_conv2 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)

        # Normal Conv (used directly)
        self.conv3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        # First separable convolution
        x = F.relu(self.depthwise_conv1(x))
        x = F.relu(self.pointwise_conv1(x))

        # Second separable convolution
        x = F.relu(self.depthwise_conv2(x))
        x = F.relu(self.pointwise_conv2(x))

        # Normal convolution
        x = F.relu(self.conv3(x))
        
        return x

In [233]:
class SelfAttention(nn.Module):
    def __init__(self, embed_size, num_heads=1):
        super(SelfAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=num_heads)

    def forward(self, x):
        # Attention expects input of shape [sequence_length, batch_size, embed_dim]
        x = x.unsqueeze(0)  # Adding sequence length as 1
        attn_output, _ = self.attention(x, x, x)
        return attn_output.squeeze(0)

In [258]:
# directory_path = '/kaggle/working'

# # Method 1: Using os to remove files
# for filename in os.listdir(directory_path):
#     file_path = os.path.join(directory_path, filename)
#     try:
#         if os.path.isfile(file_path):
#             os.unlink(file_path)  # Remove the file
#           # Remove the directory and all its contents
#     except Exception as e:
#         print(f'Failed to delete {file_path}. Reason: {e}')

In [256]:
dummy_input = torch.randn(1, 3, 224, 224)
x = dummy_input
for layer in backbone.features:
    x = layer(x)  # Pass the input through the layer
    print(f"{layer.__class__.__name__}: {x.shape}")

Conv2dNormActivation: torch.Size([1, 32, 112, 112])
InvertedResidual: torch.Size([1, 16, 112, 112])
InvertedResidual: torch.Size([1, 24, 56, 56])
InvertedResidual: torch.Size([1, 24, 56, 56])
InvertedResidual: torch.Size([1, 32, 28, 28])
InvertedResidual: torch.Size([1, 32, 28, 28])
InvertedResidual: torch.Size([1, 32, 28, 28])
InvertedResidual: torch.Size([1, 64, 14, 14])
InvertedResidual: torch.Size([1, 64, 14, 14])
InvertedResidual: torch.Size([1, 64, 14, 14])
InvertedResidual: torch.Size([1, 64, 14, 14])
InvertedResidual: torch.Size([1, 96, 14, 14])
InvertedResidual: torch.Size([1, 96, 14, 14])
InvertedResidual: torch.Size([1, 96, 14, 14])
InvertedResidual: torch.Size([1, 160, 7, 7])
InvertedResidual: torch.Size([1, 160, 7, 7])
InvertedResidual: torch.Size([1, 160, 7, 7])
InvertedResidual: torch.Size([1, 320, 7, 7])
Conv2dNormActivation: torch.Size([1, 1280, 7, 7])


In [259]:
# backbone = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)

# # Keep only the first layer
# first_layer = nn.Sequential(*list(backbone.features.children())[:-1])

# # Create a dummy input tensor
# dummy_input = torch.randn(1, 3, 224, 224)  # Batch size of 1, 3 channels, 224x224 size

# # Pass the dummy input through the first layer
# output = first_layer(dummy_input)

# # Print the output size
# print("Output size:", output.shape)

Output size: torch.Size([1, 320, 7, 7])


In [236]:
class PattLite(nn.Module):
    def __init__(self):
        super(PattLite, self).__init__()
        
        # Preprocessing: resizing and augmentation
#         self.transform = transforms.Compose([
#             transforms.Resize((224, 224)),

#             transforms.ToTensor(),
#         ])
        
        # Backbone (MobileNet with last 29 layers removed)
        self.backbone = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
        self.backbone = nn.Sequential(*list(self.backbone.features.children())[:-1])
#         for param in self.backbone.parameters():
#             param.requires_grad = False

        # Patch extraction, attention, and global average pooling layers
        self.patch_extraction = PatchExtraction()
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
#         self.dropout = nn.Dropout(0.1)

        # Pre-classification layer
        self.pre_classification = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32)
        )

        # Self-attention
        self.self_attention = SelfAttention(embed_size=32)

        # Final classification layer
        self.classifier = nn.Linear(32, 7)

    def forward(self, x):
        # Apply transformations
#         x = self.transform(x)
        
        # Backbone (MobileNetV2)
        x = self.backbone(x)
        
        # Patch extraction
        x = self.patch_extraction(x)
        
        # Global average pooling
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten (N, 256)
        
        # Dropout before final classification
#         x = self.dropout(x)
        
        # Pre-classification layer
        x = self.pre_classification(x)
        
        # Self-attention (expects [seq_len, batch_size, embed_dim])
#         x = self.self_attention(x.unsqueeze(0)).squeeze(0)
        x = self.self_attention(x).squeeze(0)# Apply self-attention
        
        # Final classification layer
        x = self.classifier(x)
        
        return x

In [237]:
# pip install --upgrade torchvision

In [238]:
model = PattLite()

In [239]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, min_lr=1e-6)

In [240]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [241]:
model = model.to(device)

In [242]:
class CustomImageDataset(Dataset):
    def __init__(self, image_dir, csv_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform

        # Read the CSV file
        self.data_frame = pd.read_csv(csv_file)

        # Ensure the CSV file has columns 'filename' and 'class'
        assert 'image' in self.data_frame.columns
        assert 'label' in self.data_frame.columns

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        # Get the filename and class label
        img_name = self.data_frame.iloc[idx, 0]  # Get the filename from the CSV
        class_label = self.data_frame.iloc[idx, 1] - 1  # Get the class label from the CSV

        # Construct the path to the image based on its class label
        class_folder = os.path.join(self.image_dir, str(class_label+1))  # Convert class label to string
        img_path = os.path.join(class_folder, img_name)

        # Load the image
        image = Image.open(img_path).convert('RGB')

        # Apply transformations
        if self.transform:
            image = self.transform(image)

        return image, class_label

In [243]:
image_directory = r"/kaggle/input/raf-db-dataset/DATASET/train"  # Directory containing class subfolders
csv_file_path = r"/kaggle/input/raf-db-dataset/train_labels.csv"

In [244]:
train_dataset = CustomImageDataset(image_dir=image_directory, csv_file=csv_file_path, transform=transform)

In [245]:
train_loader = DataLoader(train_dataset, batch_size=8, num_workers=4, pin_memory=True, shuffle=True)

In [246]:
test_image_directory = r"/kaggle/input/raf-db-dataset/DATASET/test"
test_csv_file_path = r"/kaggle/input/raf-db-dataset/test_labels.csv"

In [247]:
test_dataset = CustomImageDataset(test_image_directory, test_csv_file_path, transform)

In [248]:
test_loader = DataLoader(test_dataset, batch_size=8, num_workers=4, pin_memory=True, shuffle=True)

In [251]:
model.load_state_dict(torch.load(r"/kaggle/working/best_model_epoch15_acc62.25554106910039.pth"))

  model.load_state_dict(torch.load(r"/kaggle/working/best_model_epoch15_acc62.25554106910039.pth"))


<All keys matched successfully>

In [261]:
def train_model(model, train_loader, test_loader, num_epochs):
#     best_val_acc = 0
#     early_stopping_counter = 0
    best_acc = 0
    for epoch in range(1, num_epochs+1):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        # Training loop
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        # Calculate training accuracy
        train_acc = 100. * correct / total
        
        # Validation loop
        model.eval()
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(test_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
        
        val_acc = 100. * val_correct / val_total
        
        print(f'Epoch {epoch}/{num_epochs}, Loss: {running_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')
        
        # Scheduler step
        scheduler.step(val_acc)
        
        # Early stopping
        if val_acc > best_acc:
            best_acc = val_acc
#             early_stopping_counter = 0
            torch.save(model.state_dict(), f"best_model_epoch{epoch}_acc{val_acc}.pth")
            print("Model Saved")
#         else:
#             early_stopping_counter += 1
#             if early_stopping_counter > patience:
#                 print("Early stopping!")
#                 break


In [262]:
train_model(model, train_loader, test_loader, 20)

100%|██████████| 1534/1534 [00:32<00:00, 46.82it/s]
100%|██████████| 384/384 [00:07<00:00, 51.58it/s]


Epoch 1/20, Loss: 1328.5245, Train Acc: 70.00%, Val Acc: 62.68%
Model Saved


100%|██████████| 1534/1534 [00:32<00:00, 47.84it/s]
100%|██████████| 384/384 [00:06<00:00, 63.17it/s]


Epoch 2/20, Loss: 1344.6403, Train Acc: 69.53%, Val Acc: 63.43%
Model Saved


100%|██████████| 1534/1534 [00:32<00:00, 46.84it/s]
100%|██████████| 384/384 [00:06<00:00, 63.36it/s]


Epoch 3/20, Loss: 1323.4233, Train Acc: 70.08%, Val Acc: 61.28%


100%|██████████| 1534/1534 [00:32<00:00, 47.34it/s]
100%|██████████| 384/384 [00:06<00:00, 57.71it/s]


Epoch 4/20, Loss: 1341.3119, Train Acc: 69.46%, Val Acc: 61.99%


100%|██████████| 1534/1534 [00:32<00:00, 47.14it/s]
100%|██████████| 384/384 [00:06<00:00, 61.73it/s]


Epoch 5/20, Loss: 1337.2899, Train Acc: 69.96%, Val Acc: 61.60%


100%|██████████| 1534/1534 [00:32<00:00, 47.78it/s]
100%|██████████| 384/384 [00:06<00:00, 58.59it/s]


Epoch 6/20, Loss: 1312.6264, Train Acc: 70.05%, Val Acc: 61.47%


100%|██████████| 1534/1534 [00:32<00:00, 46.89it/s]
100%|██████████| 384/384 [00:05<00:00, 64.42it/s]


Epoch 7/20, Loss: 1334.8165, Train Acc: 69.68%, Val Acc: 62.94%


100%|██████████| 1534/1534 [00:32<00:00, 47.23it/s]
100%|██████████| 384/384 [00:06<00:00, 63.65it/s]


Epoch 8/20, Loss: 1327.7697, Train Acc: 70.08%, Val Acc: 61.51%


100%|██████████| 1534/1534 [00:32<00:00, 47.88it/s]
100%|██████████| 384/384 [00:05<00:00, 64.67it/s]


Epoch 9/20, Loss: 1333.1337, Train Acc: 69.73%, Val Acc: 61.77%


100%|██████████| 1534/1534 [00:32<00:00, 47.74it/s]
100%|██████████| 384/384 [00:06<00:00, 63.25it/s]


Epoch 10/20, Loss: 1340.2644, Train Acc: 69.80%, Val Acc: 61.57%


100%|██████████| 1534/1534 [00:32<00:00, 47.40it/s]
100%|██████████| 384/384 [00:06<00:00, 57.93it/s]


Epoch 11/20, Loss: 1338.5828, Train Acc: 69.33%, Val Acc: 61.96%


100%|██████████| 1534/1534 [00:32<00:00, 47.80it/s]
100%|██████████| 384/384 [00:06<00:00, 61.96it/s]


Epoch 12/20, Loss: 1324.6903, Train Acc: 70.02%, Val Acc: 62.16%


100%|██████████| 1534/1534 [00:33<00:00, 46.34it/s]
100%|██████████| 384/384 [00:06<00:00, 62.16it/s]


Epoch 13/20, Loss: 1336.6331, Train Acc: 69.60%, Val Acc: 62.09%


100%|██████████| 1534/1534 [00:32<00:00, 47.58it/s]
100%|██████████| 384/384 [00:06<00:00, 60.51it/s]


Epoch 14/20, Loss: 1341.3109, Train Acc: 69.85%, Val Acc: 62.74%


100%|██████████| 1534/1534 [00:32<00:00, 47.52it/s]
100%|██████████| 384/384 [00:05<00:00, 65.64it/s]


Epoch 15/20, Loss: 1340.4976, Train Acc: 69.98%, Val Acc: 62.42%


100%|██████████| 1534/1534 [00:32<00:00, 47.91it/s]
100%|██████████| 384/384 [00:06<00:00, 58.74it/s]


Epoch 16/20, Loss: 1326.4616, Train Acc: 69.99%, Val Acc: 62.26%


100%|██████████| 1534/1534 [00:31<00:00, 48.47it/s]
100%|██████████| 384/384 [00:06<00:00, 63.92it/s]


Epoch 17/20, Loss: 1322.6271, Train Acc: 69.73%, Val Acc: 61.34%


100%|██████████| 1534/1534 [00:31<00:00, 48.54it/s]
100%|██████████| 384/384 [00:05<00:00, 64.39it/s]


Epoch 18/20, Loss: 1333.6229, Train Acc: 70.23%, Val Acc: 61.21%


100%|██████████| 1534/1534 [00:31<00:00, 48.53it/s]
100%|██████████| 384/384 [00:06<00:00, 62.47it/s]


Epoch 19/20, Loss: 1336.5736, Train Acc: 69.86%, Val Acc: 61.77%


100%|██████████| 1534/1534 [00:31<00:00, 48.09it/s]
100%|██████████| 384/384 [00:05<00:00, 64.90it/s]

Epoch 20/20, Loss: 1318.6183, Train Acc: 70.08%, Val Acc: 61.77%



