In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    # print(dirname)
    for filename in filenames:
        if filename.endswith(".csv"):
            print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cards-image-datasetclassification/cards.csv


In [2]:
import os
from PIL import Image

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
from torchvision import transforms

In [3]:
test_path = "/kaggle/input/cards-image-datasetclassification/valid"
train_path = "/kaggle/input/cards-image-datasetclassification/train"
csv_path = "/kaggle/input/cards-image-datasetclassification/cards.csv"
folder_path = "/kaggle/input/cards-image-datasetclassification"




# These are calculated from below code to calcuate the mean and std on the train dataset
mean = [0.7786, 0.7310, 0.7048]
std = [0.3061, 0.3360, 0.3424]

IMAGE_SIZE = (224,224)

lr = 0.001



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device : ",device)

Device :  cuda


# Create the New CNN

### Calculate the Mean & STD of the train dataset

In [4]:
# Create the data aug.

transforms_cal = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

In [5]:
# Create the custom Image loader class.

class CustomImageDataset(Dataset):

    def __init__(self, csv_file, root_dir, mtype="train", transforms=None):

        self.annotations = pd.read_csv(csv_file)
        self.annotations = self.annotations[self.annotations["data set"]==mtype]
        self.annotations = self.annotations[self.annotations["filepaths"].apply(lambda x: True if x.endswith((".png",".jpg")) else False)]

        self.root_dir = root_dir
        self.transforms = transforms

        self.idx_to_label = {i:x for i, x in enumerate(self.annotations["labels"].unique())}
        self.label_to_idx = {x:i for i, x in self.idx_to_label.items()}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):

        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 1])
        img = Image.open(img_name).convert("RGB")

        label_str = self.annotations.iloc[idx, 2]
        label = self.label_to_idx[label_str]

        if self.transforms:
            img = self.transforms(img)

        return img, label

In [6]:
train_dataset_cal = CustomImageDataset(csv_path, folder_path,"train", transforms_cal)
train_dataloader_cal = torch.utils.data.DataLoader(train_dataset_cal, batch_size=32, shuffle=True)

In [7]:
# Calculate the Normalization value

def get_mean_and_std(dataloader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _ in dataloader:
        # Shape of data: (batch_size, channels, height, width)
        # Normalize to [0, 1] if not already
        if data.max() > 1.0:
            data = data / 255.0

        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_squared_sum / num_batches - mean**2)**0.5
    return mean, std

In [8]:
mean, std = get_mean_and_std(train_dataloader_cal)
print("mean = ",mean)
print("std = ",std)

mean =  tensor([0.7786, 0.7310, 0.7049])
std =  tensor([0.3062, 0.3360, 0.3423])


### Start the CNN

In [9]:
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(), 
    transforms.RandomResizedCrop(224), 
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [10]:
# Create the train and test data set
train_dataset = CustomImageDataset(csv_path, folder_path,"train", train_transforms)
test_dataset = CustomImageDataset(csv_path, folder_path,"test", val_transforms)
val_dataset = CustomImageDataset(csv_path, folder_path,"valid", val_transforms)

In [11]:
# Create the dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=True)

In [12]:
# Define the CNN Architecutre

class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1, stride=1)
        # output size = [(inputsize - kernel) + 2 * padding  /  stride ] + 1 = 224-3 + 2   + 1 == 224
        self.bc1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # output size = [(224 - 2 + 2 * 0)/2] + 1 = 222/2 + 1 = 111 + 1 =  112

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1, stride=1)
        # output size = [(inputsize - kernel) + 2 * padding  /  stride ] + 1 = 112-3 + 2   + 1 == 112
        self.bc2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # output size = [(112 - 2 + 2 * 0)/2] + 1 = 110/2 + 1 = 55 + 1 =  56

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=1)
        # output size = [(inputsize - kernel) + 2 * padding  /  stride ] + 1 = 56-3 + 2   + 1 == 56
        self.bc3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)
        # output size = [(56 - 2 + 2 * 0)/2] + 1 = 54/2 + 1 = 27 + 1 =  28

        self.flt = nn.Flatten()

        self.fc1 = nn.Linear(28*28*64, 10000)
        self.relu4 = nn.ReLU()
        # self.drp = nn.Dropout(0.3)
        self.fc2 = nn.Linear(10000, 53)


    def forward(self, x):

        out = self.conv1(x)
        out = self.bc1(out)
        out = self.relu1(out)
        out = self.maxpool1(out)
        
        out = self.conv2(out)
        out = self.bc2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        
        out = self.conv3(out)
        out = self.bc3(out)
        out = self.relu3(out)
        out = self.maxpool3(out)

        out = self.flt(out)

        out = self.fc1(out)
        out = self.relu4(out)
        # out = self.drp(out)
        out = self.fc2(out)

        return out
        

        

In [13]:
# Load the model
model = CNN().to(device)

In [14]:
# Load the optimizer and criterion

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [15]:
# Start the model training

epochs = 20

for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in train_dataloader:
        images = images.float().to(device)
        labels = labels.long().to(device)

        optimizer.zero_grad()

        output = model(images)

        loss = criterion(output, labels)

        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss : {running_loss / len(train_dataloader):.3f}")

Epoch 1/20, Loss : 10.451
Epoch 2/20, Loss : 3.372
Epoch 3/20, Loss : 3.245
Epoch 4/20, Loss : 3.160
Epoch 5/20, Loss : 3.126
Epoch 6/20, Loss : 3.115
Epoch 7/20, Loss : 3.089
Epoch 8/20, Loss : 3.071
Epoch 9/20, Loss : 3.075
Epoch 10/20, Loss : 3.061
Epoch 11/20, Loss : 3.035
Epoch 12/20, Loss : 3.017
Epoch 13/20, Loss : 2.990
Epoch 14/20, Loss : 2.990
Epoch 15/20, Loss : 2.975
Epoch 16/20, Loss : 2.967
Epoch 17/20, Loss : 2.953
Epoch 18/20, Loss : 2.938
Epoch 19/20, Loss : 2.908
Epoch 20/20, Loss : 2.893


In [16]:
# Test the accuracy

def test_accuracy(test_dataloader):

    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.float().to(device)
            labels = labels.long().to(device)
            output = model(images)
            _, probabilities = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (probabilities == labels).sum().item()

        print(f"Accuracy : {correct / total * 100 :.3f}")

In [17]:
test_accuracy(test_dataloader)

Accuracy : 26.038


#### Transfer Learning

In [18]:
# Create the custom Image loader class.

class CustomImageDataset(Dataset):

    def __init__(self, csv_file, root_dir, mtype="train", transforms=None):

        self.annotations = pd.read_csv(csv_file)
        self.annotations = self.annotations[self.annotations["data set"]==mtype]
        self.annotations = self.annotations[self.annotations["filepaths"].apply(lambda x: True if x.endswith((".png",".jpg")) else False)]

        self.root_dir = root_dir
        self.transforms = transforms

        self.idx_to_label = {i:x for i, x in enumerate(self.annotations["labels"].unique())}
        self.label_to_idx = {x:i for i, x in self.idx_to_label.items()}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):

        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 1])
        img = Image.open(img_name).convert("RGB")

        label_str = self.annotations.iloc[idx, 2]
        label = self.label_to_idx[label_str]

        if self.transforms:
            img = self.transforms(img)

        return img, label

In [19]:
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [20]:
# Create the train and test data set
train_dataset = CustomImageDataset(csv_path, folder_path,"train", train_transforms)
test_dataset = CustomImageDataset(csv_path, folder_path,"test", val_transforms)
val_dataset = CustomImageDataset(csv_path, folder_path,"valid", val_transforms)

In [21]:
# Create the dataloader
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=True)

In [22]:
model_ft = torchvision.models.efficientnet_b0(weights=torchvision.models.EfficientNet_B0_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 151MB/s]


In [23]:
# print(model_ft)

In [24]:
in_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(in_ftrs, 53)

In [25]:
# print(model_ft)

In [26]:
# --- 5. Training Strategy (Option A: Feature Extractor) ---
# Freeze all parameters in the feature extractor part
for param in model_ft.parameters():
    param.required_grad = False

In [27]:
criterion = nn.CrossEntropyLoss()

In [28]:
optimizer_ft = torch.optim.Adam(model_ft.parameters(), lr=lr)

In [29]:
model_ft = model_ft.to(device)

In [30]:
# Train the model

epochs = 15

for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in train_dataloader:
        images = images.float().to(device)
        labels = labels.long().to(device)

        optimizer_ft.zero_grad()
        output = model_ft(images)
        loss = criterion(output, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer_ft.step()

    print(f"Epoch: {epoch+1}/{epochs}, Loss: {running_loss/len(train_dataloader):.3f}")

Epoch: 1/15, Loss: 2.419
Epoch: 2/15, Loss: 1.678
Epoch: 3/15, Loss: 1.408
Epoch: 4/15, Loss: 1.261
Epoch: 5/15, Loss: 1.197
Epoch: 6/15, Loss: 1.132
Epoch: 7/15, Loss: 1.072
Epoch: 8/15, Loss: 1.027
Epoch: 9/15, Loss: 0.993
Epoch: 10/15, Loss: 0.975
Epoch: 11/15, Loss: 0.937
Epoch: 12/15, Loss: 0.893
Epoch: 13/15, Loss: 0.901
Epoch: 14/15, Loss: 0.864
Epoch: 15/15, Loss: 0.836


In [31]:
# check the accuracy

total = 0
correct = 0

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.float().to(device)
        labels = labels.long().to(device)

        output = model_ft(images)
        _, prediction = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (prediction == labels).sum().item()

    print(f"Accuracy : {correct/total * 100}")

Accuracy : 87.9245283018868
