In [1]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from matplotlib.pyplot import subplots
import joblib

# Debugging aid: make CUDA kernel launches synchronous so a device-side error is
# reported at the call that caused it. It is set before the first CUDA query
# below so the variable is already in the environment when CUDA starts up.
# NOTE(review): synchronous launches slow training down; consider dropping this
# once the pipeline is stable.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Device configuration: use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Directory holding the training images; each one is looked up as "<id>.jpg".
root_dir = 'train/'
# CSV with the training labels; the notebook reads its "id" and "breed" columns.
csv_file = 'labels.csv'

In [3]:
# Hyperparameters
# Number of samples per mini-batch served by the training DataLoader.
BATCH_SIZE = 64

In [4]:
class CustomImageDataset(Dataset):
    """Image-classification dataset backed by a table of image ids and labels.

    Every row of the table must provide an ``"id"`` column (the image file name
    without its ``.jpg`` extension) and a ``"breed"`` column that already holds
    the integer-encoded class label.

    Args:
        root_dir: Directory that contains the ``<id>.jpg`` image files.
        csv_file: Either an already-loaded ``pandas.DataFrame`` or a path to a
            CSV file with the columns described above.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, root_dir, csv_file, transform=None):
        # Accept a path as well as a DataFrame, as the parameter name suggests.
        if isinstance(csv_file, (str, os.PathLike)):
            csv_file = pd.read_csv(csv_file)
        # Re-index to 0..n-1 so the positional indices produced by samplers
        # always resolve, even when the caller passes a filtered/shuffled frame.
        self.data = csv_file.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of labelled images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        ``label`` is a 0-d ``torch.long`` tensor; ``image`` is the RGB image
        after ``transform`` (or the PIL image itself when no transform is set).
        """
        row = self.data.iloc[idx]
        img_name = os.path.join(self.root_dir, row["id"] + ".jpg")
        # convert() forces three channels (grayscale/CMYK/RGBA files would
        # otherwise break a 3-channel Normalize) and fully loads the pixels,
        # so the file handle can be closed right away.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        label_int = torch.tensor(int(row["breed"]), dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label_int

labels_df = pd.read_csv(csv_file)
num_classes = labels_df['breed'].nunique()

# Breed name <-> integer class index, numbered in order of first appearance in
# the CSV. `inverse_mapping` is used later to name the prediction columns.
mapping = {breed: index for index, breed in enumerate(labels_df['breed'].unique())}
inverse_mapping = {index: breed for breed, index in mapping.items()}

# Replace each breed name with its class index in a single vectorized pass
# (instead of assigning row by row), which also yields an integer column.
labels_df['breed'] = labels_df['breed'].map(mapping)

# Data transforms: fixed 224x224 input, tensor conversion, ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Imagenet normalization
])

# Dataset over the whole labelled set; no validation split is carved out here.
dataset = CustomImageDataset(root_dir=root_dir, csv_file=labels_df, transform=transform)
total_samples = len(dataset)

train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [5]:
# Preview the first rows of the label table; "breed" holds integer class indices here.
labels_df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,0
1,001513dfcb2ffafc82cccf4d8bbaba97,1
2,001cdf01b096e06d78e9e5112d419397,2
3,00214f311d5d2247d5dfe4fe24b2303d,3
4,0021f9ceb3235effd7fcde7f7538ed62,4


In [6]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-18 whose final fully connected layer is replaced by a new
# one that emits `num_classes` outputs.
model = torchvision.models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

# Cross-entropy loss, optimized with SGD plus momentum over every parameter.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# Move the model to the selected device; as the last expression of the cell
# this also displays the module structure.
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
# Training loop
num_epochs = 12
errors_list = []  # mean training loss of each completed epoch, in order
for epoch in range(num_epochs):

    model.train()
    running_train_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss_val = criterion(outputs, labels)
        loss_val.backward()
        optimizer.step()

        # .item() extracts a plain Python float, so no autograd graph is kept.
        running_train_loss += loss_val.item()

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = running_train_loss / len(train_loader)
    # Keep the history so the loss curve can be inspected after training.
    errors_list.append(avg_train_loss)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_train_loss}")

Epoch 1/12, Loss: 2.54965660199523
Epoch 2/12, Loss: 1.7405034318566321
Epoch 3/12, Loss: 1.32176463752985
Epoch 4/12, Loss: 1.066337402537465
Epoch 5/12, Loss: 0.886464623734355
Epoch 6/12, Loss: 0.7540360622107982
Epoch 7/12, Loss: 0.6369636096060276
Epoch 8/12, Loss: 0.5387725973501801
Epoch 9/12, Loss: 0.4594939272850752
Epoch 10/12, Loss: 0.3962597155943513
Epoch 11/12, Loss: 0.33573355628177526
Epoch 12/12, Loss: 0.28806026354432107


In [9]:
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before writing the weights.
os.makedirs('saved_param', exist_ok=True)
torch.save(model.state_dict(), 'saved_param/ResNet.pth')

In [10]:
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load('saved_param/ResNet.pth', map_location=device))
# Switch to evaluation mode for inference; the trailing semicolon suppresses
# the module repr in the cell output.
model.eval();

In [11]:
def predict(model, input):
    """Return the class probabilities that ``model`` assigns to ``input``.

    The forward pass runs with gradient tracking disabled, and the raw outputs
    are normalized with a softmax over dimension 1.
    """
    with torch.no_grad():
        logits = model(input)
        return torch.softmax(logits, dim=1)

In [13]:
# Take the first training sample, add a leading batch dimension and move it to
# the active device.
image, label = dataset[0]
image = image.unsqueeze(0).to(device)

In [14]:
# Score every file under test/ and collect one row of per-breed probabilities
# per image. Rows are gathered in a list and turned into a DataFrame once;
# concatenating inside the loop is quadratic in the number of images.
submission_rows = []

for root, dirs, files in os.walk("test"):
    # Sorted so the row order does not depend on filesystem listing order.
    for filename in sorted(files):
        # Join with the directory being walked so files in sub-folders resolve.
        image_path = os.path.join(root, filename)
        with Image.open(image_path) as img:
            # Force three channels: `transform` normalizes with 3-channel stats.
            image = transform(img.convert("RGB"))
        image = image.unsqueeze(0).to(device)
        # Drop only the batch dimension and fetch all probabilities with a
        # single device-to-host transfer instead of one .item() per class.
        prediction = predict(model, image).squeeze(0).tolist()

        # The id is the file name without its extension, whatever its length.
        img_data = {'id': os.path.splitext(filename)[0]}
        for i, probability in enumerate(prediction):
            img_data[inverse_mapping[i]] = probability
        submission_rows.append(img_data)

submission = pd.DataFrame(submission_rows)


In [15]:
# Preview the first rows: an "id" column followed by one probability column per breed.
submission.head()

Unnamed: 0,id,boston_bull,dingo,pekinese,bluetick,golden_retriever,bedlington_terrier,borzoi,basenji,scottish_deerhound,...,blenheim_spaniel,silky_terrier,sussex_spaniel,german_short-haired_pointer,french_bulldog,bouvier_des_flandres,tibetan_mastiff,english_springer,cocker_spaniel,rottweiler
0,000621fb3cbb32d8935728e48679680e,0.00106,0.000162,0.266738,5.7e-05,0.000961,2.5e-05,0.000528,0.00017,3.2e-05,...,0.023542,0.000292,0.000156,2.1e-05,0.002116,3.1e-05,0.000202,0.000174,0.001272,5.2e-05
1,00102ee9d8eb90812350685311fe5890,2e-06,7.4e-05,2.5e-05,1.3e-05,1.5e-05,1.6e-05,1.6e-05,9e-06,1e-05,...,2e-06,2e-06,2e-06,2e-06,1.7e-05,3e-06,3e-05,1e-06,4e-06,4e-06
2,0012a730dfa437f5f3613fb75efcd4ce,4e-06,7e-06,6e-06,0.001893,0.000892,8.2e-05,0.000911,1e-06,0.000233,...,0.000623,2.8e-05,0.000429,0.001055,7e-06,8e-06,2.5e-05,0.001681,0.026065,1e-05
3,001510bc8570bbeee98c8d80c8a95ec1,0.009125,9.8e-05,0.007499,0.00054,0.000883,6.8e-05,7e-05,0.0003,0.000619,...,0.000308,0.000829,0.009243,0.005165,0.025021,0.008588,0.007553,0.000547,0.001095,0.006812
4,001a5f3114548acdefa3d4da05474c2e,0.000212,1.1e-05,0.005823,2.5e-05,4.1e-05,2.1e-05,2.3e-05,2.3e-05,0.000142,...,0.000163,0.003425,0.000151,1.7e-05,0.000265,0.002967,0.000784,2.4e-05,0.000142,4.3e-05


In [17]:
# Write the prediction table to disk without the pandas row index.
submission.to_csv('submissionResNet.csv', index=False)

In [36]:
# Rebuild the breed <-> class-index lookups from the untouched label file,
# numbering the breeds in order of first appearance.
original = pd.read_csv('labels.csv')
mapping = {c: i for i, c in enumerate(original['breed'].unique())}
inverse_mapping = {i: c for c, i in mapping.items()}

In [39]:
# Display the class-index -> breed-name lookup.
inverse_mapping

{0: 'boston_bull',
 1: 'dingo',
 2: 'pekinese',
 3: 'bluetick',
 4: 'golden_retriever',
 5: 'bedlington_terrier',
 6: 'borzoi',
 7: 'basenji',
 8: 'scottish_deerhound',
 9: 'shetland_sheepdog',
 10: 'walker_hound',
 11: 'maltese_dog',
 12: 'norfolk_terrier',
 13: 'african_hunting_dog',
 14: 'wire-haired_fox_terrier',
 15: 'redbone',
 16: 'lakeland_terrier',
 17: 'boxer',
 18: 'doberman',
 19: 'otterhound',
 20: 'standard_schnauzer',
 21: 'irish_water_spaniel',
 22: 'black-and-tan_coonhound',
 23: 'cairn',
 24: 'affenpinscher',
 25: 'labrador_retriever',
 26: 'ibizan_hound',
 27: 'english_setter',
 28: 'weimaraner',
 29: 'giant_schnauzer',
 30: 'groenendael',
 31: 'dhole',
 32: 'toy_poodle',
 33: 'border_terrier',
 34: 'tibetan_terrier',
 35: 'norwegian_elkhound',
 36: 'shih-tzu',
 37: 'irish_terrier',
 38: 'kuvasz',
 39: 'german_shepherd',
 40: 'greater_swiss_mountain_dog',
 41: 'basset',
 42: 'australian_terrier',
 43: 'schipperke',
 44: 'rhodesian_ridgeback',
 45: 'irish_setter',
 46