In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(current_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch

# Ask PyTorch whether CUDA is usable in this session and print the answer.
gpu_available = torch.cuda.is_available()
print("GPU Available: ", gpu_available)


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from sklearn.preprocessing import LabelEncoder

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. The model and every batch of inputs/targets are moved to
# this device before they are used.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [None]:
# Locations of the competition data.
# Every path hangs off a single dataset root, so only DATASET_ROOT has to be
# edited when the notebook is pointed at a copy of the data somewhere else.
DATASET_ROOT = "/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset"

train_dir = f"{DATASET_ROOT}/train"  # one sub-directory of images per training class
test_dir = f"{DATASET_ROOT}/test"    # flat directory of test images
predicate_matrix_file = f"{DATASET_ROOT}/predicate-matrix-continuous.txt"
# NOTE(review): the binary matrix path is defined but not read anywhere in the visible notebook.
predicate_matrix_binary = f"{DATASET_ROOT}/predicate-matrix-binary.txt"
classes_file = f"{DATASET_ROOT}/classes.txt"



In [None]:
import re

# classes.txt holds one class per line as "<index>\t<class name>", e.g.
#   '1\tantelope', '2\tgrizzly+bear', '3\tkiller+whale', '4\tbeaver', ...
# Only the name is kept; a name's position in `classes` (0-based) is the class
# index used everywhere else in the notebook.
#
# The pattern is anchored to the start of the line, so just the leading index
# and the whitespace that follows it are removed; digits that are part of a
# class name are left untouched.
_LEADING_INDEX = re.compile(r'^\d+\s*')

# The context manager closes the file handle as soon as the names are read.
with open(classes_file) as class_list:
    classes = [_LEADING_INDEX.sub('', line.strip()) for line in class_list]


# Main lookup table for all classes. Despite its name, `class_to_idx` maps
# sequential index -> class name. The index is also used as the row index into
# the predicate matrix.
class_to_idx = {position: label for position, label in enumerate(classes)}

# The training classes are the entries found directly inside the train directory.
classes_for_train = os.listdir(train_dir)
print(classes_for_train)

# Same index -> name layout as `class_to_idx`, restricted to the training classes.
class_to_idx_train = {
    position: label
    for position, label in class_to_idx.items()
    if label in classes_for_train
}

# class_to_idx_train is the subset of class_to_idx that contains only the training classes (keys are class indices, values are class names)


In [None]:
# Load the continuous predicate matrix from its text file into a NumPy array.
predicate_matrix = np.loadtxt(predicate_matrix_file)

# Unpack the array's two dimensions: rows are classes, columns are attributes.
# The unpacking itself raises if the loaded array is not 2-D.
num_classes, num_attributes = predicate_matrix.shape


In [None]:
# Preprocess the predicate matrix so it can be used as a regression target.
#
# The matrix is re-read from disk instead of being derived from the current
# value of `predicate_matrix`, so running this cell more than once does not
# divide by 100 a second time.
predicate_matrix = np.loadtxt(predicate_matrix_file)

# Clamp every non-positive entry to 0 (the author noted 4 negative values out
# of 4250, which carry no meaning here). Vectorised, so it works for whatever
# shape the file has rather than a hard-coded 50 x 85.
predicate_matrix = np.maximum(predicate_matrix, 0.0)

# Rescale by 100. NOTE(review): this lands in [0, 1] only if the file's values
# do not exceed 100 -- confirm against the data.
predicate_matrix = predicate_matrix / 100


In [None]:
# Training dataset: yields each training image together with the attribute
# vector (row of the predicate matrix) of the class the image belongs to.
class Train_Dataset(Dataset):
    """Image -> class-attribute-vector dataset built from one folder per class.

    Args:
        img_main_dir: directory that contains one sub-directory per training class.
        class_to_idx_train: mapping of class index -> class sub-directory name.
            The index is used to look up the class's row in ``predicate_matrix``.
        predicate_matrix: indexable by class index, giving that class's
            attribute values.
        transform: optional callable applied to the loaded RGB image.
    """

    def __init__(self, img_main_dir, class_to_idx_train, predicate_matrix, transform=None):
        self.img_dir = img_main_dir  # main directory of train data
        self.transform = transform
        self.images = []  # path of every training image
        self.images_class_idx = []  # class index of the image at the same position in self.images
        self.predicate_matrix = predicate_matrix
        for idx, class_name in class_to_idx_train.items():
            class_dir = os.path.join(img_main_dir, class_name)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order identical from run to run.
            for img_name in sorted(os.listdir(class_dir)):
                self.images.append(os.path.join(class_dir, img_name))
                self.images_class_idx.append(idx)

    def __len__(self):
        """Number of training images found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a float32 tensor holding the predicate-matrix row of the
        image's class.
        """
        img_path = self.images[idx]
        label = self.images_class_idx[idx]
        # convert() returns a new image, so the file handle opened by
        # Image.open can be closed right away instead of being left open.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        target = torch.tensor(self.predicate_matrix[label], dtype=torch.float32)
        return image, target

In [None]:
# Model definition
class Model_Def(nn.Module):
    """ResNet-50 whose final layer is replaced to regress class attributes.

    Args:
        num_attributes: size of the output vector (one value per attribute).
    """

    def __init__(self, num_attributes):
        # Zero-argument super(): naming a class explicitly here is what broke
        # construction (the previously named class does not exist).
        super().__init__()
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument; `pretrained=True` is kept so the loaded weights do not change.
        self.cnn = models.resnet50(pretrained=True)
        # Swap the backbone's final fully-connected layer for one with a single
        # output per attribute.
        self.cnn.fc = nn.Linear(self.cnn.fc.in_features, num_attributes)

    def forward(self, x):
        """Return non-negative attribute predictions for a batch ``x``."""
        outputs = self.cnn(x)
        # ReLU clamps negative predictions to 0. It does NOT bound the output
        # from above, so values greater than 1 are possible.
        # torch.relu is used so this class needs only the `torch` import and
        # not the functional alias imported in a later cell.
        return torch.relu(outputs)



# Initialize model, loss, and optimizer.
# The class is spelled `Model_Def` (capital D); the lower-case spelling raised
# NameError.
model = Model_Def(num_attributes)

# Move the parameters to the selected device before the optimizer is built.
model.to(device)
criterion = nn.MSELoss()  # regression loss between predicted and target attribute vectors
optimizer = optim.Adam(model.parameters(), lr=1e-4)


In [None]:
# Training-time preprocessing and augmentation, applied in the order listed.
_train_steps = [
    transforms.Resize((224, 224)),
    # Geometric augmentation: each flip is applied with probability 0.5,
    # followed by a random rotation of up to 25 degrees either way.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=25),
    # Random crop resized back to 224x224; `scale` is the crop's area range and
    # `ratio` its aspect-ratio range.
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.5, 1.0), ratio=(3/4, 4/3)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_train_steps)



# The dataset class defined in this notebook is `Train_Dataset`; the name
# `AnimalDataset` is not defined anywhere in it and raised NameError.
train_dataset = Train_Dataset(train_dir, class_to_idx_train, predicate_matrix, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=25, shuffle=True)


In [None]:
import torch.nn.functional as F
# Training loop: one full pass over train_loader per epoch, then print the
# mean per-batch loss for that epoch.
num_epochs = 40
for epoch in range(num_epochs):
    model.train()
    batch_losses = []  # loss value of every batch seen in this epoch

    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    mean_loss = sum(batch_losses) / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")

In [None]:

import torch
import numpy as np
import torch.nn.functional as F


#########################################


def zero_shot_inference(model, test_loader, predicate_matrix, class_names):
    """Predict a class name for every test image by nearest attribute vector.

    For each image the model's predicted attribute vector is compared against
    every row of ``predicate_matrix`` (Euclidean distance); the class whose row
    is closest is the prediction.

    Args:
        model: module mapping a batch of images to one attribute vector per image.
        test_loader: iterable yielding ``(images, names)`` batches, where
            ``names`` holds one identifier per image in the batch.
        predicate_matrix: 2-D array-like with one row of attribute values per class.
        class_names: sequence where ``class_names[i]`` names the class of row
            ``i`` of ``predicate_matrix``.

    Returns:
        dict mapping each image name to its predicted class name.
    """
    model.eval()

    # Run on whatever device the model already lives on instead of relying on
    # a notebook-level global; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    class_attributes = np.asarray(predicate_matrix)
    predictions = {}

    with torch.no_grad():  # no gradients are needed for inference
        for images, names in test_loader:
            # One row of predicted attributes per image in the batch.
            batch_attributes = model(images.to(run_device)).cpu().numpy()

            # Rows come back in the same order as the images went in, so they
            # pair up one-to-one with the batch's names.
            for name, attributes in zip(names, batch_attributes):
                distances = np.linalg.norm(class_attributes - attributes, axis=1)
                best_match_class_idx = int(np.argmin(distances))
                # Use the class_names argument, not the notebook-level list.
                predictions[name] = class_names[best_match_class_idx]

    return predictions





In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
# Dataset for the test images: returns every image together with its file name.
class TestDataset(Dataset):
    """Flat-directory image dataset that yields ``(image, file_name)`` pairs.

    Args:
        root_dir (str): Path to the directory containing test images.
        transform: optional callable applied to the loaded RGB image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        # File names (not full paths), sorted so the sample order is the same
        # on every run.
        self.image_name = sorted(os.listdir(root_dir))
        self.transform = transform

    def __len__(self):
        """Number of entries found in ``root_dir``."""
        return len(self.image_name)

    def __getitem__(self, idx):
        """
        Returns:
            image: RGB image, transformed when a transform was supplied.
            name (str): Image file name (used for submission).
        """
        # Looked up unconditionally: assigning it only when a transform is set
        # left `name` undefined for transform=None.
        name = self.image_name[idx]
        img_path = os.path.join(self.root_dir, name)
        # convert() returns a new image, so the file handle can be closed here.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)
        return image, name

        


In [None]:
# Test-time preprocessing: no augmentation, only the deterministic steps
# (resize, tensor conversion, normalization with ImageNet statistics).
_test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform2 = transforms.Compose(_test_steps)

# Wrap the test directory in the custom dataset ...
test_dataset = TestDataset(root_dir=test_dir, transform=transform2)

# ... and batch it without shuffling.
test_loader = DataLoader(dataset=test_dataset, batch_size=30, shuffle=False)


In [None]:

import pandas as pd

# Predict a class name for every test image.
predicted_classes = zero_shot_inference(
    model=model,
    test_loader=test_loader,
    predicate_matrix=predicate_matrix,
    class_names=classes,
)

# One (image_id, class) row per prediction, written to the Kaggle working
# directory as the submission file.
submission_rows = [(image_id, label) for image_id, label in predicted_classes.items()]
df = pd.DataFrame(submission_rows, columns=['image_id', 'class'])

df.to_csv('/kaggle/working/submission.csv', index=False)
