In [None]:
# Mount Google Drive inside the Colab VM; the dataset archive is copied from
# /content/drive/MyDrive in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/HV-AI-2024.zip /content/HV-AI-2024.zip
!unzip /content/HV-AI-2024.zip
!rm -rf /content/__MACOSX
!rm -rf /content/sample_data
!mv /content/HV-AI-2024/* /content/
!rm -rf /content/HV-AI-2024
!rm /content/HV-AI-2024.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: HV-AI-2024/images/train/84_4878.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._84_4878.jpg  
  inflating: HV-AI-2024/images/train/16_894.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._16_894.jpg  
  inflating: HV-AI-2024/images/train/43_2439.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._43_2439.jpg  
  inflating: HV-AI-2024/images/train/68_3954.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._68_3954.jpg  
  inflating: HV-AI-2024/images/train/116_6753.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._116_6753.jpg  
  inflating: HV-AI-2024/images/train/18_974.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._18_974.jpg  
  inflating: HV-AI-2024/images/train/115_6674.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._115_6674.jpg  
  inflating: HV-AI-2024/images/train/37_2090.jpg  
  inflating: __MACOSX/HV-AI-2024/images/train/._37_2090.jpg  
  inflating: HV-AI-2024/images/

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import os
import cv2
from PIL import Image

In [None]:
# Read both annotation tables in one go (train first, then test).
train_df, test_df = map(pd.read_csv, ('/content/train.csv', '/content/test.csv'))

# Preview the first rows of each table, one after the other.
print(train_df.head(), test_df.head(), sep='\n')

                   path  class                    bbox
0  images/train/1_2.jpg      0  139.0 30.0 153.0 264.0
1  images/train/1_4.jpg      0  112.0 90.0 255.0 242.0
2  images/train/1_5.jpg      0   70.0 50.0 134.0 303.0
3  images/train/1_7.jpg      0    7.0 75.0 420.0 262.0
4  images/train/1_8.jpg      0   78.0 86.0 333.0 158.0
                        path                     bbox
0    images/test/test_81.jpg  241.0 113.0 202.0 257.0
1  images/test/test_1592.jpg  119.0 121.0 294.0 337.0
2   images/test/test_565.jpg    33.0 45.0 237.0 139.0
3  images/test/test_2078.jpg    58.0 95.0 262.0 210.0
4  images/test/test_3645.jpg   32.0 141.0 348.0 281.0


In [None]:
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file and return it as a scaled 3-channel array.

    Args:
        image_path: Path of the image file to open.
        target_size: Size tuple handed to ``PIL.Image.resize``.

    Returns:
        A float ``numpy`` array with three colour channels, scaled by 1/255.
    """
    # Converting to RGB up front covers RGBA, grayscale and palette images alike;
    # slicing off a fourth channel only handled the RGBA case. The context
    # manager closes the underlying file once the pixels have been read.
    with Image.open(image_path) as source:
        rgb = source.convert('RGB').resize(target_size)
    return np.array(rgb) / 255.0

# Smoke-test the loader on the first training image. Paths in the CSV are
# relative (images/train/...), so anchor them at /content/ like the other loaders
# in this notebook instead of depending on the current working directory.
sample_image_path = os.path.join('/content/', train_df['path'].iloc[0])
sample_image = preprocess_image(sample_image_path)

In [None]:
# Cast the class ids to strings (mutates train_df in place; re-running is harmless).
# NOTE(review): presumably required by the flow_from_dataframe(class_mode='categorical')
# calls in the next cell, which take the label column as strings — confirm.
train_df['class'] = train_df['class'].astype(str)

In [None]:
# Training images get random augmentation (flip / zoom / shear / rotation).
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rotation_range=20
)

# Validation images are only rescaled: drawing them through the augmenting
# generator would score the model on randomly distorted inputs. The same
# validation_split is used so that both generators partition train_df the same way.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='/content/',
    x_col='path',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    subset='training',
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='/content/',
    x_col='path',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    subset='validation',
    class_mode='categorical',
    shuffle=False  # keep evaluation order stable between runs
)


Found 4796 validated image filenames belonging to 200 classes.
Found 1198 validated image filenames belonging to 200 classes.


In [None]:
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.utils import Sequence

class BoundingBoxDataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of bbox-cropped images with one-hot labels.

    Every dataframe row must provide ``path`` (image path relative to
    ``directory``), ``bbox`` (four whitespace-separated numbers) and ``class``
    (an integer-like label).
    """

    def __init__(self, dataframe, directory, batch_size, input_shape, is_train=True,
                 num_classes=None):
        """
        Args:
            dataframe: Table with ``path``, ``bbox`` and ``class`` columns.
            directory: Root folder that each ``path`` value is joined onto.
            batch_size: Number of rows per batch.
            input_shape: Target image shape; only the first two entries are used.
            is_train: Stored on the instance; not consulted by this class.
            num_classes: Width of the one-hot labels. Defaults to the largest
                integer label in ``dataframe`` plus one, so the generator no
                longer depends on a notebook-global ``num_classes``.
        """
        super().__init__()
        self.dataframe = dataframe
        self.directory = directory
        self.batch_size = batch_size
        self.input_shape = input_shape
        self.is_train = is_train
        if num_classes is None:
            # Same rule to_categorical applies, but computed once over the whole
            # table so every batch gets labels of identical width.
            num_classes = int(dataframe['class'].astype(int).max()) + 1
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        """Build batch ``index``.

        Returns:
            ``(images, labels)``: a float array of cropped, resized images scaled
            by 1/255 and the matching one-hot label matrix.

        Raises:
            FileNotFoundError: If an image cannot be read from disk.
        """
        start = index * self.batch_size
        batch_df = self.dataframe.iloc[start:start + self.batch_size]
        images = []
        labels = []

        for _, row in batch_df.iterrows():
            img_path = os.path.join(self.directory, row['path'])
            # The values are serialized as floats ("139.0"); int("139.0") raises
            # ValueError, so go through float first.
            bbox = [int(float(val)) for val in row['bbox'].split()]
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise FileNotFoundError(f'Could not read image: {img_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = self.crop_images_with_bbox(image, bbox)
            # cv2.resize takes (width, height); input_shape is presumably
            # (height, width, channels), so swap the first two entries.
            image = cv2.resize(image, (self.input_shape[1], self.input_shape[0]))
            images.append(image / 255.0)
            labels.append(int(row['class']))

        images = np.array(images)
        labels = tf.keras.utils.to_categorical(labels, num_classes=self.num_classes)
        return images, labels

    def crop_images_with_bbox(self, image, bbox):
        """Crop ``image`` to ``bbox``, interpreted as ``(x, y, width, height)``.

        NOTE(review): the annotation tables contain boxes whose third value is
        smaller than the first (e.g. "241.0 113.0 202.0 257.0"), so the numbers
        cannot be corner coordinates; they are treated as origin plus size —
        confirm against the dataset description.

        The box is clamped to the image so the crop is never empty.
        """
        x, y, width, height = bbox
        img_h, img_w = image.shape[:2]
        x_min = min(max(x, 0), img_w - 1)
        y_min = min(max(y, 0), img_h - 1)
        x_max = min(max(x + width, x_min + 1), img_w)
        y_max = min(max(y + height, y_min + 1), img_h)
        return image[y_min:y_max, x_min:x_max]

input_shape = (224, 224, 3)
batch_size = 32

# Hold out 20% of the rows (stratified by class) so that the validation generator
# does not simply replay the training images.
bbox_train_df, bbox_val_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df['class']
)

# The CSV paths already start with "images/train/", so the root has to be
# /content/; joining them onto /content/images/train would double the prefix.
train_generator = BoundingBoxDataGenerator(
    dataframe=bbox_train_df,
    directory='/content/',
    batch_size=batch_size,
    input_shape=input_shape,
    is_train=True
)

validation_generator = BoundingBoxDataGenerator(
    dataframe=bbox_val_df,
    directory='/content/',
    batch_size=batch_size,
    input_shape=input_shape,
    is_train=False
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from transformers import ViTModel
from torch.utils.data import DataLoader, Dataset
from PIL import Image

class CustomDataset(Dataset):
    """Labelled image dataset backed by a dataframe.

    The first dataframe column holds the image path (relative to ``image_dir``)
    and the ``class`` column holds the label.

    Attributes set in ``__init__``:
        label_map: label -> contiguous index used as the training target.
        idx_to_label: inverse of ``label_map``, for translating model outputs
            back into the original labels.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        """
        Args:
            dataframe: Table whose first column is the image path and which has
                a ``class`` column.
            image_dir: Folder that each image path is joined onto.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

        # Sort the labels instead of using order of first appearance, so the
        # index assignment does not depend on how the rows happen to be ordered.
        # Integer-like labels sort numerically, which makes index == class id
        # when the ids are 0..N-1.
        classes = sorted(dataframe['class'].unique(), key=self._label_sort_key)
        self.label_map = {label: idx for idx, label in enumerate(classes)}
        self.idx_to_label = {idx: label for label, idx in self.label_map.items()}

    @staticmethod
    def _label_sort_key(label):
        """Order integer-like labels numerically, ahead of any other labels."""
        try:
            return (0, int(label), '')
        except (TypeError, ValueError):
            return (1, 0, str(label))

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for row ``idx``.

        The label index is a ``torch.long`` scalar tensor.
        """
        img_path = self.dataframe.iloc[idx, 0]
        # convert() returns a fully loaded copy, so the file can be closed
        # right away instead of staying open until garbage collection.
        with Image.open(os.path.join(self.image_dir, img_path)) as source:
            image = source.convert('RGB')
        # Read the label by column name, matching how label_map was built.
        label = self.dataframe['class'].iloc[idx]

        if self.transform:
            image = self.transform(image)

        label = torch.tensor(self.label_map[label], dtype=torch.long)

        return image, label

# Preprocessing pipeline: fixed 224x224 size, tensor conversion, then
# per-channel normalization with the listed mean / std.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Training set rooted at /content/, served in shuffled batches of 32.
train_dataset = CustomDataset(train_df, '/content/', transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

class HybridModel(nn.Module):
    """Classifier that concatenates ResNet-18 and ViT features.

    Both backbones receive the same input batch; their feature vectors are
    concatenated and passed through a two-layer head.
    """

    def __init__(self, num_classes):
        """
        Args:
            num_classes: Number of output logits.
        """
        super(HybridModel, self).__init__()
        self.resnet = models.resnet18(weights='DEFAULT')
        # Replace the classification layer so the backbone returns its features.
        self.resnet.fc = nn.Identity()

        self.vit = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        resnet_output_size = 512
        vit_output_size = self.vit.config.hidden_size

        self.fc1 = nn.Linear(resnet_output_size + vit_output_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        resnet_features = self.resnet(x)
        # Average the ViT outputs over dim 1 to get one vector per image.
        vit_features = self.vit(pixel_values=x).last_hidden_state.mean(dim=1)
        combined_features = torch.cat((resnet_features, vit_features), dim=1)
        # Without a non-linearity, fc1 followed by fc2 collapses into a single
        # affine map; the ReLU makes the hidden layer actually add capacity.
        # It has no parameters, so the state_dict layout is unchanged.
        hidden = torch.relu(self.fc1(combined_features))
        out = self.fc2(hidden)

        return out

# One constant drives both the loop bound and the progress message, so the two
# cannot drift apart when the epoch count is changed.
NUM_EPOCHS = 10

num_classes = len(train_df['class'].unique())
model = HybridModel(num_classes=num_classes)

# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# The optimizer receives every model parameter, i.e. both backbones and the head.
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean batch loss for the epoch.
    print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {running_loss/len(train_loader)}')


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Epoch 1/10, Loss: 4.554098994173902
Epoch 2/10, Loss: 3.3616031991674546
Epoch 3/10, Loss: 2.6093159894993962
Epoch 4/10, Loss: 2.0567349575935525
Epoch 5/10, Loss: 1.5738620669283765
Epoch 6/10, Loss: 1.2538506090641022
Epoch 7/10, Loss: 0.9379146978893178
Epoch 8/10, Loss: 0.7434309078974927
Epoch 9/10, Loss: 0.6123175473606333
Epoch 10/10, Loss: 0.5674897909481474


In [None]:
import pandas as pd
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

class TestDataset(Dataset):
    """Unlabelled image dataset backed by a dataframe.

    The first dataframe column holds the image path relative to ``image_dir``.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        """
        Args:
            dataframe: Table whose first column is the image path.
            image_dir: Folder that each image path is joined onto.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, img_path)`` for row ``idx``."""
        img_path = self.dataframe.iloc[idx, 0]
        # convert() returns a fully loaded copy, so the file can be closed
        # right away instead of staying open until garbage collection.
        with Image.open(os.path.join(self.image_dir, img_path)) as source:
            image = source.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_path

# Same preprocessing as used for training: resize, tensor conversion, normalization.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Read the table from the same absolute location used when it was first loaded,
# rather than a path relative to the working directory.
test_df = pd.read_csv('/content/test.csv')
test_dataset = TestDataset(dataframe=test_df, image_dir='/content/', transform=transform)
# shuffle=False keeps predictions in the same order as the rows of test_df.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def generate_and_save_predictions(model, data_loader, output_csv_path):
    """Run ``model`` over ``data_loader`` and write the predictions to a CSV file.

    Args:
        model: A ``torch.nn.Module`` returning one row of logits per input image.
        data_loader: Iterable yielding ``(images, paths)`` batches.
        output_csv_path: Destination of the CSV file.

    The CSV has the columns ``path``, ``predicted_label`` (index of the largest
    softmax probability) and ``confidence_score`` (that probability). The model
    is left in eval mode.
    """
    # Follow the device the model already lives on instead of reading a
    # notebook-global `device`, so the function works wherever it is called.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    all_preds = []
    all_paths = []
    all_confidences = []

    with torch.no_grad():
        for images, paths in data_loader:
            outputs = model(images.to(target_device))
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            confidences, preds = torch.max(probabilities, 1)
            all_preds.extend(preds.cpu().numpy())
            all_paths.extend(paths)
            all_confidences.extend(confidences.cpu().numpy())

    df = pd.DataFrame({
        'path': all_paths,
        'predicted_label': all_preds,
        'confidence_score': all_confidences
    })

    df.to_csv(output_csv_path, index=False)

# Score the test set with the trained model and write the result table to disk.
generate_and_save_predictions(
    model=model,
    data_loader=test_loader,
    output_csv_path='/content/predictions.csv',
)

In [None]:
import requests

def send_results_for_evaluation(name, csv_file, email, college):
    """Upload a predictions CSV to the evaluation service and return its reply.

    Args:
        name: Participant name, sent as the ``name`` form field.
        csv_file: Path of the CSV file to upload.
        email: Participant e-mail, sent as the ``email`` form field.
        college: Participant college, sent as the ``college`` form field.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # NOTE(review): this is plain HTTP, so the personal details travel unencrypted.
    url = "http://43.205.49.236:5050/inference"
    data = {'email': email, 'name': name, 'college': college}
    # Keep the file open only for the duration of the upload, and bound the wait
    # so an unresponsive server cannot hang the notebook indefinitely.
    with open(csv_file, 'rb') as upload:
        response = requests.post(url, files={'file': upload}, data=data, timeout=60)
    return response.json()

# Submit the predictions file and show the evaluation service's reply.
print('Accuracy: ')
print(
    send_results_for_evaluation(
        name='PRANAV BALAJI R S',
        csv_file='/content/predictions.csv',
        email='pranavbalaji.rs2021@vitstudent.ac.in',
        college='Vellore Institute of Technology, Chennai',
    )
)

Accuracy: 
{'overall_accuracy (%)': 40.28305, 'max_accuracy_class': 69, 'max_accuracy (%)': 90.0, 'min_accuracy_class': 143, 'min_accuracy (%)': 0.0}
