In [2]:
from IPython.display import clear_output

In [3]:
!pip install timm
clear_output()

In [14]:
import torch
import timm
import pandas as pd
import os
import torchvision.transforms as transforms
import torch.nn as nn
import numpy as np

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [9]:
!mkdir /content/sports_data/
!cp -r /content/drive/MyDrive/sports_data/vk-made-sports-image-classification.zip /content/sports_data/
!cd /content/sports_data/ && unzip /content/sports_data/vk-made-sports-image-classification.zip
clear_output()

In [10]:
def create_label_encoder(series):
    """Build forward and inverse integer encodings for the labels in ``series``.

    Labels are numbered 0, 1, 2, ... in order of first appearance.

    Args:
        series: iterable of hashable labels (duplicates allowed).

    Returns:
        A ``(label_encoder, inverse_encoder)`` tuple: the first dict maps
        label -> integer code, the second maps integer code -> label.
    """
    # dict.fromkeys keeps one entry per distinct label, in first-seen order.
    distinct_labels = dict.fromkeys(series)
    label_encoder = {label: code for code, label in enumerate(distinct_labels)}
    inverse_encoder = {code: label for label, code in label_encoder.items()}
    return label_encoder, inverse_encoder

# Encode the "label" column of the training CSV as integers (and keep the inverse map
# so predictions can be turned back into label strings).
label_column = pd.read_csv("./sports_data/train.csv")["label"]
label_encoder, inverse_encoder = create_label_encoder(label_column)

In [11]:
class SportDataset(Dataset):
    """Image dataset driven by a DataFrame of file names (and, for training, labels).

    The first column of each row is an image path relative to ``root``. When
    ``train`` is true, the second column is a label that is translated through
    ``label_encoder``. If a row's image file is missing on disk, the next row
    (wrapping around to the start) whose file exists is used instead.

    Args:
        root: directory that contains the image files.
        csv: DataFrame whose first column is the file name and, when
            ``train`` is true, whose second column is the label.
        transform: callable applied to the loaded RGB PIL image, or ``None``.
        label_encoder: mapping label -> integer code; required when ``train`` is true.
        train: whether ``__getitem__`` should also return the encoded label.
    """

    def __init__(self, root, csv, transform, label_encoder=None, train=True):
        super().__init__()

        self.csv = csv
        self.root = root
        self.transform = transform
        self.train = train
        self.label_encoder = label_encoder

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.csv)

    def _resolve_row(self, idx):
        """Return ``(row, image_path)`` for ``idx`` or the next row whose file exists.

        Raises:
            IndexError: if ``idx`` is outside the dataset.
            FileNotFoundError: if no row points at an existing file.
        """
        total = len(self.csv)
        # Keep out-of-range indices an IndexError instead of letting the modulo wrap them.
        if not -total <= idx < total:
            raise IndexError(f"index {idx} is out of range for a dataset of size {total}")

        start = idx % total
        # Visit every row at most once, so the search always terminates.
        for offset in range(total):
            row = self.csv.iloc[(start + offset) % total]
            # .iloc: positional access that does not depend on the column labels.
            image_path = os.path.join(self.root, row.iloc[0])
            if os.path.exists(image_path):
                return row, image_path

        raise FileNotFoundError(f"none of the {total} listed images exist under {self.root!r}")

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``{"image": image, "label": label}`` when ``train`` is true,
            otherwise ``{"image": image}``.
        """
        row, image_path = self._resolve_row(idx)

        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        if self.train:
            label = self.label_encoder[row.iloc[1]]
            return {"image": image, "label": label}

        return {"image": image}


In [15]:
# Hold out 20% of the labelled rows for validation; the fixed seed keeps the split reproducible.
labelled_df = pd.read_csv("./sports_data/train.csv")
train_df, val_df = train_test_split(labelled_df, test_size=0.2, random_state=42)

# Unlabelled rows used for the submission.
test_df = pd.read_csv("./sports_data/test.csv")

In [16]:
# Training pipeline: random crop / rotation / flips, then tensor conversion,
# normalisation and random erasing.
train_steps = [
    transforms.RandomResizedCrop((224, 224), scale=(0.7, 1.0)),
    transforms.RandomRotation(15),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    transforms.RandomErasing(scale=(0.02, 0.2)),
]
train_transform = transforms.Compose(train_steps)

# Evaluation pipeline: plain resize plus the same normalisation, no randomness.
test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
test_transform = transforms.Compose(test_steps)

# Train and validation rows both live in the train image folder.
train_dataset = SportDataset(
    root="./sports_data/train",
    csv=train_df,
    transform=train_transform,
    label_encoder=label_encoder,
)
val_dataset = SportDataset(
    root="./sports_data/train",
    csv=val_df,
    transform=test_transform,
    label_encoder=label_encoder,
)

# The test set has no labels, so only images are returned.
test_dataset = SportDataset(
    root="./sports_data/test",
    csv=test_df,
    transform=test_transform,
    train=False,
)

In [17]:
# Shuffled training batches; the final incomplete batch is dropped.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)
# Validation and test keep the row order and every sample.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    drop_last=False,
)

ViT embeddings + scikit-learn logistic regression (or SVM)

In [18]:
# Backbone switch: True selects the ViT model, False the ConvNeXt model
# in the model-creation cell; it is also passed to get_vit_embed.
VIT = True

In [19]:
# Load a pretrained backbone that is only used to extract image embeddings.
if VIT:
    model = timm.create_model("vit_large_patch16_224", pretrained=True)
else:
    model = timm.create_model("convnext_small_in22k", pretrained=True)
    # Replace the final classifier layer so a plain forward pass returns features.
    model.head.fc = nn.Identity()

# Switch to inference mode so train-only behaviour (e.g. dropout) is disabled
# and the extracted embeddings are deterministic.
model.eval()
model.cuda();

In [20]:
@torch.no_grad()
def get_vit_embed(model, batch, vit=True):
    """Compute embeddings for ``batch`` without tracking gradients.

    Args:
        model: backbone; must expose ``forward_features`` and ``forward_head``
            when ``vit`` is true, otherwise it is simply called on the batch.
        batch: input passed to the model.
        vit: choose the ``forward_features`` + ``forward_head(pre_logits=True)``
            path (true) or a direct ``model(batch)`` call (false).

    Returns:
        Whatever the selected model path returns for ``batch``.
    """
    if not vit:
        return model(batch)

    features = model.forward_features(batch)
    return model.forward_head(features, pre_logits=True)

In [21]:
# Rebuild the train dataset/loader for embedding extraction: deterministic
# test_transform instead of the augmentations, fixed order, no dropped batch.
train_dataset = SportDataset(
    root="./sports_data/train",
    csv=train_df,
    transform=test_transform,
    label_encoder=label_encoder,
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=False,
)

Here I compute ViT embeddings for our images. After computing them once, I save them to Google Drive and simply load them in later runs for faster experiments.

In [None]:
# Per-batch accumulators for every split; the validation and test cells fill theirs.
train_embeds, train_labels = [], []
val_embeds, val_labels = [], []
test_embeds = []

# Embed the training images batch by batch and move the results to host memory.
for idx, batch in enumerate(train_dataloader):
    images = batch["image"].cuda()
    embed = get_vit_embed(model, images, VIT)
    train_embeds.append(embed.cpu().numpy())
    train_labels.append(batch["label"].cpu().numpy())

In [None]:
# Stitch the per-batch arrays together along the first (sample) axis.
train_embeds_concat = np.concatenate(train_embeds)
train_labels_concat = np.concatenate(train_labels, axis=0)

In [None]:
#np.save("/content/drive/MyDrive/train_embeds.npy", train_embeds_concat)
#np.save("/content/drive/MyDrive/train_labels.npy", train_labels_concat)

In [None]:
# Start from empty lists so that re-running this cell never appends a second
# copy of the validation embeddings to the previous run's results.
val_embeds = []
val_labels = []

for batch in val_dataloader:
    embed = get_vit_embed(model, batch["image"].cuda(), VIT)
    val_embeds.append(embed.cpu().numpy())
    val_labels.append(batch["label"].cpu().numpy())

# Join the per-batch arrays along the sample axis.
val_embeds_concat = np.concatenate(val_embeds, axis=0)
val_labels_concat = np.concatenate(val_labels)

In [None]:
#np.save("/content/drive/MyDrive/val_embeds.npy", val_embeds_concat)
#np.save("/content/drive/MyDrive/val_labels.npy", val_labels_concat)

In [None]:
# Start from an empty list so that re-running this cell never appends a second
# copy of the test embeddings (which would break the row alignment with test.csv).
test_embeds = []

for batch in test_dataloader:
    embed = get_vit_embed(model, batch["image"].cuda(), VIT)
    test_embeds.append(embed.cpu().numpy())

# Join the per-batch arrays along the sample axis.
test_embeds_concat = np.concatenate(test_embeds, axis=0)

In [None]:
#np.save("/content/drive/MyDrive/test_embeds.npy", test_embeds_concat)

In [None]:
# Train + validation stacked into one set (train rows first) for the final fit.
full_embeds = np.concatenate((train_embeds_concat, val_embeds_concat))
full_labels = np.concatenate((train_labels_concat, val_labels_concat), axis=0)

In [None]:
#np.save("/content/drive/MyDrive/full_embeds.npy", full_embeds)
#np.save("/content/drive/MyDrive/full_labels.npy", full_labels)

In [22]:
# Load the cached arrays from Google Drive instead of recomputing the embeddings.
# presumably these are the files written by the (commented-out) np.save cells — verify.
full_embeds = np.load("/content/drive/MyDrive/full_embeds.npy")
full_labels = np.load("/content/drive/MyDrive/full_labels.npy")

# NOTE: val_embeds / val_labels are rebound here from the per-batch lists used
# during extraction to single arrays loaded from disk.
val_embeds = np.load("/content/drive/MyDrive/val_embeds.npy")
val_labels = np.load("/content/drive/MyDrive/val_labels.npy")

test_embeds_concat = np.load("/content/drive/MyDrive/test_embeds.npy")

In [23]:
# Final classifier: an SVM with C=2, fitted on the combined train + validation
# embeddings. Only the estimator that is actually fitted is constructed.
clf = SVC(C=2)
clf.fit(full_embeds, full_labels)

Yes, it is unusual that I evaluate here on data the model was trained on. Earlier I trained on the train split and validated on the validation split; now that I have found a good value of C for the SVM, I fit on train + validation and run this cell only to see how strongly the model overfits.

In [24]:
# Micro-averaged F1 on the validation embeddings.
# NOTE(review): clf was fitted on full_embeds, which is built from train + validation,
# so this score measures fit on seen data rather than generalisation.
preds = clf.predict(val_embeds)
f1 = f1_score(val_labels, preds, average="micro")
print(f"F1 micro on val set: {f1}")

F1 micro on val set: 0.9888705234159779


In [25]:
# Predict integer class codes for the test embeddings.
test_preds = clf.predict(test_embeds_concat)

In [26]:
# Translate the predicted integer codes back into the original label strings.
text_labels = []
for code in test_preds.tolist():
    text_labels.append(inverse_encoder[code])

# Re-read the test CSV so the cell starts from a frame without a "label" column,
# then place the predictions in the second column.
test_df = pd.read_csv("./sports_data/test.csv")
test_df.insert(loc=1, column="label", value=text_labels, allow_duplicates=True)

test_df.to_csv("/content/test_submission.csv", index=False)