In [8]:
from google.colab import drive, files
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
!pip install catalyst

Collecting catalyst
[?25l  Downloading https://files.pythonhosted.org/packages/39/45/24a485b76527a2601f11f12c16b5b11f853b42a3ba029d21d5c80c6c30d1/catalyst-20.11-py2.py3-none-any.whl (489kB)
[K     |████████████████████████████████| 491kB 14.1MB/s 
[?25hCollecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)
[K     |████████████████████████████████| 317kB 16.4MB/s 
Collecting GitPython>=3.1.1
[?25l  Downloading https://files.pythonhosted.org/packages/24/d1/a7f8fe3df258549b303415157328bfcc63e9b11d06a7ad7a3327f3d32606/GitPython-3.1.11-py3-none-any.whl (159kB)
[K     |████████████████████████████████| 163kB 49.5MB/s 
Collecting deprecation
  Downloading https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl
Collecting gitdb<5,>=4.0.1
[?25l  Downloading ht

In [12]:
import os

from pathlib import Path

import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from PIL import Image
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms

from catalyst import dl
from catalyst.dl.callbacks import AccuracyCallback, EarlyStoppingCallback

In [13]:
ROOT_DIR = '/content/drive/My Drive/Colab Notebooks/'
TRAIN_SAMPLES_PATH = 'train.csv'
VAL_SAMPLES_PATH = 'val.csv'
SUBMISSION_FILE = 'submission.csv'
MODEL_PATH = 'model.pkt'
NUM_CLASSES = 38
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(DEVICE)

cuda


In [14]:
class MultiClassDataset(Dataset):

    def __init__(self , csv_file , img_dir , transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
    
    def __getitem__(self, idx):
        d = self.df.iloc[idx]
        image = Image.open(f'{self.img_dir}/{d.Name}').convert("RGB")
        label = torch.tensor(d[1:].tolist() , dtype=torch.float32)
    
        if self.transform is not None:
            image = self.transform(image)
        return image, label
  
    def __len__(self):
        return len(self.df)

In [15]:
def get_additional_data(dataframe, categories, frac = 2):
    additional_data = [
        dataframe[dataframe[category] == 1].sample(frac = frac, replace = True) for category in categories
    ]
    return pd.concat(additional_data)

In [16]:
categories = ['Open-air museum', 'Food', 'Amusement park', 'Park', 'Sports facility', 'Mine']

df = pd.read_csv(ROOT_DIR + 'data/training_labels.csv')
df_train, df_val = train_test_split(df, test_size = 0.1)
df_additional_data = get_additional_data(df_train, categories)
df_train = pd.concat([df_train, df_additional_data])

df_train.to_csv(TRAIN_SAMPLES_PATH, index=False)
df_val.to_csv(VAL_SAMPLES_PATH, index=False)

In [17]:
batch_size = 128

transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.ColorJitter(hue=.05, saturation=.05),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip()                                 
])

transform_val = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

training_set = MultiClassDataset(TRAIN_SAMPLES_PATH , ROOT_DIR + 'data/training_images' , transform_train)
validation_set = MultiClassDataset(VAL_SAMPLES_PATH , ROOT_DIR + 'data/training_images' , transform_val)

print(f'Number of samples: Train: {len(training_set)}, Validation: {len(validation_set)}')
loaders = {
    "train":DataLoader(training_set , shuffle=True, batch_size=batch_size),
    "valid": DataLoader(validation_set , shuffle=False, batch_size=batch_size)
    }

Number of samples: Train: 4176, Validation: 375


In [18]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def get_classification_network(in_features, num_classes):
    return nn.Sequential(
         nn.Linear(in_features, in_features // 2),
         nn.BatchNorm1d(in_features // 2),
         nn.ReLU(),
         nn.Dropout(p = 0.5),
         nn.Linear(in_features // 2, in_features // 4),
         nn.BatchNorm1d(in_features // 4),
         nn.ReLU(),
         nn.Dropout(p = 0.5),
         nn.Linear(in_features // 4, in_features // 8),
         nn.BatchNorm1d(in_features // 8),
         nn.ReLU(),
         nn.Dropout(p = 0.5),
         nn.Linear(in_features // 8, num_classes)
    )

def get_model(num_classes, feature_extract=True):
    model = models.resnext101_32x8d(pretrained = True)
    set_parameter_requires_grad(model, feature_extract)
    num_ftrs = model.fc.in_features
    model.fc = get_classification_network(num_ftrs, num_classes)
    return model

def skyhacks_f1_score(preds, y):
    return f1_score(y, preds, average = 'macro')

In [19]:
model = get_model(NUM_CLASSES)

Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/hub/checkpoints/resnext101_32x8d-8ba56ff5.pth


HBox(children=(FloatProgress(value=0.0, max=356082095.0), HTML(value='')))




In [21]:
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
callbacks = [
    EarlyStoppingCallback(patience=8, metric = 'f1-score', minimize = False)
]
num_epochs = 100

class CustomRunner(dl.Runner):
    
    def predict_batch(self, batch):
        x, y = batch
        return self.model(x.to(self.device))
    
    def _handle_batch(self, batch):
        x, y = batch
        output = self.model(x)
        loss = self.state.criterion(output, y)

        preds = torch.sigmoid(output).data > 0.5
        preds = preds.to(torch.float32)

        f1 = skyhacks_f1_score(preds.cpu().numpy(), y.cpu().numpy())
        self.batch_metrics = {
            "loss": loss,
            "f1-score": f1
        }
        
        if self.state.is_train_loader:
            loss.backward()
            self.state.optimizer.step()
            self.state.optimizer.zero_grad()           


In [22]:
runner = CustomRunner()

runner.train(
    loaders=loaders,
    model=model, 
    criterion=criterion, 
    optimizer=optimizer,
    callbacks=callbacks,
    num_epochs=num_epochs, 
    logdir="./logs", 
    verbose=True,
    main_metric='f1-score',
    minimize_metric=False
)

1/100 * Epoch (train):  45% 15/33 [06:33<07:25, 24.76s/it, f1-score=0.117, loss=0.356]


F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.



1/100 * Epoch (train): 100% 33/33 [13:41<00:00, 24.90s/it, f1-score=0.110, loss=0.250]
1/100 * Epoch (valid): 100% 3/3 [01:12<00:00, 24.04s/it, f1-score=0.159, loss=0.266]
[2020-11-15 01:17:42,735] 
1/100 * Epoch 1 (train): f1-score=0.1220 | loss=0.3868
1/100 * Epoch 1 (valid): f1-score=0.1625 | loss=0.2619
2/100 * Epoch (train): 100% 33/33 [04:14<00:00,  7.73s/it, f1-score=0.163, loss=0.227]
2/100 * Epoch (valid): 100% 3/3 [00:16<00:00,  5.45s/it, f1-score=0.219, loss=0.202]
[2020-11-15 01:22:39,945] 
2/100 * Epoch 2 (train): f1-score=0.1492 | loss=0.2285
2/100 * Epoch 2 (valid): f1-score=0.2295 | loss=0.1946
3/100 * Epoch (train): 100% 33/33 [04:10<00:00,  7.59s/it, f1-score=0.199, loss=0.245]
3/100 * Epoch (valid): 100% 3/3 [00:16<00:00,  5.40s/it, f1-score=0.255, loss=0.191]
[2020-11-15 01:27:31,840] 
3/100 * Epoch 3 (train): f1-score=0.2051 | loss=0.2093
3/100 * Epoch 3 (valid): f1-score=0.2741 | loss=0.1818
4/100 * Epoch (train): 100% 33/33 [04:15<00:00,  7.74s/it, f1-score=0.282

In [23]:
torch.save(runner.model, MODEL_PATH)
files.download(MODEL_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>