<a href="https://colab.research.google.com/github/KamilZPWr/Skyhack-3/blob/main/Patryk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive, files
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install catalyst

In [3]:
import os

from pathlib import Path

import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from PIL import Image
from torch.utils.data import Dataset, DataLoader ,random_split
from torchvision import datasets, models, transforms

from catalyst import dl
from catalyst.dl.callbacks import AccuracyCallback, EarlyStoppingCallback
from catalyst.utils import metrics

In [4]:
ROOT_DIR = '/content/drive/My Drive/Colab Notebooks/'
SUBMISSION_FILE = 'submission.csv'
MODEL_PATH = 'model.pkt'
NUM_CLASSES = 38
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(DEVICE)

cuda


In [5]:
class MultiClassDataset(Dataset):

    def __init__(self , csv_file , img_dir , transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
    
    def __getitem__(self, idx):
        d = self.df.iloc[idx]
        image = Image.open(f'{self.img_dir}/{d.Name}').convert("RGB")
        label = torch.tensor(d[1:].tolist() , dtype=torch.float32)
    
        if self.transform is not None:
            image = self.transform(image)
        return image, label
  
    def __len__(self):
        return len(self.df)

class TestDataset(Dataset):

    def __init__(self, img_dir, transform):
        self.transform = transform
        self.images = [os.path.join(img_dir, fname) for fname in os.listdir(img_dir)]

    def __getitem__(self, idx):
        image = Image.open(self.images[idx]).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)
        return image
  
    def __len__(self):
        return len(self.images)

In [6]:
batch_size = 16
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset = MultiClassDataset(ROOT_DIR + 'data/training_labels.csv' , ROOT_DIR + 'data/training_images' , transform)
test_set = TestDataset(ROOT_DIR + 'data/live_test_images', transform)
test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size)

number_of_validation_samples = int(len(dataset)*0.10) 
training_set, validation_set  = random_split(dataset , [len(dataset) - number_of_validation_samples, number_of_validation_samples])

print(f'Number of samples: Train: {len(training_set)}, Validation: {len(validation_set)}')
print(f'Number of samples Test: {len(test_set)}')
loaders = {
    "train":DataLoader(training_set , shuffle=True, batch_size=batch_size),
    "valid": DataLoader(validation_set , shuffle=True, batch_size=batch_size)
    }

Number of samples: Train: 3733, Validation: 414
Number of samples Test: 500


In [7]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def get_model(num_classes, feature_extract=True):
    model = models.alexnet(pretrained = True)
    set_parameter_requires_grad(model, feature_extract)
    num_ftrs = model.classifier[6].in_features
    model.classifier[6] = nn.Linear(num_ftrs, num_classes)
    return model

def skyhacks_f1_score(preds, y):
    return np.mean([metrics.f1_score(preds[:, i], y[:, i])[0].item() for i in range(preds.shape[0])])

In [8]:
model = get_model(NUM_CLASSES)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




In [9]:
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
callbacks = [
    EarlyStoppingCallback(patience=5)
]
num_epochs = 100

class CustomRunner(dl.Runner):
    
    def predict_batch(self, batch):
        x, y = batch
        return self.model(x.to(self.device))
    
    def _handle_batch(self, batch):
        x, y = batch
        output = self.model(x)
        loss = self.state.criterion(output, y)

        preds = torch.sigmoid(output).data > 0.5
        preds = preds.to(torch.float32)

        f1 = skyhacks_f1_score(preds, y)
        self.batch_metrics = {
            "loss": loss,
            "f1-score": f1
        }
        
        if self.state.is_train_loader:
            loss.backward()
            self.state.optimizer.step()
            self.state.optimizer.zero_grad()           


In [10]:
runner = CustomRunner()

runner.train(
    loaders=loaders,
    model=model, 
    criterion=criterion, 
    optimizer=optimizer,
    callbacks=callbacks,
    num_epochs=num_epochs, 
    logdir="./logs", 
    verbose=True
)

1/100 * Epoch (train): 100% 234/234 [16:32<00:00,  4.24s/it, f1-score=1.000, loss=0.130]
1/100 * Epoch (valid): 100% 26/26 [01:50<00:00,  4.25s/it, f1-score=0.964, loss=0.151]
[2020-11-14 08:28:24,540] 
1/100 * Epoch 1 (train): f1-score=0.9359 | loss=0.2263
1/100 * Epoch 1 (valid): f1-score=0.9436 | loss=0.1981
2/100 * Epoch (train): 100% 234/234 [02:05<00:00,  1.86it/s, f1-score=1.000, loss=0.153]
2/100 * Epoch (valid): 100% 26/26 [00:14<00:00,  1.81it/s, f1-score=0.963, loss=0.150]
[2020-11-14 08:30:49,938] 
2/100 * Epoch 2 (train): f1-score=0.9461 | loss=0.1715
2/100 * Epoch 2 (valid): f1-score=0.9438 | loss=0.1999
3/100 * Epoch (train): 100% 234/234 [02:04<00:00,  1.88it/s, f1-score=0.950, loss=0.143]
3/100 * Epoch (valid): 100% 26/26 [00:14<00:00,  1.84it/s, f1-score=0.945, loss=0.268]
[2020-11-14 08:33:12,039] 
3/100 * Epoch 3 (train): f1-score=0.9529 | loss=0.1510
3/100 * Epoch 3 (valid): f1-score=0.9414 | loss=0.2100
4/100 * Epoch (train): 100% 234/234 [02:04<00:00,  1.88it/s, 

In [11]:
results = []

for batch in test_loader:
    res = runner.model(batch.to(runner.device))
    res = (torch.sigmoid(res) > 0.5).to(torch.float32).cpu().numpy()
    results.append(res)

In [12]:
df = pd.read_csv(ROOT_DIR + 'data/training_labels.csv')

In [14]:
df_test = pd.DataFrame(np.vstack(results), columns = df.columns[1:], dtype=int)
names = pd.DataFrame({'Name': [Path(el).parts[-1] for el in test_loader.dataset.images]})

In [15]:
submission = pd.concat([names, df_test], axis = 1)
submission.to_csv(SUBMISSION_FILE, index = False)
files.download(SUBMISSION_FILE) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
torch.save(runner.model, MODEL_PATH)
files.download(MODEL_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>