<a href="https://colab.research.google.com/github/GrigoryBartosh/hse08_ip/blob/master/hw3_neural.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# Running this cell prompts for an interactive authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [1]:
# Switch the working directory to the project folder on the mounted Drive,
# so the relative paths used below resolve there.
cd 'drive/My Drive/ip_hw_3'

/content/drive/My Drive/ip_hw_3


In [0]:
# Install the Weights & Biases client used for logging; pip's stdout is discarded.
! pip install wandb >> /dev/null

In [0]:
import os
import copy
import zipfile
import pandas as pd

import numpy as np
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
from torchvision import models

from PIL import Image

import wandb

from tqdm.auto import tqdm, trange
import matplotlib.pyplot as plt

# Zip archive holding train.csv and the image folders; the path is relative
# to the current working directory.
PATH_DATA = 'tl-signs-hse-itmo-2020-winter.zip'

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Training hyper-parameters.
BATCH_SIZE = 256  # batch size of the train and validation DataLoaders
LR = 0.001  # learning rate handed to the Adam optimizer
W_L2 = 0  # weight_decay handed to the Adam optimizer
EPOCHS = 30  # number of epochs passed to train()

In [4]:
# Read the label table and every training image from the archive into memory.
xs, ys = [], []
with zipfile.ZipFile(PATH_DATA, 'r') as zip_file:
    with zip_file.open('train.csv') as file:
        train_data = pd.read_csv(file)

    labelled_files = zip(train_data['filename'], train_data['class_number'])
    # `total` lets tqdm render a real progress bar instead of a bare counter.
    for name, label in tqdm(labelled_files, total=len(train_data)):
        # Zip member names always use '/', so the path is spelled out explicitly
        # rather than built with os.path.join (which uses the OS separator).
        with zip_file.open(f'train/train/{name}') as img_file:
            # convert() forces the pixel data to be read while the member is open.
            image = Image.open(img_file).convert('RGB')
        xs.append(image)
        ys.append(label)

# Labels are treated as 0-based class indices.
num_classes = max(ys) + 1

# Hold out 10% of the images for validation; the fixed seed keeps the split
# identical across kernel restarts so runs are comparable.
x_train, x_val, y_train, y_val = train_test_split(
    xs, ys, test_size=0.1, random_state=42
)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [0]:
class SiamDataset(data.Dataset):
    """Dataset that serves random image pairs for siamese training.

    Item ``index`` is paired with a second item drawn uniformly at random from
    the same dataset (it may be the same item). The target is ``[0.]`` when the
    two labels are equal and ``[1.]`` otherwise.

    Args:
        xs: indexable collection of images; each must be convertible with
            ``np.array`` (e.g. PIL images or arrays).
        ys: labels aligned with ``xs``.
    """

    def __init__(self, xs, ys):
        self.xs = xs
        self.ys = ys

    def __getitem__(self, index):
        """Return ``(x1, x2, y)``: two float32 image tensors and a 1-element target."""
        x1 = self.xs[index]
        y1 = self.ys[index]

        # Draw the partner with torch's RNG: DataLoader seeds it differently in
        # every worker process, whereas NumPy's global state can be duplicated
        # across workers and would then yield the same pairings in each of them.
        index2 = torch.randint(len(self.xs), (1,)).item()
        x2 = self.xs[index2]
        y2 = self.ys[index2]

        x1 = torch.as_tensor(np.array(x1), dtype=torch.float32)
        x2 = torch.as_tensor(np.array(x2), dtype=torch.float32)
        y = torch.tensor([0.0 if y1 == y2 else 1.0], dtype=torch.float32)

        return x1, x2, y

    def __len__(self):
        """Number of anchor items (one pair is produced per item)."""
        return len(self.xs)

In [0]:
def collate_xs(xs):
    """Stack per-image tensors into one normalized, channels-first batch.

    The images are stacked along a new leading axis, rescaled with
    ``v * 2 / 255 - 1`` (so 0 maps to -1 and 255 maps to 1) and the last axis
    is moved to position 1.

    Args:
        xs: sequence of equally shaped 3-D tensors with channels last.

    Returns:
        Tensor of shape ``(len(xs), C, H, W)``.
    """
    stacked = torch.stack(xs, dim=0)
    scaled = stacked.mul(2).div(255).sub(1)
    return scaled.permute(0, 3, 1, 2)

def collate_ys(ys):
    """Join the per-sample target tensors end to end along their first axis."""
    return torch.cat(ys, dim=0)

def collate_fn(data):
    """Turn a list of ``(x1, x2, y)`` samples into three batched tensors.

    Args:
        data: non-empty sequence of ``(x1, x2, y)`` triples.

    Returns:
        ``(xs1, xs2, ys)``: both image batches as produced by ``collate_xs``
        and the targets as produced by ``collate_ys``.
    """
    # Transpose the list of triples into three parallel tuples.
    first_images, second_images, targets = zip(*data)
    return collate_xs(first_images), collate_xs(second_images), collate_ys(targets)

# Pair datasets for the training and validation splits.
train_dataset = SiamDataset(x_train, y_train)
val_dataset = SiamDataset(x_val, y_val)

# Training batches are reshuffled every epoch.
train_data_loader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    collate_fn=collate_fn,
)
# Validation walks the dataset in index order.
val_data_loader = data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
    collate_fn=collate_fn,
)

In [0]:
def conv3(in_planes, out_planes, stride=1, padding=1):
    """Create a 3x3 ``nn.Conv2d`` without a bias term.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride.
        padding: zero padding added on every side.
    """
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=padding,
        bias=False,
    )

class FeatureExtractor(nn.Module):
    """Small convolutional encoder that maps an image batch to 256-d vectors.

    Four stride-2 3x3 convolutions (each followed by batch norm and ReLU) are
    followed by one unpadded 3x3 convolution with 256 output channels. The
    spatial sizes noted in the comments hold for a 48x48 input, for which the
    final feature map is 1x1.
    """

    def __init__(self):
        super().__init__()

        self.model = nn.Sequential(
            conv3(3, 16, 2), # 24
            nn.BatchNorm2d(16),
            nn.ReLU(),
            conv3(16, 32, 2), # 12
            nn.BatchNorm2d(32),
            nn.ReLU(),
            conv3(32, 64, 2), # 6
            nn.BatchNorm2d(64),
            nn.ReLU(),
            conv3(64, 128, 2), # 3
            nn.BatchNorm2d(128),
            nn.ReLU(),
            conv3(128, 256, padding=0), # 1
        )

    def forward(self, x):
        """Encode ``x`` of shape ``(B, 3, H, W)``.

        Returns ``(B, 256)`` when the final feature map is 1x1. Only the two
        spatial axes are squeezed, so a batch of one sample keeps its batch
        axis (a bare ``squeeze()`` would collapse it to ``(256,)``).
        """
        x = self.model(x)
        # Drop size-1 spatial axes (width first, then height); never the batch axis.
        x = x.squeeze(3).squeeze(2)
        return x

class Model(nn.Module):
    """Siamese pair scorer built on a single shared ``FeatureExtractor``.

    Both inputs are embedded with the same network; the score is the sigmoid
    of the squared Euclidean distance between the two embeddings. Because that
    distance is non-negative, the score is always at least 0.5.

    Args:
        num_classes: accepted but not used by this model.
    """

    def __init__(self, num_classes):
        super().__init__()

        # One encoder instance, so both branches share their weights.
        self.model = FeatureExtractor()

        self.l2 = nn.PairwiseDistance(p=2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x1, x2):
        """Return one score per pair; larger means the embeddings are farther apart."""
        embedding_a = self.model(x1)
        embedding_b = self.model(x2)
        squared_distance = self.l2(embedding_a, embedding_b).pow(2)
        return self.sigmoid(squared_distance)

In [0]:
def train(model, criterion, optimizer, scheduler, epochs):
    """Train ``model`` and return a copy taken at its best validation epoch.

    Every epoch makes one pass over the global ``train_data_loader`` (each
    batch loss is logged to Weights & Biases as 'Train loss'), then evaluates
    on the global ``val_data_loader`` and logs the mean of the batch losses as
    'Val loss'.

    Args:
        model: module called as ``model(xs1, xs2)``.
        criterion: loss called as ``criterion(outputs, ys)``.
        optimizer: optimizer updating the model's parameters.
        scheduler: optional LR scheduler stepped once per epoch; pass a falsy
            value to disable.
        epochs: number of epochs to run.

    Returns:
        A deep copy of the model from the epoch with the lowest mean validation
        loss, or a copy of the initial model if no epoch improved on the
        starting bound.
    """
    wandb.init(project="hse08_ip_hw_3")

    best_model = copy.deepcopy(model)
    best_loss = float('inf')
    for _ in trange(epochs):
        for xs1, xs2, ys in train_data_loader:
            xs1 = xs1.to(device)
            xs2 = xs2.to(device)
            ys = ys.to(device)

            optimizer.zero_grad()

            outputs = model(xs1, xs2)
            loss = criterion(outputs, ys)

            wandb.log({'Train loss': loss.item()})

            loss.backward()
            optimizer.step()

        # Validation pass: eval mode and no gradient tracking.
        val_losses = []
        model.eval()
        with torch.no_grad():
            for xs1, xs2, ys in val_data_loader:
                xs1 = xs1.to(device)
                xs2 = xs2.to(device)
                ys = ys.to(device)

                outputs = model(xs1, xs2)
                val_losses.append(criterion(outputs, ys).item())

        model.train()

        val_loss = np.array(val_losses).mean()
        wandb.log({'Val loss': val_loss})

        # Keep a snapshot whenever validation improves. The comparison used to
        # be inverted, so no snapshot was ever taken and the untrained initial
        # copy was returned.
        if val_loss < best_loss:
            best_loss = val_loss
            best_model = copy.deepcopy(model)

        if scheduler:
            scheduler.step()

    return best_model

In [9]:
# Build the siamese model on the selected device.
model = Model(num_classes=num_classes).to(device)

# The model emits values in [0, 1], so plain binary cross-entropy applies.
criterion = nn.BCELoss()

# Optimize only the parameters that require gradients.
trainable_params = (p for p in model.parameters() if p.requires_grad)
optimizer = optim.Adam(trainable_params, lr=LR, weight_decay=W_L2)

# No LR scheduler; `model` is rebound to whatever train() returns.
model = train(model, criterion, optimizer, None, EPOCHS)

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))




In [0]:
def get_score(xs, ys, x):
    """Score a query image against every class of a labelled reference set.

    The query is paired with each reference image, ``BATCH_SIZE`` references
    at a time, and scored by the global ``model``. Since the model output grows
    with embedding distance, ``1 - output`` is used as similarity and averaged
    per class.

    NOTE(review): the original body was an unfinished stub that did not parse;
    this completion follows its visible outline (batched walk over ``xs``) and
    should be confirmed against the intended scoring rule.

    Args:
        xs: reference images (anything ``np.array`` can convert).
        ys: integer class labels aligned with ``xs``.
        x: the query image.

    Returns:
        1-D float tensor of length ``max(ys) + 1`` with the mean similarity to
        each class (``-inf`` for classes without references); ``argmax`` gives
        the best-matching class. Empty when ``xs`` is empty.

    Raises:
        ValueError: if ``xs`` and ``ys`` differ in length.
    """
    if len(xs) != len(ys):
        raise ValueError('xs and ys must have the same length')
    if len(xs) == 0:
        return torch.zeros(0)

    n_classes = int(max(ys)) + 1
    similarity_sum = torch.zeros(n_classes)
    reference_count = torch.zeros(n_classes)
    query = torch.FloatTensor(np.array(x))

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for start in range(0, len(xs), BATCH_SIZE):
                stop = start + BATCH_SIZE
                references = [torch.FloatTensor(np.array(img)) for img in xs[start:stop]]
                labels = torch.tensor([int(label) for label in ys[start:stop]], dtype=torch.long)

                reference_batch = collate_xs(references).to(device)
                # Repeat the query so every reference gets its own pair.
                query_batch = collate_xs([query] * len(references)).to(device)

                # reshape(-1) keeps one score per pair even for a single-pair batch.
                dissimilarity = model(reference_batch, query_batch).reshape(-1).cpu()

                similarity_sum.index_add_(0, labels, 1 - dissimilarity)
                reference_count.index_add_(0, labels, torch.ones_like(dissimilarity))
    finally:
        # Leave the model in the mode it was in before scoring.
        model.train(was_training)

    scores = similarity_sum / reference_count.clamp(min=1)
    scores[reference_count == 0] = float('-inf')
    return scores

In [0]:
# Directory prefix of the test images inside the archive.
TEST_PREFIX = 'test/test/'

# Load every test image, keeping its bare file name for the submission file.
images = []
with zipfile.ZipFile(PATH_DATA, 'r') as zip_file:
    # Match the exact prefix and skip directory entries: a substring test on
    # 'test' would also select the folders themselves or unrelated members.
    test_members = [
        member for member in zip_file.namelist()
        if member.startswith(TEST_PREFIX) and not member.endswith('/')
    ]

    for member in tqdm(test_members):
        # Members are opened by their archive name, which always uses '/'.
        with zip_file.open(member) as img_file:
            image = Image.open(img_file).convert('RGB')
        images.append((member[len(TEST_PREFIX):], image))


def _embed_images(pil_images):
    """Embed a non-empty list of images with the trained encoder, BATCH_SIZE at a time.

    Returns a CPU tensor of shape ``(len(pil_images), D)``.
    """
    encoder = model.model
    chunks = []
    for start in tqdm(range(0, len(pil_images), BATCH_SIZE)):
        chunk = pil_images[start:start + BATCH_SIZE]
        batch = collate_xs([torch.FloatTensor(np.array(img)) for img in chunk])
        # reshape guarantees a (batch, features) layout even for a single-image chunk.
        chunks.append(encoder(batch.to(device)).reshape(len(chunk), -1).cpu())
    return torch.cat(chunks)


# The siamese model only scores pairs, so a class is predicted as the one whose
# mean training embedding (centroid) is nearest to the test image's embedding.
model.eval()
with torch.no_grad():
    reference_embeddings = _embed_images(x_train)
    reference_labels = torch.tensor([int(label) for label in y_train], dtype=torch.long)

    class_counts = torch.bincount(reference_labels, minlength=int(num_classes))
    centroids = torch.zeros(int(num_classes), reference_embeddings.shape[1])
    centroids.index_add_(0, reference_labels, reference_embeddings)
    centroids = centroids / class_counts.clamp(min=1).unsqueeze(1).to(centroids.dtype)

    test_embeddings = _embed_images([image for _, image in images])
    distances = torch.cdist(test_embeddings, centroids)
    # Classes with no training image in this split can never be selected.
    distances[:, class_counts == 0] = float('inf')
    predicted_classes = distances.argmin(dim=1).tolist()

predictions = pd.DataFrame({
    'filename': [name for name, _ in images],
    'class_number': predicted_classes,
})
predictions.to_csv('test_neural.csv', index=False)

HBox(children=(IntProgress(value=0, max=7551), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7551), HTML(value='')))


