<a href="https://colab.research.google.com/github/GrigoryBartosh/hse08_ip/blob/master/hw3_neural.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; Colab asks for an authorization code
# interactively the first time this runs.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
cd 'drive/My Drive/ip_hw_3'

/content/drive/My Drive/ip_hw_3


In [0]:
# Install the wandb client (imported below and used for metric logging);
# pip's stdout is discarded to keep the cell output short.
! pip install wandb >> /dev/null

In [0]:
import os
import zipfile
import pandas as pd

import numpy as np
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
from torchvision import models

from PIL import Image

import wandb

from tqdm.auto import tqdm, trange
import matplotlib.pyplot as plt

# --- Run configuration -------------------------------------------------------
# Zip archive holding train.csv plus the train/ and test/ image folders,
# resolved relative to the current working directory.
PATH_DATA = 'tl-signs-hse-itmo-2020-winter.zip'

# Optimisation hyper-parameters.
EPOCHS = 100
BATCH_SIZE = 128
LR = 1e-3
W_L2 = 0  # weight decay handed to the optimizer; 0 disables it

# Prefer the first CUDA device when one is available, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [5]:
# Load every training image and its label from the archive into memory, then
# split them into train/validation lists of (image, label) pairs.
SPLIT_SEED = 42  # fixed so the train/validation split is identical on every run

xs, ys = [], []
with zipfile.ZipFile(PATH_DATA, 'r') as zip_file:
    with zip_file.open('train.csv') as file:
        train_df = pd.read_csv(file)

    # Iterate over the two needed columns directly; passing `total` lets tqdm
    # draw a real progress bar instead of an open-ended counter.
    rows = zip(train_df['filename'], train_df['class_number'])
    for name, label in tqdm(rows, total=len(train_df)):
        # Zip member names always use '/', regardless of the host OS, so the
        # path is built by hand rather than with os.path.join.
        with zip_file.open('train/train/' + name) as img_file:
            # convert() decodes the pixels while the archive member is still open.
            image = Image.open(img_file).convert('RGB')
        xs.append(image)
        ys.append(int(label))  # plain Python int, not a numpy scalar

# Labels are treated as 0-based class indices.
num_classes = max(ys) + 1

x_train, x_val, y_train, y_val = train_test_split(
    xs, ys, test_size=0.1, random_state=SPLIT_SEED)
train_data = list(zip(x_train, y_train))
val_data = list(zip(x_val, y_val))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [0]:
def collate_xs(xs):
    """Stack equally shaped channel-last images into one float32 batch.

    Every value v is rescaled as ``v * 2 / 255 - 1`` (so 0 -> -1 and
    255 -> 1), and the last axis is moved to position 1, turning a
    (N, A, B, C) stack into (N, C, A, B).
    """
    batch = torch.tensor(np.stack(xs, axis=0), dtype=torch.float32)
    batch = batch.mul(2).div(255).sub(1)
    return batch.permute(0, 3, 1, 2)

def collate_ys(ys):
    """Pack a sequence of integer class labels into a 1-D int64 tensor."""
    return torch.LongTensor(ys)

def collate_fn(data):
    """Collate a sequence of (image, label) pairs into an (images, labels) tensor pair."""
    images, labels = zip(*data)
    return collate_xs(images), collate_ys(labels)

# Worker processes beyond the number of available CPU cores only add start-up
# and scheduling overhead, so cap the requested count by what the host offers.
# os.cpu_count() may return None, hence the fallback to a single worker.
NUM_WORKERS = min(8, os.cpu_count() or 1)

train_data_loader = data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training samples every epoch
    num_workers=NUM_WORKERS,
    collate_fn=collate_fn
)
val_data_loader = data.DataLoader(
    dataset=val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep validation order fixed
    num_workers=NUM_WORKERS,
    collate_fn=collate_fn
)

In [0]:
class Model(nn.Module):
    """Pretrained ResNet-18 whose final layer is replaced by a `num_classes`-way linear head."""

    def __init__(self, num_classes):
        super().__init__()

        backbone = models.resnet18(pretrained=True)

        # Fine-tune the whole network: every pretrained weight stays trainable.
        for param in backbone.parameters():
            param.requires_grad = True

        # Swap the original classifier for a freshly initialised one of the right width.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)

        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch `x`."""
        return self.model(x)

In [0]:
def train(model, criterion, optimizer, scheduler, epochs):
    """Train `model` and log train/validation metrics to Weights & Biases.

    Batches come from the module-level `train_data_loader` and
    `val_data_loader`; tensors are moved to the module-level `device`.

    Args:
        model: network to optimise.
        criterion: callable mapping (outputs, targets) to a scalar loss tensor.
        optimizer: optimizer already bound to the parameters of `model`.
        scheduler: optional LR scheduler stepped once per epoch; pass None
            (or any falsy value) to disable.
        epochs: number of passes over the training loader.
    """
    wandb.init(project="hse08_ip_hw_3")

    # Do not rely on the caller's state: a model handed over in eval mode would
    # otherwise run its first epoch in eval mode.
    model.train()

    for _ in trange(epochs):
        for xs, ys in train_data_loader:
            xs = xs.to(device)
            ys = ys.to(device)

            optimizer.zero_grad()

            outputs = model(xs)
            loss = criterion(outputs, ys)

            # Logged once per batch.
            wandb.log({'Train loss': loss.item()})

            loss.backward()
            optimizer.step()

        # ---- validation pass (no gradients, eval-mode layers) ----
        val_losses = []
        correct, total = 0, 0
        model.eval()
        with torch.no_grad():
            for xs, ys in val_data_loader:
                xs = xs.to(device)
                ys = ys.to(device)

                outputs = model(xs)
                val_losses.append(criterion(outputs, ys).item())
                correct += (outputs.argmax(dim=1) == ys).sum().item()
                total += ys.shape[0]

        # Back to training mode for the next epoch (and for the caller).
        model.train()

        # An empty validation loader would otherwise divide by zero and log NaN.
        if total > 0:
            wandb.log({'Val loss': np.mean(val_losses),
                       'Val accuracy': correct / total})

        if scheduler:
            scheduler.step()

In [12]:
# Build the classifier and move it to the selected device.
model = Model(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=LR, weight_decay=W_L2)

# No learning-rate scheduler is used for this run.
train(model, criterion, optimizer, None, EPOCHS)

<IPython.core.display.Javascript object>

wandb: ERROR Not authenticated.  Copy a key from https://app.wandb.ai/authorize


API Key: ··········


wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc


HBox(children=(IntProgress(value=0), HTML(value='')))

KeyboardInterrupt: ignored

In [14]:
# Predict a class for every test image in the archive and write the results
# to test_neural.csv.
TEST_PREFIX = 'test/test/'  # folder that holds the test images inside the archive

images = []
with zipfile.ZipFile(PATH_DATA, 'r') as zip_file:
    # Keep only real files under the test folder. A plain substring check
    # ('test' in name) would also match directory entries and any other path
    # that merely contains "test"; slicing by the prefix length replaces the
    # former magic number 10.
    image_names = [member[len(TEST_PREFIX):]
                   for member in zip_file.namelist()
                   if member.startswith(TEST_PREFIX) and not member.endswith('/')]

    for name in tqdm(image_names):
        # Zip member names always use '/', independent of the host OS.
        with zip_file.open(TEST_PREFIX + name) as img_file:
            image = Image.open(img_file).convert('RGB')
        images.append((name, image))

model.eval()
filenames, class_numbers = [], []
with torch.no_grad():
    for name, image in tqdm(images):
        # Images are fed through the network one at a time (batch of size 1).
        x = collate_xs([image]).to(device)
        y = model(x)[0].argmax().item()

        filenames.append(name)
        class_numbers.append(y)

predictions = pd.DataFrame({'filename': filenames, 'class_number': class_numbers})
predictions.to_csv('test_neural.csv', index=False)

HBox(children=(IntProgress(value=0, max=7551), HTML(value='')))

HBox(children=(IntProgress(value=0, max=7551), HTML(value='')))