In [1]:
from time import sleep
import pickle
import os
import shutil
from collections import Counter
from threading import Thread
import queue
from collections import defaultdict
from tqdm.auto import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity

from src.utils import load_model, get_dataloaders, load_images_in_folder, show_images, modify_keys, save_results, transforms
from src.train import train_model

In [2]:
# --- Optimisation hyper-parameters -------------------------------------
lr = 1e-4
batch_size = 64
num_epochs = 30
num_classes = 2

# --- Dataset locations (absolute paths on the training machine) --------
TRAIN_PATH = '/mnt/hdd/1/imageData/train/lanNonLan/'
TEST_PATH = '/mnt/hdd/1/imageData/index/lanNonLan/'

# --- Names handed to the training / checkpoint helpers -----------------
state_path = 'state_binary.pkl'
model_name = 'landNonLandmark'

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

datasets, dataloaders = get_dataloaders(TRAIN_PATH, TEST_PATH, batch_size)

cuda:0


In [3]:
class ResnetClassifier(nn.Module):
    """ResNet-50 whose final fully-connected layer is replaced by a small MLP head.

    The head is ``Linear(in_features, 512) -> ReLU -> Dropout(0.4) ->
    Linear(512, n_classes)`` and produces raw (un-normalised) class scores.

    Note:
        None of the evaluation helpers below switch the module to eval mode.
        Because the head contains dropout, call ``model.eval()`` before using
        them if deterministic predictions are required.
    """

    def __init__(self, n_classes=None):
        """Build the backbone and attach the classification head.

        Args:
            n_classes: Number of output scores. When ``None`` (the default)
                the notebook-level ``num_classes`` setting is used, which is
                what the original zero-argument constructor did.
        """
        super().__init__()
        if n_classes is None:
            n_classes = num_classes
        self.model = torchvision.models.resnet50(pretrained=True)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, n_classes),
        )

    def forward(self, x):
        """Return the class scores produced by the wrapped network for ``x``.

        Defined as ``forward`` (rather than overriding ``__call__``) so that
        ``nn.Module.__call__`` keeps dispatching registered hooks; calling
        ``model(x)`` behaves as before.
        """
        return self.model(x)

    def _predict_labels(self, dataloader, progress=False):
        """Run the model over ``dataloader`` and gather labels and predictions.

        Args:
            dataloader: Iterable yielding ``(x, y)`` batches.
            progress: Wrap the iteration in a ``tqdm`` progress bar when true.

        Returns:
            ``(labels, preds)``: two 1-D CPU tensors, where ``preds`` holds the
            arg-max over dimension 1 of the model output for every sample.
        """
        batches = tqdm(dataloader) if progress else dataloader
        labels, preds = [], []
        with torch.no_grad():
            for x, y in batches:
                output = self(x.to(device))
                # Move each batch to the CPU right away so the accumulated
                # predictions do not stay resident on the compute device.
                preds.append(torch.argmax(output, dim=1).cpu())
                labels.append(torch.as_tensor(y).reshape(-1).cpu())
        return torch.cat(labels), torch.cat(preds)

    def check_predictions(self, dataloader):
        """Compute overall accuracy and per-class hit counts on ``dataloader``.

        Returns:
            ``(accuracy, correct)`` where ``accuracy`` is the value returned by
            ``accuracy_score`` and ``correct`` maps each label value seen to
            ``np.array([n_correct, n_total])`` for that label.
        """
        labels, preds = self._predict_labels(dataloader, progress=True)
        correct = {}
        for label, guess in zip(labels.tolist(), preds.tolist()):
            counts = correct.setdefault(label, np.array([0, 0]))
            counts += (int(label == guess), 1)
        return accuracy_score(labels.numpy(), preds.numpy()), correct

    def confusion_matrix(self, dataloader):
        """Return the confusion matrix of true labels vs. arg-max predictions.

        Inside this method the name ``confusion_matrix`` resolves to the
        module-level function imported from scikit-learn, not to this method.
        """
        labels, preds = self._predict_labels(dataloader)
        return confusion_matrix(labels.numpy(), preds.numpy())

    def predictions_for_class(self, x):
        """Return the soft-max class probabilities for ``x``, sorted per row.

        Returns:
            The result of ``torch.sort`` along dimension 1 of the CPU
            probabilities, i.e. a ``(values, indices)`` pair.
        """
        with torch.no_grad():
            output = self(x.to(device))
            return torch.sort(torch.softmax(output.cpu(), dim=1), dim=1)

In [4]:
# Fresh classifier on the selected device, trained from scratch
# (continue_train=False) with cross-entropy loss and Adam.
model = ResnetClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# step_size=1, gamma=0.9: the learning rate is multiplied by 0.9 on every scheduler step.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

train_loss, val_loss = train_model(
    dataloaders,
    device,
    model,
    criterion,
    optimizer,
    state_path,
    model_name,
    num_epochs=num_epochs,
    continue_train=False,
    scheduler=scheduler,
)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

RuntimeError: CUDA out of memory. Tried to allocate 226.00 MiB (GPU 0; 7.93 GiB total capacity; 981.86 MiB already allocated; 190.56 MiB free; 36.14 MiB cached)

In [None]:
# Reload the pickled state from disk and pull out the two loss histories.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# state files produced by this project.
with open(state_path, 'rb') as f:
    state = pickle.load(f)
train_loss, val_loss = state['loss'], state['val_losses']

# One curve per history, plotted against its own index range.
for curve in (train_loss, val_loss):
    plt.plot(np.arange(len(curve)), curve)
plt.legend(('train', 'validation'));

In [None]:
# Display the 'accuracy' entry of `state` as the cell output.
state['accuracy']

In [None]:
# Preview one randomly chosen image from the 'nonLan' sub-folder of TRAIN_PATH.
# The path is assembled with os.path.join component by component, so it no
# longer depends on TRAIN_PATH ending with a path separator.
nonlan_dir = os.path.join(TRAIN_PATH, 'nonLan')
files = os.listdir(nonlan_dir)
a = Image.open(os.path.join(nonlan_dir, files[np.random.choice(len(files))]))
plt.imshow(a);  # trailing ';' hides the AxesImage repr in the cell output

In [None]:
# Build a second classifier, let load_model populate it, and switch it to
# eval mode for inference.
# NOTE(review): the literal 6 is presumably the checkpoint/epoch to restore —
# confirm against load_model's signature.
model2 = load_model(ResnetClassifier().to(device), model_name, 6)
model2.eval()

# Loaders over the russianDataCleanAdded folders; the last argument is the
# batch size position used by the earlier get_dataloaders call (here 1).
_, dataloader = get_dataloaders(
    '/mnt/hdd/1/imageData/train/russianDataCleanAdded',
    '/mnt/hdd/1/imageData/index/russianDataCleanAdded',
    1,
)

In [None]:
# Collect every training sample for which model2 assigns a higher soft-max
# probability to class index 1 than to class index 0.
#
# NOTE(review): mapping a batch position back to `dataset.samples` assumes the
# loader walks the dataset in order (no shuffling, no sampler) — confirm
# against get_dataloaders.
results = []
train_loader = dataloader['train']
seen = 0  # samples consumed so far == dataset index of the current batch's first item
with torch.no_grad():
    # Wrap the loader itself (not enumerate) so tqdm can show the total.
    for x, y in tqdm(train_loader):
        res = model2(x.to(device)).softmax(dim=1)
        mask = res[:, 0] < res[:, 1]
        # Iterate the per-sample mask so any batch size works, not only 1.
        for j, hit in enumerate(mask.tolist()):
            if hit:
                results.append(train_loader.dataset.samples[seen + j])
        seen += x.shape[0]

In [None]:
# Persist the collected samples to 'nonLan.pickle' in the working directory.
with open('nonLan.pickle', 'wb') as out_file:
    pickle.dump(results, out_file)

In [None]:
# Display how many samples were collected in `results`.
len(results)

In [None]:
# Group the collected file paths by the name of their parent directory
# (the second-to-last '/'-separated component of each path).
results_dict = defaultdict(list)
for sample in results:
    image_path = sample[0]
    results_dict[image_path.split('/')[-2]].append(image_path)
    
def show(img, size=3):
    """Draw ``img`` in a new square figure with no frame and no axis decorations.

    Args:
        img: Anything accepted by ``plt.imshow``.
        size: Edge length passed as both width and height to ``figsize``.
    """
    fig = plt.figure(figsize=(size, size))
    # A single axes spanning the whole figure, so the image fills the canvas.
    canvas = fig.add_axes([0, 0, 1, 1], frameon=False)
    canvas.set_axis_off()
    plt.imshow(img)

# Per-directory path lists, ordered from the shortest list to the longest.
items = list(results_dict.values())
items.sort(key=len)

# Disabled export of every group into '../Bad examples/<directory>/':
# for k, vv in results_dict.items():
#     os.mkdir('../Bad examples/' + k)
#     for v in vv:
#         shutil.copy(v, '../Bad examples/' + k + "/" + v.split('/')[-1])

# Inspect the i-th largest group: print its size, then render each image.
i = 4
selected = items[-i]
print(len(selected))
for v in selected:
    show(Image.open(v))


In [None]:
# Display the number of groups held in `items`.
len(items)