In [None]:
from __future__ import print_function
from __future__ import division

In [None]:
import torch
from torch.utils.data import DataLoader

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
import torchvision
from torchvision import transforms

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import time
import os
import copy
from tqdm.notebook import tqdm

In [None]:
# Move up one directory so that the relative `input/...` paths and the
# `input.yelpimports` package used below resolve from the working directory.
# Guarded on the presence of `input/` so that re-running this cell does not
# keep climbing the directory tree.
if not os.path.isdir('input'):
    os.chdir('../')

In [None]:
from input.yelpimports.lightning_model import init_cls_model
from input.yelpimports.dataset_loaders import build_datasets, YelpDataset
from input.yelpimports.metrics import multiclass_stats

In [None]:
# Start from a clean slate: run a Python garbage-collection pass, then ask
# PyTorch to release its cached CUDA memory (useful when re-running the
# notebook in a kernel that already trained a model).
import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Record the versions of the core libraries used for this run.
for _label, _lib in (
    ("PyTorch Version: ", torch),
    ("Torchvision Version: ", torchvision),
    ("PyTorch Lightning Version: ", pl),
):
    print(_label, _lib.__version__)

In [None]:
# Shell escape: list the GPUs visible on this machine.
!nvidia-smi -L

In [None]:
# Run configuration. The whole dict is handed to `init_cls_model` and
# `build_datasets`; individual entries are also read directly in this notebook
# (paths, `max_epochs`, `model_name`).
FLAGS = dict(
    model_name='custom',
    photo_dir='input/yelpbusinessrestaurant/br_photos/photos/',
    csv_path='input/yelpbusinessrestaurant/business_restaurant.csv',
    output_dim=2,           # alternative noted by the author: 6
    batch_size=512,         # 512 / N gpu
    num_workers=4,          # 4 per gpu
    learning_rate=0.02,     # 0.02 * N gpu
    max_epochs=20,          # arbitrary
    feature_extract=True,
    use_pretrained=True,
    multilabel=False,
    threshold=0.5,
    class_weight=[1, 1],
)

In [None]:
# Load the CSV at FLAGS['csv_path'] into a DataFrame; it is split into
# train/dev/test frames further down.
csv_file = pd.read_csv(FLAGS['csv_path'])

In [None]:
# Sanity check: the photo directory must contain exactly as many entries as
# the CSV has rows. The message reports both counts so a mismatch can be
# diagnosed without re-running anything.
n_photos = len(os.listdir(FLAGS['photo_dir']))
assert n_photos == len(csv_file), (
    f"photo/row count mismatch: {n_photos} entries in {FLAGS['photo_dir']!r} "
    f"vs {len(csv_file)} rows in {FLAGS['csv_path']!r}"
)

In [None]:
# Build the classifier from the run configuration. The factory returns the
# model together with an `input_size` value that is fed to the image
# transforms below.
yelp_model, input_size = init_cls_model(FLAGS)

In [None]:
def _normalize():
    """Return a fresh per-channel normalisation step with the fixed mean/std used by every split."""
    # NOTE(review): these look like the usual ImageNet channel statistics — confirm.
    return transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])


def _eval_pipeline(size):
    """Deterministic preprocessing: resize, centre-crop to `size`, convert to tensor, normalise."""
    return transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        _normalize(),
    ])


# Per-split preprocessing. Only the training split is randomly augmented
# (random resized crop + horizontal flip); dev and test share the same
# deterministic pipeline but each gets its own Compose instance.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        _normalize(),
    ]),
    'dev': _eval_pipeline(input_size),
    'test': _eval_pipeline(input_size),
}

In [None]:
# Progress marker for the dataset / dataloader construction cells that follow.
print("Initializing Datasets and Dataloaders...")

In [None]:
# Preview the first rows of the loaded table.
csv_file.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# 80/10/10 split: hold out 20% of the rows, then cut that hold-out in half to
# obtain the dev and test frames. Both splits use a fixed seed so the
# partition is reproducible.
train, _holdout = train_test_split(csv_file, test_size=0.2, random_state=42)
dev, test = train_test_split(_holdout, test_size=0.5, random_state=42)

In [None]:
# Group the three split frames under the same keys used by `data_transforms`.
data_frames = dict(train=train, dev=dev, test=test)

In [None]:
# Build the per-split loaders from the config, the split frames and the
# matching transforms. The result is indexed by 'train' / 'dev' / 'test'
# in the cells below.
dataloaders_dict = build_datasets(FLAGS, data_frames, data_transforms)

In [None]:
# Display what was built, as a quick visual check.
dataloaders_dict

In [None]:
# Both callbacks watch the 'val_loss' metric: one keeps the best checkpoint,
# the other stops training early with a patience of 2.
# NOTE(review): assumes the LightningModule logs a metric named 'val_loss' — confirm.
checkpoint_callback = ModelCheckpoint(monitor='val_loss')
early_stop_callback = EarlyStopping(monitor='val_loss', patience=2)

In [None]:
# Configure the Lightning trainer: checkpointing + early stopping, epoch cap
# taken from FLAGS, stochastic weight averaging, 16-bit precision, and
# `gpus=-1` (presumably "use every available GPU" — confirm).
# NOTE(review): `progress_bar_refresh_rate`, `stochastic_weight_avg` and `gpus`
# appear to be version-sensitive Trainer arguments; confirm they are accepted
# by the installed pytorch_lightning version printed above.
trainer = pl.Trainer(callbacks=[checkpoint_callback, early_stop_callback],
                     progress_bar_refresh_rate=1, max_epochs=FLAGS['max_epochs'],
                     stochastic_weight_avg=True, precision=16, gpus=-1)

In [None]:
# Train on the 'train' loader and validate on the 'dev' loader.
trainer.fit(yelp_model, dataloaders_dict['train'], dataloaders_dict['dev'])

In [None]:
# Save the trainer's current state under working/checkpoints/, named after the
# configured model. This is a separate file from the best-'val_loss' checkpoint
# tracked by `checkpoint_callback`.
# `makedirs(..., exist_ok=True)` creates `working/` too if it is missing and is
# safe to re-run, unlike listing `working` and calling `mkdir`.
os.makedirs('working/checkpoints', exist_ok=True)
trainer.save_checkpoint(f"working/checkpoints/{FLAGS['model_name']}_amb.ckpt")

In [None]:
# Show where the ModelCheckpoint callback stored its best checkpoint.
print(checkpoint_callback.best_model_path)

In [None]:
# Rebuild the model, this time passing the best checkpoint path to the factory
# (presumably so the weights are restored from it — confirm in
# `init_cls_model`). The second return value is not needed here.
yelp_model, _ = init_cls_model(FLAGS, checkpoint_callback.best_model_path)

In [None]:
# Evaluate the reloaded model on the held-out 'test' loader.
trainer.test(yelp_model, dataloaders_dict['test'])

In [None]:
# Switch the model to evaluation mode and place it on CUDA device 0 for the
# manual per-image inference loop below.
model = yelp_model.eval().cuda(device=0)

In [None]:
# Accumulators for the manual evaluation: model outputs and ground-truth labels.
y_pred, y_true = [], []

In [None]:
# Wrap the test frame in a dataset that applies the deterministic 'test'
# transforms. It is iterated below as (image, labels) pairs.
test_dataset = YelpDataset(test, FLAGS['photo_dir'], data_transforms['test'])

In [None]:
# Run the test set through the model one image at a time and collect the
# model outputs alongside the ground-truth labels.
# The accumulators are (re)created here so that re-running this cell does not
# append a second copy of every prediction.
y_pred, y_true = [], []

# Inference needs no gradients: `no_grad` avoids building an autograd graph for
# every image, and makes the legacy `.data` detach unnecessary.
with torch.no_grad():
    for image, labels in tqdm(test_dataset):
        # Add a batch dimension, run on GPU 0, and take the single result back
        # to the CPU as a NumPy value.
        output = model(image.unsqueeze(0).cuda(0)).cpu().numpy()[0]
        # NOTE(review): `output` is stored exactly as the model returns it. The
        # sklearn metrics below expect class labels — confirm the model's
        # forward pass already yields a label rather than per-class scores.
        y_pred.append(output)
        y_true.append(labels.numpy().astype('int'))

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Accuracy over the manually collected test predictions.
# NOTE(review): assumes `y_pred` holds class labels in the same format as
# `y_true` — confirm against what the model's forward pass returns.
accuracy_score(y_true, y_pred)

In [None]:
# F1 score over the same predictions, using sklearn's default averaging.
# NOTE(review): the default averaging mode targets binary labels; revisit if
# FLAGS['output_dim'] is raised above 2.
f1_score(y_true, y_pred)

In [None]:
# Confirm which checkpoint files were written.
os.listdir('working/checkpoints')

In [None]:
from IPython.display import FileLink

# Step into `working/` so the link path below is relative to that directory.
# Guarded so that re-running this cell (when the kernel is already inside
# `working/`) does not raise.
if os.path.isdir('working'):
    os.chdir('working')

# Build the link from the same model name that was used when saving the
# checkpoint, so the two stay in sync if FLAGS['model_name'] changes.
FileLink(f"checkpoints/{FLAGS['model_name']}_amb.ckpt")