In [1]:
from __future__ import print_function
from __future__ import division

In [2]:
import torch
from torch.utils.data import DataLoader

In [3]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
import torchvision
from torchvision import transforms

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
import time
import os
import copy
import json
from tqdm.notebook import tqdm

In [7]:
os.chdir('../')

In [8]:
from imports.lightning_model import init_cls_model
from imports.dataset_loaders import build_datasets, YelpDataset
from imports.metrics import multiclass_stats

In [9]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [10]:
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)
print("PyTorch Lightning Version: ",pl.__version__)

PyTorch Version:  1.8.1+cu102
Torchvision Version:  0.9.1+cu102
PyTorch Lightning Version:  1.1.8


In [11]:
!nvidia-smi -L

/bin/bash: nvidia-smi: command not found


In [12]:
with open('config.json', 'r') as f:
    FLAGS = json.load(f)['lightning']

In [13]:
FLAGS

{'__comment': 'output_dim 2 for business / restaurant, 6 for ambience; class weights [1.0, 1.0]',
 'model_name': 'densenet',
 'photo_dir': 'data/photos',
 'csv_path': 'data/business_ambience.csv',
 'output_dim': 6,
 'batch_size': 512,
 'num_workers': 4,
 'learning_rate': 0.02,
 'max_epochs': 20,
 'feature_extract': True,
 'use_pretrained': True,
 'multilabel': True,
 'threshold': 0.5,
 'class_weight': [3.513, 1.63, 6.084, 9.839, 6.502, 4.625]}

In [14]:
csv_file = pd.read_csv(FLAGS['csv_path'])

In [15]:
yelp_model, input_size = init_cls_model(FLAGS)

In [16]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'dev': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [17]:
print("Initializing Datasets and Dataloaders...")

Initializing Datasets and Dataloaders...


In [18]:
csv_file.head()

Unnamed: 0,photo_id,touristy,hipster,romantic,divey,intimate,upscale
0,ZlTwL6uWx6rW_L9Df5RT8A,False,False,True,False,True,False
1,fHbSMxueQfXFRb9e-6bJuw,False,False,False,True,False,False
2,74oWvVVIjms9LjfHQOgxMQ,False,False,False,False,False,True
3,QY6c1OKsIpujF4MDHQdbag,False,True,False,False,False,False
4,0AYEzNJYFF2PeXo71cpKuw,False,True,False,False,False,False


In [19]:
from sklearn.model_selection import train_test_split

In [20]:
train, test = train_test_split(csv_file, test_size=0.2, random_state=42)
dev, test = train_test_split(test, test_size=0.5, random_state=42)

In [21]:
data_frames = {'train': train,
              'dev': dev,
              'test': test}

In [22]:
dataloaders_dict = build_datasets(FLAGS, data_frames, data_transforms)

In [23]:
dataloaders_dict

{'train': <torch.utils.data.dataloader.DataLoader at 0x7fe81f124b00>,
 'dev': <torch.utils.data.dataloader.DataLoader at 0x7fe81f124a58>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7fe81f124da0>}

In [24]:
checkpoint_callback = ModelCheckpoint(monitor='val_loss')
early_stop_callback = EarlyStopping(monitor='val_loss', patience=3)

In [25]:
trainer = pl.Trainer(callbacks=[checkpoint_callback, early_stop_callback],
                     progress_bar_refresh_rate=1, max_epochs=FLAGS['max_epochs'], #)
                     stochastic_weight_avg=True, precision=16, gpus=-1)  # gpu-only parameters

TypeError: __init__() got an unexpected keyword argument 'stochastic_weight_avg'

In [29]:
trainer.fit(yelp_model, dataloaders_dict['train'], dataloaders_dict['dev'])

Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

1

In [30]:
if 'checkpoints' not in os.listdir('working'):
    os.mkdir('working/checkpoints')
trainer.save_checkpoint(f"working/checkpoints/{FLAGS['model_name']}_amb.ckpt")

In [31]:
print(checkpoint_callback.best_model_path)

/kaggle/lightning_logs/version_0/checkpoints/epoch=4-step=259.ckpt


In [33]:
yelp_model, _ = init_cls_model(FLAGS, checkpoint_callback.best_model_path)

In [34]:
trainer.test(yelp_model, dataloaders_dict['test'])

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 1.1109058856964111}
--------------------------------------------------------------------------------


[{'test_loss': 1.1109058856964111}]

In [35]:
model = yelp_model.eval().cuda(device=0)

In [65]:
y_pred, y_true = [], []

In [66]:
test_dataset = YelpDataset(test, FLAGS['photo_dir'], data_transforms['test'])

In [67]:
for image, labels in tqdm(test_dataset):
    output = model(image.unsqueeze(0).cuda(0)).cpu().data.numpy()[0]
    if output.size == 1:
        out_array = np.zeros(FLAGS['num_classes']).astype('int')
        out_array[output] = 1
        y_pred.append(out_array)
    else:
        y_pred.append(output)
    y_true.append(labels.numpy().astype('int'))

  0%|          | 0/3293 [00:00<?, ?it/s]

In [68]:
cls_report, stats = multiclass_stats(np.array(y_true), np.array(y_pred))
print(cls_report)

              precision    recall  f1-score   support

           0       0.22      1.00      0.37       739
           1       0.38      1.00      0.55      1256
           2       0.14      1.00      0.25       473
           3       0.08      1.00      0.15       274
           4       0.13      1.00      0.23       429
           5       0.17      1.00      0.30       571

   micro avg       0.19      1.00      0.32      3742
   macro avg       0.19      1.00      0.31      3742
weighted avg       0.24      1.00      0.37      3742
 samples avg       0.19      1.00      0.31      3742



In [69]:
stats

{'jaccard_score': 0.18939163882984106,
 'hamming_loss': 0.8106083611701589,
 'f1_score': 0.3082786700080738,
 'roc_auc_score': 0.5}

In [None]:
os.listdir('working/checkpoints')

In [None]:
from IPython.display import FileLink
os.chdir('working')
FileLink(f"checkpoints/{FLAGS['model_name']}_amb.ckpt")