In [1]:
from google.colab import drive
import os

In [2]:
# Mount Google Drive under /content/gdrive; force_remount=True asks Colab to redo the mount
# even when one already exists, so re-running this cell is safe.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
!pip install pytorch-lightning

Collecting pytorch-lightning
[?25l  Downloading https://files.pythonhosted.org/packages/0b/87/1dda4ba592b66b2cd53854f5092e2ed3da5b41b64fb32e6388db689094a3/pytorch_lightning-1.3.0-py3-none-any.whl (804kB)
[K     |████████████████████████████████| 808kB 18.4MB/s 
Collecting pyDeprecate==0.3.0
  Downloading https://files.pythonhosted.org/packages/14/52/aa227a0884df71ed1957649085adf2b8bc2a1816d037c2f18b3078854516/pyDeprecate-0.3.0-py3-none-any.whl
Collecting fsspec[http]>=2021.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/e9/91/2ef649137816850fa4f4c97c6f2eabb1a79bf0aa2c8ed198e387e373455e/fsspec-2021.4.0-py3-none-any.whl (108kB)
[K     |████████████████████████████████| 112kB 54.2MB/s 
Collecting torchmetrics>=0.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/3b/e8/513cd9d0b1c83dc14cd8f788d05cd6a34758d4fd7e4f9e5ecd5d7d599c95/torchmetrics-0.3.2-py3-none-any.whl (274kB)
[K     |████████████████████████████████| 276kB 51.8MB/s 
Collecting future>=0.17.1
[

In [4]:
from __future__ import print_function
from __future__ import division

In [5]:
import torch
from torch.utils.data import DataLoader

In [6]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [7]:
import torchvision
from torchvision import transforms

In [8]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
import time
import os
import copy
from tqdm.notebook import tqdm

In [10]:
# Show what is in the working directory (the Drive mount should appear as 'gdrive').
os.listdir(os.curdir)

['.config', 'gdrive', 'sample_data']

In [11]:
from gdrive.MyDrive.yelp_task.lightning_model import init_small_model
from gdrive.MyDrive.yelp_task.dataset_loaders import ObjDataset
from gdrive.MyDrive.yelp_task.metrics import multiclass_stats

In [12]:
import gc
# Run a garbage-collection pass first, then ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [13]:
# Record the library versions used for this run so results can be reproduced later.
version_report = (
    ("PyTorch Version: ", torch),
    ("Torchvision Version: ", torchvision),
    ("PyTorch Lightning Version: ", pl),
)
for heading, library in version_report:
    print(heading, library.__version__)

PyTorch Version:  1.8.1+cu101
Torchvision Version:  0.9.1+cu101
PyTorch Lightning Version:  1.3.0


In [14]:
# List the GPUs visible to this runtime (one line per device).
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-58bae768-a281-bdf6-666d-34e39119c01d)


In [15]:
# Run configuration: passed whole to init_small_model and read by the DataLoader and
# Trainer cells below.
FLAGS = {
    # Input widths — presumably the DenseNet transfer-feature vector and the object
    # vector respectively; TODO confirm against ObjDataset.
    'trf_dim': 1024,
    'obj_dim': 182,

    # Input files, relative to the working directory (inside the mounted Drive).
    'trf_path': 'gdrive/MyDrive/yelp_task/yelp_data/transfer_features/densenet_features.npz',
    'object_path': 'gdrive/MyDrive/yelp_task/yelp_data/img_objects.pickle',
    'csv_path': 'gdrive/MyDrive/yelp_task/yelp_data/business_restaurant.csv',

    # Number of model outputs; the author noted 6 as the alternative setting.
    'output_dim': 2,

    # Author's scaling notes: batch size is 512 / N gpu, workers are 4 per gpu,
    # learning rate is 0.02 * N gpu.
    'batch_size': 512,
    'num_workers': 4,
    'learning_rate': 0.02,

    # Upper bound on epochs (arbitrary, per the author).
    'max_epochs': 20,

    'multilabel': False,
    'threshold': 0.5,

    # One weight per output; the author's alternative six-entry setting was
    # [3.513, 1.63, 6.084, 9.839, 6.502, 4.625].
    'class_weight': [1.0, 1.0],
}

In [16]:
# Read the label table; the preview further down shows columns
# 'Unnamed: 0', 'photo_id' and 'is_business'.
csv_file = pd.read_csv(filepath_or_buffer=FLAGS['csv_path'])

In [17]:
# Build the model from the configuration dict using the project helper imported from
# the yelp_task lightning_model module.
yelp_model = init_small_model(FLAGS)

In [18]:
# Progress marker for the data-preparation section.
status_message = "Initializing Datasets and Dataloaders..."
print(status_message)

Initializing Datasets and Dataloaders...


In [19]:
# Preview the first five rows of the label table.
csv_file.head(n=5)

Unnamed: 0,photo_id,is_business
0,3V7tgMx3Qw5L9ZjRLNbthA,True
1,fZo1owoYqwAHW7uZlTz1XQ,False
2,zwOCQ8w3gFuF3zi_dyIWpw,False
3,hQBfeDngFMpB9HX2CPKtag,True
4,Fjh4N5B38vJWVbuQk-v3aQ,True


In [20]:
from sklearn.model_selection import train_test_split

In [21]:
# 80/10/10 split: hold out 20% of the rows, then cut that hold-out in half for dev and test.
# Both splits use the same fixed seed so the partition is reproducible.
split_seed = 42
train, holdout = train_test_split(csv_file, test_size=0.2, random_state=split_seed)
dev, test = train_test_split(holdout, test_size=0.5, random_state=split_seed)

In [22]:
# Index the three partitions by split name so later cells can loop over them.
split_names = ('train', 'dev', 'test')
dataframes = dict(zip(split_names, (train, dev, test)))

In [23]:
# Load the transfer features from the .npz archive. The archive object holds an open file
# handle, so it is opened in a `with` block and closed as soon as the array has been read.
# 'arr_0' is indexed with [()] to unwrap the stored object (presumably a 0-d object array
# holding a mapping — TODO confirm).
# NOTE: allow_pickle=True unpickles the contents; only use it on files you trust.
with np.load(FLAGS['trf_path'], allow_pickle=True) as trf_archive:
    trf_features = trf_archive['arr_0'][()]

In [24]:
# Read the pickled object features from Drive.
# NOTE: pickle.load can execute arbitrary code; only use it on files you trust.
with open(FLAGS['object_path'], mode='rb') as pickle_file:
    object_features = pickle.load(pickle_file)

In [25]:
# Re-key the mapping by the first element of each key.
# Assumes the pickled keys are tuples whose first element is the identifier the dataset
# looks up — TODO confirm.
# Keys that are not tuples are left untouched, so re-running this cell does not index into
# identifiers that were already unpacked.
object_features = {
    (key[0] if isinstance(key, tuple) else key): value
    for key, value in object_features.items()
}

In [26]:
# One ObjDataset per split, each sharing the same transfer-feature and object-feature stores.
datasets = {
    split: ObjDataset(frame, trf_features, object_features)
    for split, frame in dataframes.items()
}

In [27]:
def _build_loader(split, shuffle):
    """Return a DataLoader over ``datasets[split]``.

    Batch size and worker count come from FLAGS; ``shuffle`` decides whether the
    sample order is randomised each epoch.
    """
    return DataLoader(datasets[split],
                      batch_size=FLAGS['batch_size'],
                      shuffle=shuffle,
                      num_workers=FLAGS['num_workers'],
                      pin_memory=True)


# Only the training split is shuffled. The dev and test loaders keep a fixed order so that
# evaluation batches are the same from epoch to epoch.
dataloaders_dict = {split: _build_loader(split, shuffle=(split == 'train'))
                    for split in ('train', 'dev', 'test')}

In [28]:
# Checkpointing and early stopping both track the same validation metric.
monitored_metric = 'val_loss'
checkpoint_callback = ModelCheckpoint(monitor=monitored_metric)
# patience=2 is the early-stopping tolerance for non-improving validation checks.
early_stop_callback = EarlyStopping(monitor=monitored_metric, patience=2)

In [29]:
# Trainer with checkpointing + early stopping, 16-bit precision and stochastic weight averaging.
training_callbacks = [checkpoint_callback, early_stop_callback]
trainer = pl.Trainer(
    callbacks=training_callbacks,
    max_epochs=FLAGS['max_epochs'],
    progress_bar_refresh_rate=1,
    stochastic_weight_avg=True,
    precision=16,
    gpus=-1,  # -1 requests every visible GPU
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


In [30]:
# Train on the 'train' loader and validate on the 'dev' loader.
train_loader = dataloaders_dict['train']
dev_loader = dataloaders_dict['dev']
trainer.fit(yelp_model, train_loader, dev_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | SmallLinear      | 2.4 K 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
2.4 K     Trainable params
0         Non-trainable params
2.4 K     Total params
0.010     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/usr/lib/python3.7/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 202, in _finalize_close
    notempty.notify()
  File "/usr/lib/python3.7/threading.py", line 352, in notify
    waiter.release()
RuntimeError: release unlocked lock


In [31]:
# Show where the checkpoint callback stored its best checkpoint (empty if none was written).
best_checkpoint_path = checkpoint_callback.best_model_path
print(best_checkpoint_path)




In [32]:
# NOTE(review): this rebuilds the model from FLAGS alone, replacing the instance that was
# passed to trainer.fit. The variant that also passed the best checkpoint path is disabled,
# so the evaluation cells below presumably run on freshly initialised weights — confirm
# this is intended before trusting the reported metrics.
yelp_model = init_small_model(FLAGS)  # disabled variant: init_small_model(FLAGS, checkpoint_callback.best_model_path)

In [33]:
# Evaluate on the held-out test loader.
test_loader = dataloaders_dict['test']
trainer.test(yelp_model, test_loader)

KeyboardInterrupt: ignored

In [None]:
# Switch to evaluation mode, then place the model on GPU 0 for manual inference.
yelp_model.eval()
model = yelp_model.cuda(device=0)

In [None]:
# Accumulators for the manual evaluation loop: model outputs and reference labels.
y_pred = []
y_true = []

In [None]:
# Iterate the raw test dataset (not the DataLoader) so samples are scored one at a time.
test_dataset = datasets['test']

In [None]:
# Score every test sample individually and collect raw outputs alongside the true labels.
# The accumulators are reset here so that re-running this cell does not append duplicates.
y_pred, y_true = [], []

# Gradients are not needed for evaluation.
with torch.no_grad():
    for vec, labels in tqdm(test_dataset):
        # Add a batch dimension of 1 and move the sample to GPU 0, where the model lives.
        batch = vec.unsqueeze(0).cuda(0)
        # Bring the result back to the CPU and drop the batch dimension again.
        output = model(batch).cpu().data.numpy()[0]
        y_pred.append(output)
        y_true.append(labels.numpy().astype('int'))

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Collapse each per-class output vector to the index of its largest entry.
# Entries that are no longer arrays are kept as they are, so re-running this cell does not
# turn already-computed class indices into zeros.
y_pred = [np.argmax(p) if isinstance(p, np.ndarray) else p for p in y_pred]

In [None]:
# Fraction of test samples whose predicted class matches the label.
accuracy_score(y_true=y_true, y_pred=y_pred)

In [None]:
# F1 with scikit-learn's default binary averaging, spelled out explicitly.
f1_score(y_true=y_true, y_pred=y_pred, average='binary')