In [None]:
from google.colab import drive
import os

In [None]:
# Mount Google Drive at /content/gdrive so the archive, CSV and project modules
# under gdrive/MyDrive/yelp_task are reachable. force_remount=True re-mounts
# even if a mount from a previous run is still present.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# Copy the photo archive from Drive into the current working directory.
# NOTE(review): the source path is relative, so this assumes the notebook runs
# from the directory that contains the `gdrive` mount (/content on Colab).
!cp --verbose gdrive/MyDrive/yelp_task/photo_tars/br_photos.tar br_photos.tar

'gdrive/MyDrive/yelp_task/photo_tars/br_photos.tar' -> 'br_photos.tar'


In [None]:
# Unpack the archive into the working directory; the check further down
# expects this to produce a `photos` directory.
!tar -xf br_photos.tar

In [None]:
# The archive is no longer needed once extracted; delete the local copy.
!rm br_photos.tar

In [None]:
# Sanity-check the extraction: a truncated copy or a partial untar would leave
# fewer files than expected. Report the actual count so a failure is diagnosable.
EXPECTED_PHOTO_COUNT = 59556
n_photos = len(os.listdir('photos'))
assert n_photos == EXPECTED_PHOTO_COUNT, (
    f"expected {EXPECTED_PHOTO_COUNT} entries in 'photos', found {n_photos}"
)

In [None]:
!pip install pytorch-lightning

Collecting pytorch-lightning
[?25l  Downloading https://files.pythonhosted.org/packages/07/0c/e2d52147ac12a77ee4e7fd7deb4b5f334cfb335af9133a0f2780c8bb9a2c/pytorch_lightning-1.2.10-py3-none-any.whl (841kB)
[K     |████████████████████████████████| 849kB 4.0MB/s 
[?25hCollecting torchmetrics==0.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/3a/42/d984612cabf005a265aa99c8d4ab2958e37b753aafb12f31c81df38751c8/torchmetrics-0.2.0-py3-none-any.whl (176kB)
[K     |████████████████████████████████| 184kB 8.7MB/s 
[?25hCollecting future>=0.17.1
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 8.2MB/s 
Collecting PyYAML!=5.4.*,>=5.1
[?25l  Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)
[K     |█████████████████████████████

In [None]:
# List the GPUs visible to this runtime. The command fails with a driver error
# when the runtime has no GPU attached.
!nvidia-smi -L

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [None]:
from __future__ import print_function
from __future__ import division

In [None]:
import torch
from torch.utils.data import DataLoader

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
import torchvision
from torchvision import transforms

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import time
#import os
import copy
from tqdm.notebook import tqdm

In [None]:
from gdrive.MyDrive.yelp_task.lightning_model import LightningTransfer, init_cls_model
from gdrive.MyDrive.yelp_task.dataset_loaders import IsRestaurantDataset

In [None]:
import gc
# Start from a clean slate when re-running on a used kernel: collect
# unreferenced Python objects, then release PyTorch's cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Record the library versions this run used, one line per library.
for label, module in (("PyTorch", torch),
                      ("Torchvision", torchvision),
                      ("PyTorch Lightning", pl)):
    print(f"{label} Version: ", module.__version__)

PyTorch Version:  1.8.1+cu101
Torchvision Version:  0.9.1+cu101
PyTorch Lightning Version:  1.2.10


In [None]:
# Directory holding the extracted photos (relative to the working directory).
photo_dir = 'photos/'
# CSV on Drive with one row per photo; read into a DataFrame further down.
csv_path = "gdrive/MyDrive/yelp_task/yelp_data/business_restaurant.csv"

In [None]:
# Run configuration shared by the model factory, the dataloaders and the
# checkpoint file name. The per-GPU notes are the author's scaling rules.
FLAGS = dict(
    model_name='resnet',
    num_classes=2,
    batch_size=512,       # 512 / N gpu
    num_workers=4,        # 4 per gpu
    learning_rate=0.02,   # 0.02 * N gpu
    max_epochs=10,        # arbitrary
    feature_extract=True,
    use_pretrained=True,
)

In [None]:
# Build the model from FLAGS using the project helper. It returns a pair: the
# model handed to the Trainer below, and `input_size`, which the image
# transforms use for resizing/cropping.
# NOTE(review): presumably input_size is the side length the backbone expects —
# confirm in lightning_model.py.
transfer_model, input_size = init_cls_model(FLAGS)

In [None]:
# Per-channel mean/std used to normalise images (the usual ImageNet statistics
# for torchvision's pretrained backbones).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return the tail shared by every pipeline: PIL image -> normalised tensor."""
    return [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]


def _eval_pipeline():
    """Deterministic preprocessing for evaluation: resize, centre-crop, normalise."""
    return transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        *_tensor_and_normalize(),
    ])


# 'train' applies random crop/flip augmentation; 'dev' and 'test' each get
# their own instance of the deterministic evaluation pipeline.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        *_tensor_and_normalize(),
    ]),
    'dev': _eval_pipeline(),
    'test': _eval_pipeline(),
}

In [None]:
# Progress marker for the data-preparation cells that follow.
print("Initializing Datasets and Dataloaders...")

Initializing Datasets and Dataloaders...


In [None]:
# Load the per-photo label table (columns seen in the preview: 'Unnamed: 0',
# 'photo_id', 'is_business').
# NOTE(review): 'Unnamed: 0' looks like an index that was written to the CSV.
# Before dropping it (e.g. index_col=0), confirm IsRestaurantDataset does not
# address columns by position.
csv_file = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to check the columns and label values.
csv_file.head()

Unnamed: 0,photo_id,is_business
0,3V7tgMx3Qw5L9ZjRLNbthA,True
1,fZo1owoYqwAHW7uZlTz1XQ,False
2,zwOCQ8w3gFuF3zi_dyIWpw,False
3,hQBfeDngFMpB9HX2CPKtag,True
4,Fjh4N5B38vJWVbuQk-v3aQ,True


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Stratified 80/10/10 split on the `is_business` label.
# Both steps live in one cell and go through a separate `holdout` frame, so
# re-running the cell always yields the same split. Rebinding `test` across two
# cells meant a re-run of the second one halved the test set again.
train, holdout = train_test_split(csv_file, test_size=0.2, random_state=42,
                                  stratify=csv_file.is_business)
dev, test = train_test_split(holdout, test_size=0.5, random_state=42,
                             stratify=holdout.is_business)

# Every row must land in exactly one split.
assert len(train) + len(dev) + len(test) == len(csv_file)

In [None]:
# Split name -> DataFrame; keyed the same way as `data_transforms`.
data_frames = dict(train=train, dev=dev, test=test)

In [None]:
SPLITS = ['train', 'dev', 'test']

# One dataset per split, each paired with that split's transform pipeline.
image_datasets = {
    split: IsRestaurantDataset(data_frames[split], photo_dir, data_transforms[split])
    for split in SPLITS
}

# One dataloader per split. Only the training data is shuffled: shuffling the
# dev/test loaders adds nothing to evaluation and makes per-batch validation
# output non-reproducible (the dev loader used to be shuffled too).
dataloaders_dict = {
    split: DataLoader(image_datasets[split],
                      batch_size=FLAGS['batch_size'],
                      shuffle=(split == 'train'),
                      num_workers=FLAGS['num_workers'],
                      pin_memory=True)
    for split in SPLITS
}

In [None]:
# Display the mapping to confirm a loader exists for each split.
dataloaders_dict

{'dev': <torch.utils.data.dataloader.DataLoader at 0x7f7fe1130ad0>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f7fe1130a90>,
 'train': <torch.utils.data.dataloader.DataLoader at 0x7f7fe1130890>}

In [None]:
# Checkpointing keyed on 'val_loss'; `best_model_path` is read back after
# training to reload the best weights.
checkpoint_callback = ModelCheckpoint(monitor='val_loss')
# Stop training once 'val_loss' has failed to improve for 2 validation checks.
# NOTE(review): both callbacks rely on the module logging a metric named
# 'val_loss' — presumably LightningTransfer does; verify.
early_stop_callback = EarlyStopping(monitor='val_loss', patience=2)

In [None]:
# Configure the Trainer.
# - max_epochs: honour FLAGS['max_epochs']. It was previously only mentioned in
#   a commented-out line, so the configured cap was silently ignored.
# - gpus: use every visible GPU when CUDA is available, otherwise fall back to
#   CPU so the cell also runs on a runtime without an accelerator.
# Optional extras the author experimented with (GPU only):
#   stochastic_weight_avg=True, precision=16
trainer = pl.Trainer(callbacks=[checkpoint_callback, early_stop_callback],
                     progress_bar_refresh_rate=1,
                     max_epochs=FLAGS['max_epochs'],
                     gpus=-1 if torch.cuda.is_available() else None)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [None]:
# Train on the 'train' loader and validate on the 'dev' loader; the callbacks
# registered on the Trainer handle checkpointing and early stopping.
trainer.fit(transfer_model, dataloaders_dict['train'], dataloaders_dict['dev'])


  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
1.0 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…




1

In [None]:
# Persist the trainer's current state to Drive so it outlives the Colab VM.
# This is the state at the end of training; the best-'val_loss' checkpoint is
# tracked separately by `checkpoint_callback`.
final_ckpt_path = f"gdrive/MyDrive/yelp_task/yelp_data/checkpoints/{FLAGS['model_name']}.ckpt"
# Make sure the target folder exists on a fresh Drive before writing.
os.makedirs(os.path.dirname(final_ckpt_path), exist_ok=True)
trainer.save_checkpoint(final_ckpt_path)

In [None]:
# Show where ModelCheckpoint stored the best checkpoint on the local disk.
print(checkpoint_callback.best_model_path)

/content/lightning_logs/version_0/checkpoints/epoch=2-step=281.ckpt


In [None]:
# Rebuild the model, passing the best checkpoint path to the project helper;
# the returned input size is not needed again and is discarded.
# NOTE(review): presumably the second argument makes init_cls_model load the
# weights from that checkpoint — confirm in lightning_model.py.
transfer_model, _ = init_cls_model(FLAGS, checkpoint_callback.best_model_path)

In [None]:
# Evaluate the reloaded model on the held-out test loader; the result contains
# the metrics the module logs during testing ('test_loss' in the output below).
trainer.test(transfer_model, dataloaders_dict['test'])

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.7108507752418518}
--------------------------------------------------------------------------------


[{'test_loss': 0.7108507752418518}]

In [None]:
# Switch to inference mode and move the model to the first GPU when one is
# available. Falling back to CPU keeps the cell runnable on a runtime without
# an accelerator, where a hard-coded .cuda(device=0) would raise.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = transfer_model.eval().to(device)

In [None]:
# Collect per-image model outputs and ground-truth labels for the test split.
# The accumulators are reset in the same cell as the loop, so re-running it
# cannot append a second copy of every prediction.
preds, labels = [], []

# Feed inputs to whichever device the model currently lives on.
device = next(model.parameters()).device

# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    for image, is_restaurant in tqdm(image_datasets['test']):
        # unsqueeze(0) adds the batch dimension expected by the model.
        output = model(image.unsqueeze(0).to(device)).detach().cpu().numpy()
        preds.append(output[0])
        labels.append(is_restaurant)

HBox(children=(FloatProgress(value=0.0, max=5956.0), HTML(value='')))




In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Fraction of test images whose prediction matches the label
# (argument order: true labels first, then predictions).
accuracy_score(labels, preds)

0.6633646742780389

In [None]:
# F1 score on the same predictions, using scikit-learn's default binary
# averaging (argument order: true labels first, then predictions).
f1_score(labels, preds)

0.6895804304071836