In [None]:
from google.colab import drive
import os

In [None]:
# Mount Google Drive under /content/gdrive so the photo archive can be read
# and the extracted features written back later in the notebook.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Copy the photo archive from the mounted Drive into the working directory.
# NOTE(review): the source path is relative, so this presumably relies on the
# working directory being /content (where Drive was mounted) — confirm.
!cp --verbose gdrive/MyDrive/yelp_task/yelp_photos.tar yelp_photos.tar

'gdrive/MyDrive/yelp_task/yelp_photos.tar' -> 'yelp_photos.tar'


In [None]:
# Unpack the archive; the next cells read the extracted images from ./photos.
!tar -xf yelp_photos.tar

In [None]:
# The archive is no longer needed once extracted; delete the local copy.
!rm yelp_photos.tar

In [None]:
# Sanity check: number of entries extracted into the photos directory.
len(os.listdir('photos'))

199999

In [None]:
from __future__ import print_function
from __future__ import division

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets, models, transforms
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
import time
import os
import copy
from tqdm.notebook import tqdm

In [None]:
# Record the library versions this run used, for reproducibility.
for banner, version in (
    ("PyTorch Version: ", torch.__version__),
    ("Torchvision Version: ", torchvision.__version__),
):
    print(banner, version)

PyTorch Version:  1.8.1+cu101
Torchvision Version:  0.9.1+cu101


In [None]:
# Directory holding the extracted photos; passed to IsRestaurantDataset below.
photo_dir = './photos'

In [None]:
# Run configuration, gathered in one place so it is easy to tweak.
FLAGS = {
    'model_name': 'squeezenet',   # backbone key understood by initialize_model
    'num_classes': 2,             # forwarded to initialize_model
    'batch_size': 512,            # DataLoader batch size
    'num_workers': 2,             # DataLoader worker processes
    'feature_extract': True,      # freeze all backbone parameters
}

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        feature_extracting: When falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [None]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision backbone with its final classification layers removed.

    Args:
        model_name (str): One of "resnet", "alexnet", "vgg", "squeezenet"
            or "densenet".
        num_classes (int): Not used by this function; kept so existing call
            sites remain valid.
        feature_extract (bool): When True, every parameter of the backbone is
            frozen via ``set_parameter_requires_grad``.
        use_pretrained (bool): Forwarded as ``pretrained=`` to the torchvision
            model constructor.

    Returns:
        tuple: ``(model_ft, input_size)`` - the truncated model and the image
        side length it is fed with (224 for every supported architecture).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    input_size = 224

    if model_name == "resnet":
        # Resnet18: keep every child module except the last one (the fc head).
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        modules = list(model_ft.children())[:-1]
        model_ft = nn.Sequential(*modules)

    elif model_name == "alexnet":
        # Alexnet: drop the last two modules of the classifier.
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        new_classifier = nn.Sequential(*list(model_ft.classifier.children())[:-2])
        model_ft.classifier = new_classifier

    elif model_name == "vgg":
        # VGG11_bn: drop the last three modules of the classifier.
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = model_ft.classifier[:-3]

    elif model_name == "squeezenet":
        # Squeezenet 1.0: drop the last two modules of the classifier.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = model_ft.classifier[:-2]

    elif model_name == "densenet":
        # Densenet121: replace the classifier with a pass-through.
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = nn.Identity()

    else:
        # Raise instead of exit(): exit() would tear down the notebook kernel
        # and cannot be handled by the caller.
        raise ValueError(
            "Invalid model name {!r}; expected one of: resnet, alexnet, vgg, "
            "squeezenet, densenet".format(model_name)
        )

    return model_ft, input_size

In [None]:
# Build the truncated backbone selected in FLAGS, together with the input
# resolution it is fed with.
model_ft, input_size = initialize_model(
    FLAGS['model_name'],
    FLAGS['num_classes'],
    FLAGS['feature_extract'],
    use_pretrained=True,
)

Downloading: "https://download.pytorch.org/models/squeezenet1_0-a815701f.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_0-a815701f.pth


HBox(children=(FloatProgress(value=0.0, max=5017600.0), HTML(value='')))




In [None]:
# Run on the first CUDA GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model_ft = model_ft.to(device)

In [None]:
# Show the layer structure of the truncated model as a visual check.
print(model_ft)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

In [None]:
# Preprocessing applied to every photo: resize, centre-crop to a square of
# side input_size, convert to a tensor and normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics
# used with torchvision pretrained models - confirm.
_preprocess_steps = [
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
img_transforms = transforms.Compose(_preprocess_steps)

In [None]:
class IsRestaurantDataset(Dataset):
    """Is It A Restaurant Dataset.

    Yields ``(image, img_name)`` pairs for every ``.jpg`` file found directly
    inside ``root_dir``; ``img_name`` is the file name without its extension.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        # Keep only JPEG files and sort them: os.listdir returns entries in
        # arbitrary order, and stray non-image entries would fail to open.
        self.img_names = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.splitext(entry)[1].lower() == '.jpg'
        )
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            tuple: ``(image, img_name)`` where ``image`` is the RGB image
            (after ``transform`` if one was given) and ``img_name`` is the
            file name without extension.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.img_names[idx]
        # splitext keeps names containing extra dots intact.
        img_name = os.path.splitext(file_name)[0]
        img_path = os.path.join(self.root_dir, file_name)

        # Force three channels so grayscale/CMYK files do not break a
        # 3-channel normalisation; convert() returns a fully loaded copy, so
        # the file handle can be closed right away.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [None]:
trans_dataset = IsRestaurantDataset(root_dir=photo_dir, transform=img_transforms)
# Single loader over all photos; shuffle is off so batches follow dataset order.
trans_loader = DataLoader(
    trans_dataset,
    batch_size=FLAGS['batch_size'],
    shuffle=False,
    num_workers=FLAGS['num_workers'],
)

In [None]:
# Accumulator for the extracted features; this is the object saved with
# np.savez at the end of the notebook.
squeezenet_dict = {}

In [None]:
# Inference mode: disables Dropout (and freezes normalisation statistics where
# present) so the extracted features are deterministic.
model_ft.eval()

# NOTE(review): the classifier was truncated before its final layers, so each
# feature vector may be very large - confirm that ~200k vectors fit in host
# memory before running on the full dataset.

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for images, img_names in tqdm(trans_loader):
        # Use the device the model lives on instead of a hard-coded .cuda().
        images = images.to(device)

        # Move results to host memory right away so GPU memory is not
        # exhausted by keeping every batch's output alive.
        vectors = model_ft(images).cpu().numpy()
        for img_name, vector in zip(img_names, vectors):
            squeezenet_dict[img_name] = vector

HBox(children=(FloatProgress(value=0.0, max=391.0), HTML(value='')))

In [None]:
features_path = 'gdrive/MyDrive/yelp_task/yelp_data/transfer_features/squeezenet_features.npz'
# Create the destination folder up front so the save cannot fail on a missing
# directory after the long extraction run.
os.makedirs(os.path.dirname(features_path), exist_ok=True)
# NOTE: the dict is passed positionally, so it is stored as one pickled object
# array under the key 'arr_0'; read it back with
# np.load(features_path, allow_pickle=True)['arr_0'].item().
np.savez(features_path, squeezenet_dict)