In [2]:
# Mount Google Drive under /content/drive so later cells can read the dataset
# and image files from it (force_remount=True is passed so an existing mount
# is redone rather than reused -- NOTE(review): confirm against Colab docs).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
!mkdir data
!cp "/content/drive/MyDrive/cleansed_product_images.npy" "data/cleansed_product_images.npy"

In [5]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torchvision
import torch.nn.functional as F
import numpy as np
from PIL import Image
from torchvision.transforms import transforms
from torch.utils.data import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
import time
import copy
import torch.optim as optim
import torch.nn as nn
from torch.optim import lr_scheduler
import torch.nn.functional as F
from google.colab import drive
from tqdm import tqdm
import pickle


# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using {device} for inference')

# Column names assigned to the loaded array, in stored order.
product_columns = [
    'product_id', 'product_name', 'category', 'product_description',
    'price', 'location', 'url', 'page_id', 'create_time', 'local_area',
    'city', 'price_gbp',
    'sub_cat_0', 'sub_cat_1', 'sub_cat_2', 'sub_cat_3', 'sub_cat_4',
    'img_id', 'img_array',
]

# allow_pickle=True is needed for object arrays; only load files you trust.
products = pd.DataFrame(
    np.load("data/cleansed_product_images.npy", allow_pickle=True),
    columns=product_columns,
)
products.head(5)




Using cpu for inference


Unnamed: 0,product_id,product_name,category,product_description,price,location,url,page_id,create_time,local_area,city,price_gbp,sub_cat_0,sub_cat_1,sub_cat_2,sub_cat_3,sub_cat_4,img_id,img_array
0,243809c0-9cfc-4486-ad12-3b7a16605ba9,Mirror wall art,"Home & Garden / Dining, Living Room Furniture ...","Mirror wall art. Posted by Nisha in Dining, Li...",£5.00,"Wokingham, Berkshire",https://www.gumtree.com/p/mirrors-clocks-ornam...,1426704584,2022-02-26,Wokingham,Berkshire,5.0,Home & Garden,"Dining, Living Room Furniture","Mirrors, Clocks & Ornaments",,,64aa79f3-e9fa-417c-a332-714b8ce933f1,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
1,1c58d3f9-8b93-47ea-9415-204fcc2a22e6,Stainless Steel Food Steamer,Home & Garden / Other Household Goods,Morphy Richard’s (model no 48755)Stainless ste...,£20.00,"Inverness, Highland",https://www.gumtree.com/p/other-household-good...,1426704579,2022-02-26,Inverness,Highland,20.0,Home & Garden,Other Household Goods,,,,4e670f9e-7feb-458f-b529-ac52547abe2b,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
2,1c58d3f9-8b93-47ea-9415-204fcc2a22e6,Stainless Steel Food Steamer,Home & Garden / Other Household Goods,Morphy Richard’s (model no 48755)Stainless ste...,£20.00,"Inverness, Highland",https://www.gumtree.com/p/other-household-good...,1426704579,2022-02-26,Inverness,Highland,20.0,Home & Garden,Other Household Goods,,,,a864ee52-d91e-46e7-94d1-2418e9bb2877,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
3,860673f1-57f6-47ba-8d2f-13f9e05b8f9a,Sun loungers,Home & Garden / Garden & Patio / Outdoor Setti...,I have 2 of these - collection only as I don’t...,£20.00,"Skegness, Lincolnshire",https://www.gumtree.com/p/outdoor-settings-fur...,1426704576,2022-02-26,Skegness,Lincolnshire,20.0,Home & Garden,Garden & Patio,Outdoor Settings & Furniture,,,bfe77c38-c9eb-47fb-b3d6-31ffdefb6ff9,"[[[29, 37, 40], [22, 30, 33], [22, 30, 33], [2..."
4,59948726-29be-4b35-ade5-bb2fd7331856,Coffee side table from Ammunition ammo box hai...,"Home & Garden / Dining, Living Room Furniture ...",Great reclaimed army ammunition box used as co...,£115.00,"Radstock, Somerset",https://www.gumtree.com/p/other-dining-living-...,1426704575,2022-02-26,Radstock,Somerset,115.0,Home & Garden,"Dining, Living Room Furniture",Other,,,a92e56b7-94fc-41b4-ba6c-f2f224f42bb8,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."


In [6]:
class ProductImageCategoryDataset(Dataset):
    """Dataset pairing each product image with its encoded top-level category.

    Item ``i`` is ``(image, label)``: ``image`` is the file
    ``<image_dir><img_id>.jpg`` after ``transform`` has been applied, and
    ``label`` is a 0-dim tensor holding the integer id of the row's
    ``sub_cat_0`` value.

    Args:
        data: DataFrame providing the ``'sub_cat_0'`` and ``'img_id'`` columns.
            Rows are addressed by position, so any index is accepted.
        transform: Callable applied to the RGB PIL image. When ``None`` a
            default resize / centre-crop / random-flip / normalise pipeline
            is used.
        image_dir: Prefix prepended verbatim to ``<img_id>.jpg``; it must end
            with a path separator.

    Attributes:
        category_encoder: Mapping from category name to integer id.
        category_decoder: Inverse mapping, integer id to category name.
        num_classes: Number of distinct categories.
    """

    def __init__(self, data, transform=None,
                 image_dir='/content/drive/MyDrive/original_256/'):
        super().__init__()
        # Plain lists give positional indexing; a pandas Series would be
        # looked up by index *label* and break on a filtered/shuffled frame.
        self.labels = data['sub_cat_0'].to_list()
        self.files = data['img_id'].to_list()
        self.data_size = len(self.files)
        self.image_dir = image_dir

        # Sort the categories so the class ids are identical on every run
        # (iteration order of a set of strings changes between interpreter
        # sessions). key=str keeps the sort usable if a non-string sneaks in.
        categories = sorted(set(self.labels), key=str)
        self.num_classes = len(categories)
        self.category_encoder = {name: idx for idx, name in enumerate(categories)}
        self.category_decoder = dict(enumerate(categories))

        self.transform = transform
        if transform is None:
            # Default pipeline; the mean/std are the values commonly used
            # with ImageNet-pretrained backbones.
            self.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(256),
                transforms.RandomHorizontalFlip(p=0.3),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])

        assert len(self.files) == len(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``."""
        label = torch.as_tensor(self.category_encoder[self.labels[index]])
        path = self.image_dir + self.files[index] + '.jpg'
        # The context manager releases the file handle; convert('RGB')
        # guarantees three channels for grayscale / RGBA / palette files.
        with Image.open(path) as raw:
            image = raw.convert('RGB')
        image = self.transform(image)
        return (image, label)

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.data_size

In [7]:
# Loading pretrained model
from torchvision import models


class ResNet50(torch.nn.Module):
    """Pretrained NVIDIA ResNet-50 followed by a linear classification layer.

    The hub model's own ``fc`` output is fed into an extra ``nn.Linear`` that
    maps it to ``num_classes`` scores.

    Args:
        num_classes: Number of output scores produced per input image.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = torch.hub.load(
            'NVIDIA/DeepLearningExamples:torchhub',
            'nvidia_resnet50',
            pretrained=True,
        )
        # Attribute names and assignment order are kept so that state_dict
        # keys stay the same.
        self.resnet50 = backbone
        head = nn.Linear(backbone.fc.out_features, num_classes).to(device)
        self.linear = head
        self.main = nn.Sequential(backbone, head).to(device)

    def forward(self, inp):
        """Run ``inp`` through the backbone and the linear head."""
        return self.main(inp)

In [12]:
# Training hyper-parameters.
batch_size = 8
epochs = 5

# Data pipeline: every row of `products` becomes one (image, label) sample.
dataset = ProductImageCategoryDataset(products)
num_classes = dataset.num_classes
print(num_classes)
dataloader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=batch_size,
    num_workers=1,
)

# Model, loss and optimiser.
model = ResNet50(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=0.001)

13


Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


In [15]:
# Train the model for `epochs` passes over `dataloader`, then save the weights
# and the id -> category mapping needed to decode predictions.

# Start the clock once, before the first epoch, so the printed duration covers
# the whole run (and `since` exists even when `epochs` is 0).
since = time.time()

# Put the model in training mode explicitly (matters for layers such as
# batch norm that behave differently in eval mode).
model.train()

for epoch in range(epochs):
    # Per-batch metrics; reset every epoch so "Total_acc" is the running mean
    # of the current epoch.
    hist_accuracy = []
    hist_loss = []
    pbar = tqdm(enumerate(dataloader), total=len(dataloader))
    for i, batch in pbar:
        image_features, labels = batch
        image_features = image_features.to(device)
        labels = labels.to(device)

        optimiser.zero_grad()
        outputs = model(image_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        # Fraction of the batch whose highest-scoring class matches the label;
        # computed once and reused for both the history and the progress bar.
        accuracy = torch.sum(torch.argmax(outputs, dim=1) == labels).item()/len(labels)
        hist_accuracy.append(accuracy)
        hist_loss.append(loss.item())
        pbar.set_description(f"Epoch = {epoch+1}/{epochs}. loss = {loss.item():.4f} Acc = {round(accuracy, 2)}, Total_acc = {round(np.mean(hist_accuracy), 2)}" )

time_elapsed = time.time() - since
print(time_elapsed)

torch.save(model.state_dict(), 'image_model.pt')

# Persist the decoder alongside the weights so class ids can be mapped back
# to category names at inference time.
with open('image_decoder.pkl', 'wb') as f:
    pickle.dump(dataset.category_decoder, f)

Epoch = 1/5. loss = 2.7761 Acc = 0.0, Total_acc = 0.1:   1%|▏         | 19/1383 [01:54<2:17:30,  6.05s/it]


KeyboardInterrupt: ignored