In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torchsummary import summary
import torchvision
from torchvision import datasets, models, transforms
import time
import copy
import math
import os
import glob
import imageio
from PIL import Image
import torch.backends.cudnn as cudnn
import csv


In [None]:
df = pd.read_csv('/home/shah/Desktop/FB-Marketplace-Recommendation-Ranking-System/data/y_label.csv')
df

In [None]:
df.dtypes

In [2]:
class ProductImageCategoryDataset():
    def __init__(self, root_dir, num_classes, transform=None):
        super().__init__()
        self.root_dir = root_dir
        self.data = pd.read_csv("/home/shah/Desktop/FB-Marketplace-Recommendation-Ranking-System/data/y_label.csv")
        self.num_classes = num_classes
        self.transform=transform
    
    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        image_name, label = self.data[index]
        image_path = {x: datasets.ImageFolder(os.path.join(self.root_dir, image_name, x),
                                          self.transform[x])
                  for x in ['train', 'val']}
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)

        #image = np.array(imageio.imread(image_path))
        #label = image_name[index]
        label = int(label)

        return image, label

In [3]:
#split the dataset to train, val, test

cudnn.benchmark = True
plt.ion()   # interactive mode

# Data augmentation and normalization for training
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

root_dir = 'data/data_images/'

dataset = ProductImageCategoryDataset(root_dir, num_classes=13, transform=data_transforms)

image_datasets = {
    'train': 
    datasets.ImageFolder(root_dir + 'train', data_transforms['train']),
    'val': 
    datasets.ImageFolder(root_dir + 'val', data_transforms['val']),
}

dataloaders = {
    'train':
    torch.utils.data.DataLoader(image_datasets['train'],
                                batch_size=4,
                                shuffle=True,
                                num_workers=0), 
    'val':
    torch.utils.data.DataLoader(image_datasets['val'],
                                batch_size=4,
                                shuffle=True,
                                num_workers=0),
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

TypeError: unsupported operand type(s) for +: 'ProductImageCategoryDataset' and 'str'

In [None]:

examples = {x: (image_datasets) for x in ['train', 'val']}
features, label = examples
print(label)

In [None]:
dataset_sizes

In [None]:
class_names

In [None]:
model = torchvision.models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

#summary(model_ft, (3, 64, 64))
num_fltrs = model.fc.in_features
num_classes = 13
feature_extract = True

model.fc = torch.nn.Linear(num_fltrs, num_classes)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=4):
    #Train the model
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        model.train()

        running_loss = 0.0

        for i, data in enumerate(train_loader['train'], 0):
            #features, label = batch
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                scheduler.step()

                running_loss += loss.item() * inputs.size(0)
            

                if (i+1) % 200 == 199:
                    print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iteration}, loss: {running_loss / 200:.3f}, inputs {inputs.shape}')
                    running_loss = 0.0
        epoch_loss = running_loss / dataset_sizes
        print('Loss: {:.4f}'.format(epoch_loss))

    return model


In [None]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
model_ft = train_model(model, criterion, optimizer, scheduler, num_epochs=20)

In [None]:
#test the model
model.eval()

with torch.no_grad():
        correct = 0
        total = 0
        for batch in train_loader['test']:
                features, label = batch
                test_output = model(features)
                last_layer = test_output
                pred_y = torch.max(test_output, 1)[1].data.squeeze()
                correct += (pred_y == label).sum().item()
                total += pred_y.size(0)
        print('Got %d / %d correct (%.2f)' % (correct, total, float(correct / total)))
        pass

In [None]:
import matplotlib.pyplot as plt

# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()