In [None]:
#create image classification model
#first create an image dataset with correct labelling
#load batches of data samples in pytorch and use cleaned images dataset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import torch
import torchvision
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.data.sampler import SubsetRandomSampler
import pickle
import random
import torch.optim as optim
from torch.optim import lr_scheduler
from torchsummary import summary
import torchvision
import torchdata as td
from torch.utils.tensorboard import SummaryWriter
import time
import copy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ProductImageCategoryDataset():
    def __init__(self, learning_rate = 1e-3):
        super().__init__()
        self.X = pd.read_pickle('data/image_model_X.pkl')
        #self.X['image_array'] = self.X.values.tolist()
        #self.X = self.X['image_array']
        self.y = pd.read_pickle('data/image_model_y.pkl')
        #print(train_X.shape)
        #print(len(self.X))
        #print(len(self.y))
        assert len(self.X) == len(self.y)

    def __getitem__(self, index):
        features = self.X.iloc[index]
        label = self.y.iloc[index]
        #print(index)
        features = torch.tensor(features).float()
        #3=num of batch(get 3 images at every iteration of training the network)
        features = features.reshape(3, 64, 64)
        #print(features.shape)
        label = int(label)
        
        return (features, label)
    
    def __len__(self):
        return len(self.X)

dataset = ProductImageCategoryDataset()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#print(len(dataset))
#dataset[12667]
#for idx in range(len(dataset)):
    #example = dataset[idx]
    #features, label = example
    #print(features.shape)
    #print(label)
  #  break

In [3]:
train_loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=1)

In [None]:
random_seed = 42
test_split_size = 0.2
shuffle_dataset = True

dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(test_split_size * dataset_size))
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)



In [None]:
train_loader = {
    'train' : torch.utils.data.DataLoader(dataset, batch_size=16, sampler=train_sampler, num_workers=1),
    'test' : torch.utils.data.DataLoader(dataset, batch_size=16, sampler=test_sampler, num_workers=1)
}

In [None]:
def imshow(inp, title=None):
    inp = inp.cpu() if device else inp
    inp = inp.numpy().transpose((1, 2, 0))

    mean = np.array([0.485, 0.456, 0.406]) 
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)
    
images, label = next(iter(train_loader['train'])) 
print("images-size:", images.shape)

out = torchvision.utils.make_grid(images)
print("out-size:", out.shape)

imshow(out, title=[label])

In [None]:
#class CNN(torch.nn.Module):
 #   def __init__(self):
  #      super(CNN, self).__init__()
        #self.layers = torch.nn.Sequential(
            #3 input image channels, 8 output channels,
            #9x9 square convolution kernel
   #     self.conv1 = torch.nn.Conv2d(3, 6, 5)
            #Max pooling over a (2, 2) window, #kernel and stride=2
    #    self.maxpool = torch.nn.MaxPool2d(2, 2)
     #   self.conv2 = torch.nn.Conv2d(6, 16, 5)
            #torch.nn.Flatten(), #flatten
      #  self.fc1 = torch.nn.Linear(2704, 120) #simplifiy further with linear layers - output channel((128-5)/1+1) = 124, 124/2=62 so 62*62*6=23064
      #  self.fc2 = torch.nn.Linear(120, 84)
      #  self.fc3 = torch.nn.Linear(84, 14)
      #  torch.nn.Softmax(dim=1)

    #def forward(self, x):
     #   x = self.maxpool(F.relu(self.conv1(x)))
      #  x = self.maxpool(F.relu(self.conv2(x)))
       # x = torch.flatten(x, 1) # flatten all dimensions except batch
       # x = F.relu(self.fc1(x))
       # x = F.relu(self.fc2(x))
       # x = self.fc3(x)  
       # return x

#model = CNN()
#criterion = torch.nn.CrossEntropyLoss()
#optimizer = Optimizer = optim.Adadelta(model.parameters(), lr=0.001)



In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
#Train the model
    batch_loss =[]
    writer = SummaryWriter()
    writer2 = SummaryWriter()
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, batch in enumerate(train_loader):
        
            features, label = batch
            #features = features.to(device)
            #label = label.to(device)

            outputs = model(features)
            loss = criterion(outputs, label)

   
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            batch_loss.append(loss.item())
            if (i+1) % 100 == 0:
                print('[%d, %5d] loss: %.3f' % (epoch +1, i+1, running_loss / 200))
                running_loss = 0.0
                #print('Epoch [{}/{}, Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
        avg_loss = sum(batch_loss[-3:])/3
        print('epoch: %d, avg_loss: %.3f' % (epoch + 1, avg_loss))
        writer2.add_scalar('Avg Loss', avg_loss, epoch)
    writer.flush()
    writer2.flush()
    #print('{} loss: {: {:.4f}, acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        
    print(outputs)
model_ft = torchvision.models.resnet50(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad = False

#summary(model_ft, (3, 64, 64))
num_fltrs = model_ft.fc.in_features

model_ft.fc = torch.nn.Linear(num_fltrs, 13)
model_ft = model_ft.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model_ft.parameters(), lr=0.001)

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
model = train_model(model_ft, criterion, optimizer, scheduler, num_epochs=10)

#still getting more losses improve the loop and fine tune the network further

2022-05-26 18:53:03.073873: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-26 18:53:03.073892: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


[1,   100] loss: 1.427
[1,   200] loss: 1.393
[1,   300] loss: 1.418
[1,   400] loss: 1.418
[1,   500] loss: 1.436
[1,   600] loss: 1.434
[1,   700] loss: 1.434
epoch: 1, avg_loss: 2.841
[2,   100] loss: 1.375
[2,   200] loss: 1.396
[2,   300] loss: 1.414
[2,   400] loss: 1.390
[2,   500] loss: 1.437
[2,   600] loss: 1.421
[2,   700] loss: 1.413
epoch: 2, avg_loss: 2.932
[3,   100] loss: 1.379
[3,   200] loss: 1.374
[3,   300] loss: 1.403
[3,   400] loss: 1.391
[3,   500] loss: 1.398
[3,   600] loss: 1.412
[3,   700] loss: 1.398
epoch: 3, avg_loss: 2.909
[4,   100] loss: 1.360
[4,   200] loss: 1.383
[4,   300] loss: 1.367
[4,   400] loss: 1.411
[4,   500] loss: 1.403
[4,   600] loss: 1.409
[4,   700] loss: 1.390
epoch: 4, avg_loss: 2.850
[5,   100] loss: 1.339
[5,   200] loss: 1.368
[5,   300] loss: 1.375
[5,   400] loss: 1.376
[5,   500] loss: 1.384
[5,   600] loss: 1.368
[5,   700] loss: 1.383
epoch: 5, avg_loss: 2.655
[6,   100] loss: 1.352
[6,   200] loss: 1.362
[6,   300] loss: 1.

In [7]:
#test the model
model_ft.eval()

with torch.no_grad():
    correct = 0
    total = 0
    for batch in train_loader:
            features, label = batch
            test_output = model_ft(features)
            last_layer = test_output
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == label).sum().item() / float(label.size(0))
    print('Test Accuracy of the model on the test images: %.2f' % accuracy)
    pass

Test Accuracy of the model on the test images: 0.50
