In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import torchvision.models as models
import torchvision
import os 
import pandas as pd
from skimage import io
from torch.utils.data import (
    Dataset,
    DataLoader,
)


In [None]:
# Preprocessing applied to every image before it is fed to the network:
# array -> PIL image -> shorter side resized to 256 -> central 224x224 crop
# -> float tensor -> per-channel normalisation.
# The mean/std values are the standard ImageNet channel statistics that
# torchvision documents for its pretrained models.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
class FashionDataset(Dataset):
    """Map-style dataset pairing article images with integer class labels.

    The annotations CSV is read with the ``article_id`` column as ``str`` and
    the ``product_type_name`` column as ``int``. Items are addressed by
    position: column 0 supplies the image identifier and column 1 the label.

    Args:
        csv_file: Path of the annotations CSV.
        root_dir: Directory that contains the ``.jpg`` images.
        transform: Callable applied to the loaded image; a falsy value
            (e.g. ``None``) leaves the image as loaded.
    """

    def __init__(self, csv_file, root_dir, transform):
        column_types = {
            'article_id': str,
            'product_type_name': int,
        }
        self.annotations = pd.read_csv(csv_file, dtype=column_types)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        # The file name is the identifier from column 0 with a '0' prepended.
        article_id = self.annotations.iloc[index, 0]
        file_name = '0' + str(article_id) + '.jpg'
        image = io.imread(os.path.join(self.root_dir, file_name))

        y_label = torch.tensor(self.annotations.iloc[index, 1])

        if self.transform:
            image = self.transform(image)

        return image, y_label

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab package).
# Later cells read /content/drive/MyDrive/data.zip and a checkpoint from there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Unpack the image archive stored on Google Drive into the working directory.
import zipfile
root_path = './'
with zipfile.ZipFile("/content/drive/MyDrive/data.zip", mode="r") as archive:
    archive.extractall(path=root_path)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
#Hyperparameters
# Number of target classes; sizes the output of the classifier head's last Linear layer.
num_classes = 14
# Learning rate for Adam (the optimizer cell uses this same value, 0.001).
learning_rate = 1e-3
# Mini-batch size handed to every DataLoader.
batch_size = 32
# NOTE(review): train_classifier runs 15 epochs by default and does not read
# this value -- confirm which epoch count is intended.
num_epochs = 10 

In [None]:
#Load dataset
dataset = FashionDataset(csv_file="articles_final.csv",root_dir="data", transform= transform)

# 80% train / 10% test; validation takes the remainder so the sizes always
# add up to len(dataset) despite the int() truncation.
train_size = int(0.8 * len(dataset))
test_size = int(0.1 * len(dataset))
validation_size = len(dataset) - (train_size + test_size)

# Seed the split: without a fixed generator the subsets change on every run,
# so a checkpoint reloaded in a later session could be "tested" on images it
# was trained on.
split_generator = torch.Generator().manual_seed(42)
train_set, test_set, validation_set = torch.utils.data.random_split(
    dataset,
    [train_size, test_size, validation_size],
    generator=split_generator,
)

# Only the training data needs shuffling; evaluation order does not matter,
# and keeping it fixed makes the reported metrics repeatable.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
validate_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

In [None]:
# Model
# ResNet-50 created with torchvision's pretrained weights, then moved to the
# selected device. Because model.to(device) is the cell's last expression,
# the notebook also displays the returned module.
model = torchvision.models.resnet50(pretrained=True)
model.to(device)

In [None]:
# Freeze every pretrained weight so that backpropagation skips the backbone.
for parameter in model.parameters():
    parameter.requires_grad_(False)


from collections import OrderedDict

# Swap ResNet-50's final fully connected layer for a small trainable head
# (transfer learning): only the layers created below receive gradients.
fc_inputs = model.fc.in_features

head_layers = [
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes),  # one output per class
    nn.LogSoftmax(dim=1),         # log-probabilities, the input NLLLoss expects
]
classifier = nn.Sequential(*head_layers)


# The head's final layer therefore emits log-probabilities over the classes.

model.fc = classifier

In [None]:
# Display the network, including the replaced `fc` head.
model

In [None]:
#Loss and optimizer
# NLLLoss pairs with the LogSoftmax layer that ends the classifier head.
criterion = nn.NLLLoss()
# Optimise only the new head (the backbone is frozen) and take the step size
# from the hyperparameter cell instead of repeating the literal here.
optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)

In [None]:
# Function for the validation pass
def validation(model, validateloader, criterion):
    """Accumulate loss and accuracy of ``model`` over a validation loader.

    Batches are moved to the device the model's parameters live on, so the
    function works on CPU as well as on GPU. Gradients are disabled for the
    duration of the pass. The model's train/eval mode is left untouched;
    the caller is expected to have called ``model.eval()``.

    Args:
        model: Network returning per-class scores of shape (batch, classes).
        validateloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(output, labels)``.

    Returns:
        Tuple ``(val_loss, accuracy)`` of Python floats: the SUM over batches
        of the batch loss and of the batch accuracy. Divide both by the number
        of batches to obtain averages.
    """
    # Follow the model instead of assuming CUDA is available.
    first_parameter = next(model.parameters(), None)
    if first_parameter is not None:
        target_device = first_parameter.device
    else:
        target_device = torch.device("cpu")

    val_loss = 0.0
    accuracy = 0.0

    with torch.no_grad():
        for images, labels in validateloader:
            images, labels = images.to(target_device), labels.to(target_device)

            output = model(images)
            val_loss += criterion(output, labels).item()

            # The largest log-probability is also the largest probability,
            # so no exp() is needed to pick the predicted class.
            _, predictions = output.max(dim=1)
            accuracy += (predictions == labels).float().mean().item()

    return val_loss, accuracy

In [None]:
# Train the classifier

def train_classifier(epochs=15, print_every=40):
    """Train the classifier head and periodically report validation metrics.

    Works on the notebook-level objects ``model``, ``train_loader``,
    ``validate_loader``, ``criterion``, ``optimizer`` and ``device``.

    Args:
        epochs: Number of passes over ``train_loader``.
        print_every: Run a validation pass and print metrics every this many
            optimisation steps (counted across epochs).
    """
    steps = 0

    # Use the device chosen earlier in the notebook rather than assuming CUDA.
    model.to(device)

    for e in range(epochs):

        model.train()

        # The running loss restarts every epoch, so count the batches it
        # actually covers; dividing by print_every would understate the loss
        # for the first report after an epoch boundary.
        running_loss = 0.0
        batches_in_window = 0

        for images, labels in train_loader:

            steps += 1

            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches_in_window += 1

            if steps % print_every == 0:

                model.eval()

                # Turn off gradients for validation, saves memory and computations
                with torch.no_grad():
                    validation_loss, accuracy = validation(model, validate_loader, criterion)

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/batches_in_window),
                      "Validation Loss: {:.3f}.. ".format(validation_loss/len(validate_loader)),
                      "Validation Accuracy: {:.3f}".format(accuracy/len(validate_loader)))

                running_loss = 0.0
                batches_in_window = 0
                model.train()
                    
# Run training; progress lines are printed at regular step intervals.
train_classifier()

In [None]:
def test_accuracy(model, test_loader):

    # Do validation on the test set
    model.eval()
    model.to('cuda')

    with torch.no_grad():
    
        accuracy = 0
    
        for images, labels in iter(test_loader):
    
            images, labels = images.to('cuda'), labels.to('cuda')
    
            output = model.forward(images)

            probabilities = torch.exp(output)
        
            equality = (labels.data == probabilities.max(dim=1)[1])
        
            accuracy += equality.type(torch.FloatTensor).mean()
        
        print("Test Accuracy: {}".format(accuracy/len(test_loader)))    
        
        
# Report accuracy on the held-out test split.
test_accuracy(model, test_loader)

In [None]:
# Save only the weights (state_dict) under sample_data/. The checkpoint cell
# that follows additionally stores the architecture name and class mapping.
torch.save(model.state_dict(),"sample_data/resnet50-fashionclassifier.pth")

In [None]:
# Save the checkpoint

def save_checkpoint(model, filepath='checkpoint-resnet50.pth'):
    """Write a checkpoint holding everything needed to rebuild the classifier.

    The checkpoint is a dictionary with the keys ``'arch'`` (always
    ``"resnet50"``), ``'idx_to_class'`` (class index -> garment name) and
    ``'model_state_dict'``. As a side effect the class mapping is also
    attached to the model as ``model.idx_to_class``.

    Args:
        model: The trained network whose ``state_dict()`` is saved.
        filepath: Destination file. Defaults to ``'checkpoint-resnet50.pth'``
            in the current working directory.
    """
    idx_to_class = {
        0: "Sweater",
        1: "Trousers",
        2: "Hoodie",
        3: "Skirt",
        4: "T-shirt",
        5: "Dress",
        6: "Shorts",
        7: "Shirt",
        8: "Cardigan",
        9: "Blazer",
        10: "Jacket",
        11: "Coat",
        12: "Polo shirt",
        13: "Blouse",
    }

    model.idx_to_class = idx_to_class

    checkpoint = {
        'arch': "resnet50",
        'idx_to_class': model.idx_to_class,
        'model_state_dict': model.state_dict(),
    }

    torch.save(checkpoint, filepath)
    
# Write the checkpoint for the trained model.
save_checkpoint(model)    

In [None]:
# NOTE(review): repeats the Drive mount from earlier in the notebook --
# presumably so the checkpoint-loading cells can be run on their own; confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from collections import OrderedDict

# Function that loads a checkpoint and rebuilds the model

def load_checkpoint(filepath):
    """Rebuild the fine-tuned ResNet-50 classifier from a checkpoint file.

    The checkpoint must be a dictionary with the keys ``'arch'``,
    ``'idx_to_class'`` and ``'model_state_dict'``. The classifier head is
    recreated with the layout used at training time -- Linear, ReLU,
    Dropout(0.4), Linear, LogSoftmax inside an unnamed ``nn.Sequential`` --
    so that the saved ``fc.0.*`` / ``fc.3.*`` entries match the module's
    parameter names. Layer widths are read from the saved weights.

    Args:
        filepath: Path of the checkpoint file.

    Returns:
        The reconstructed model with ``idx_to_class`` attached. Backbone
        parameters have ``requires_grad = False``. The model is on the CPU
        and ``eval()`` has not been called.

    Raises:
        ValueError: If the checkpoint names an unsupported architecture or
            its state dict lacks the expected classifier-head weights.
    """
    # Map tensors to the CPU so a checkpoint written from a CUDA model can
    # also be loaded on a machine without a GPU.
    checkpoint = torch.load(filepath, map_location="cpu")

    arch = checkpoint['arch']
    if arch != 'resnet50':
        # Previously this only printed a message and then crashed with a
        # NameError because no model had been created.
        raise ValueError("Architecture not recognized: {!r}".format(arch))

    # No pretrained download needed: load_state_dict below overwrites every
    # parameter and buffer of the network.
    model = torchvision.models.resnet50(pretrained=False)

    for param in model.parameters():
        param.requires_grad = False

    model.idx_to_class = checkpoint['idx_to_class']

    state_dict = checkpoint['model_state_dict']
    try:
        hidden_units = state_dict['fc.0.weight'].shape[0]
        num_outputs = state_dict['fc.3.weight'].shape[0]
    except KeyError as missing_key:
        raise ValueError(
            "Checkpoint does not contain the expected classifier head "
            "(missing {})".format(missing_key)
        ) from missing_key

    # Build custom classifier (same structure as the head that was trained)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, hidden_units),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(hidden_units, num_outputs),
        nn.LogSoftmax(dim=1),
    )

    model.load_state_dict(state_dict)

    return model

# Rebuild the model from the checkpoint stored on Google Drive and print its layers.
# NOTE(review): save_checkpoint(model) above writes 'checkpoint-resnet50.pth'
# to the working directory, while this path points at Drive -- confirm the
# file is copied there before this cell runs.
model = load_checkpoint('/content/drive/MyDrive/checkpoint-resnet50.pth')
print(model)