In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from torch.utils.data import DataLoader
from torch.nn.functional import relu

from collections import defaultdict, OrderedDict
from sklearn import metrics


In [2]:
print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
print('Device:', torch.device('cuda:0'))

!nvcc --version

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available(), device

Torch 1.12.1 CUDA None
Device: cuda:0
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_Mar__8_18:18:20_PST_2022
Cuda compilation tools, release 11.6, V11.6.124
Build cuda_11.6.r11.6/compiler.31057947_0


(False, device(type='cpu'))

In [21]:
config = {}

NUM_TARGETS= 3

USER_PATHWAY  = [256, 128, 64]
ITEM_PATHWAY = [256, 128, 64]
COMBINED_PATHWAY = [256, 128, 64, 16]

EMBED_DIM = 10
NUM_ITEM_EMBED = 1378
NUM_USER_EMBED = 47958
NUM_CUPSIZE_EMBED =  12
NUM_CATEGORY_EMBED = 7

NUM_USER_NUMERIC = 5
NUM_ITEM_NUMERIC = 2

DROPOUT = 0.3

EPOCHS = 1
LR = 0.001
WEIGHT_DECAY = 0.0001
BATCH_SIZE = 128


In [30]:
class ModCloth(torch.utils.data.Dataset):
    def __init__(self,datapath):
        self.data = pd.read_csv(datapath)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):

        r = self.data.iloc[idx,:]

        return {
            "user_id" : np.array(r['user_id'], dtype=np.int64),
            "cup_size" : np.array(r['cup_size'], dtype=np.int64),
            "user_numeric" : np.array([r['waist'], r['hips'], r['bra_size'], r['height'], r['shoe_size']], dtype=np.float32),
            "item_id" : np.array(r['item_id'], dtype = np.int64),
            "category" :np.array(r['category'], dtype = np.int64),
            "item_numeric" : np.array([r['size'], r['quality']], dtype=np.float32),
            "fit" : np.array(r['fit'], dtype=np.int64)
        }

datasets = OrderedDict()
splits = ['train', 'valid']
datasets['train'] =  ModCloth("data/modcloth_final_data_processed_train.csv")
datasets['valid'] =  ModCloth("data/modcloth_final_data_processed_valid.csv")
datasets['test'] = ModCloth("data/modcloth_final_data_processed_test.csv")

In [31]:
# macro - Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account
# weighted - Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall.

def compute_metrics(target, pred_probs, averaging = "macro"):

    pred_labels = pred_probs.argmax(-1)
    precision = metrics.precision_score(target, pred_labels, average=averaging)
    recall = metrics.recall_score(target, pred_labels, average=averaging)
    f1_score = metrics.f1_score(target, pred_labels, average=averaging)
    accuracy = metrics.accuracy_score(target, pred_labels)
    auc = metrics.roc_auc_score(target, pred_probs, average=averaging, multi_class="ovr")

    return precision, recall, f1_score, accuracy, auc

In [32]:
class Base(nn.Module):
    def __init__(self, user_pathway, item_pathway, combined_pathway, embed_dim, num_item_embed, num_user_embed, num_cupsize_embed, num_category_embed, dropout):
        super().__init__()
       
        self.user_pathway = user_pathway
        self.item_pathway = item_pathway
        self.combined_pathway = combined_pathway
        self.embedding_dim = embed_dim

        self.user_embedding = nn.Embedding(num_user_embed, embed_dim, max_norm=1.0 )
        self.cup_size_embedding = nn.Embedding(num_cupsize_embed, embed_dim, max_norm=1.0 )
        self.item_embedding = nn.Embedding(num_item_embed, embed_dim, max_norm=1.0 )
        self.category_embedding = nn.Embedding(num_category_embed, embed_dim, max_norm=1.0 )


    def forward(self, batch_input):
        # Customer Pathway
        user_emb = self.user_embedding(batch_input["user_id"])
        cup_size_emb = self.cup_size_embedding(batch_input["cup_size"])
        user_representation = torch.cat( [user_emb, cup_size_emb, batch_input["user_numeric"]], dim=-1 )
        user_representation = self.user_transform_blocks(user_representation)

        # Article Pathway
        item_emb = self.item_embedding(batch_input["item_id"])
        category_emb = self.category_embedding(batch_input["category"])
        item_representation = torch.cat( [item_emb, category_emb, batch_input["item_numeric"]], dim=-1 )
        item_representation = self.item_transform_blocks(item_representation)

        # Combine the pathways
        combined_representation = torch.cat( [user_representation, item_representation], dim=-1 )
        combined_representation = self.combined_blocks(combined_representation)

        # Output layer of logits
        logits = self.hidden2output(combined_representation)
        pred_probs = F.softmax(logits, dim=-1)

        return logits, pred_probs

In [35]:
class SFNet(Base):
    def __init__(self, user_pathway, item_pathway, combined_pathway, embed_dim, num_item_embed, num_user_embed, num_cupsize_embed, num_category_embed, dropout):
        super().__init__(user_pathway, item_pathway, combined_pathway, embed_dim, num_item_embed, num_user_embed, num_cupsize_embed, num_category_embed, dropout)

        # Customer pathway transformation  ==  user_embedding_dim + cup_size_embedding_dim + num_user_numeric_features
        user_features_input_size = 2 * self.embedding_dim + NUM_USER_NUMERIC
        self.user_pathway.insert(0, user_features_input_size)
        self.user_transform_blocks = []
        for i in range(1, len(self.user_pathway)):
            self.user_transform_blocks.append( SkipBlock( self.user_pathway[i - 1], self.user_pathway[i] ) )
            self.user_transform_blocks.append(nn.Dropout(DROPOUT))
        self.user_transform_blocks = nn.Sequential(*self.user_transform_blocks)

        # Article pathway transformation == item_embedding_dim + category_embedding_dim + num_item_numeric_features
        item_features_input_size = 2 * self.embedding_dim + NUM_ITEM_NUMERIC
        self.item_pathway.insert(0, item_features_input_size)
        self.item_transform_blocks = []
        for i in range(1, len(self.item_pathway)):
            self.item_transform_blocks.append( SkipBlock( self.item_pathway[i - 1], self.item_pathway[i]) )
            self.item_transform_blocks.append(nn.Dropout(DROPOUT))
        self.item_transform_blocks = nn.Sequential(*self.item_transform_blocks)

        # Combined top layer pathway
        # u = output dim of user_transform_blocks, # t = output dim of item_transform_blocks
        # Pathway combination through [u, t] # Hence, input dimension will be 2*dim(u)
        combined_layer_input_size = 2 * self.user_pathway[-1]
        self.combined_pathway.insert(0, combined_layer_input_size)
        self.combined_blocks = []
        for i in range(1, len(self.combined_pathway)):
            self.combined_blocks.append( SkipBlock( self.combined_pathway[i - 1], self.combined_pathway[i]) )
            self.combined_blocks.append(nn.Dropout(DROPOUT))
        self.combined_blocks = nn.Sequential(*self.combined_blocks)

        # Linear transformation from last hidden layer to output
        self.hidden2output = nn.Linear(self.combined_pathway[-1], NUM_TARGETS)


class SkipBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        """ Skip Connection for feed-forward  - ResNet Block """
        super().__init__()
        self.W1 = nn.Linear(input_dim, output_dim)
        self.W2 = nn.Linear(output_dim, output_dim)
        self.I = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """  z = ReLU(   W2( ReLU( W1(x))) + Projection(x))    """
        z = relu(self.W2(relu(self.W1(x))) + self.I(x))
        return z


In [36]:
class MLP(Base):
    def __init__(self,user_pathway, item_pathway, combined_pathway, embed_dim, num_item_embed, num_user_embed, num_cupsize_embed, num_category_embed, dropout):
        super().__init__(user_pathway, item_pathway, combined_pathway, embed_dim, num_item_embed, num_user_embed, num_cupsize_embed, num_category_embed, dropout)

        # Customer pathway transformation  ==  user_embedding_dim + cup_size_embedding_dim + num_user_numeric_features
        user_features_input_size = 2 * self.embedding_dim + NUM_USER_NUMERIC
        self.user_pathway.insert(0, user_features_input_size)
        self.user_transform_blocks = []
        for i in range(1, len(self.user_pathway)):
            self.user_transform_blocks.append( LinearBlock( self.user_pathway[i - 1], self.user_pathway[i] ) )
        self.user_transform_blocks = nn.Sequential(*self.user_transform_blocks)

        # Article pathway transformation == item_embedding_dim + category_embedding_dim + num_item_numeric_features
        item_features_input_size = 2 * self.embedding_dim + NUM_ITEM_NUMERIC
        self.item_pathway.insert(0, item_features_input_size)
        self.item_transform_blocks = []
        for i in range(1, len(self.item_pathway)):
            self.item_transform_blocks.append( LinearBlock( self.item_pathway[i - 1], self.item_pathway[i])  )
        self.item_transform_blocks = nn.Sequential(*self.item_transform_blocks)

        # Combined top layer pathway
        # u = output dim of user_transform_blocks, # t = output dim of item_transform_blocks
        # Pathway combination through [u, t] # Hence, input dimension will be 4*dim(u)
        combined_layer_input_size = 2 * self.user_pathway[-1]
        self.combined_pathway.insert(0, combined_layer_input_size)
        self.combined_blocks = []
        for i in range(1, len(self.combined_pathway)):
            self.combined_blocks.append( LinearBlock( self.combined_pathway[i - 1], self.combined_pathway[i]) )
        self.combined_blocks = nn.Sequential(*self.combined_blocks)

        # Linear transformation from last hidden layer to output
        self.hidden2output = nn.Linear(self.combined_pathway[-1], NUM_TARGETS)


class LinearBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        """ Skip Connection for feed-forward  - ResNet Block """
        super().__init__()
        self.W1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """  z = ReLU(   W2( ReLU( W1(x))) + Projection(x))    """
        return relu(self.W1(x))


In [45]:
model = MLP(USER_PATHWAY, ITEM_PATHWAY, COMBINED_PATHWAY, EMBED_DIM, NUM_ITEM_EMBED, NUM_USER_EMBED, NUM_CUPSIZE_EMBED, NUM_CATEGORY_EMBED, DROPOUT)
model = model.to(device)

# print("-" * 50)
# print(model)

print("-" * 50)
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of model parameters: {total_params}")
print("-" * 50)

loss_criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.Adam(model.parameters(), lr = LR, weight_decay= WEIGHT_DECAY)

--------------------------------------------------
Number of model parameters: 769673
--------------------------------------------------


In [28]:
step = 0

tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor

for epoch in range(EPOCHS):

    for d in datasets:
        for split in splits:
            data_loader = DataLoader( dataset=datasets[split], batch_size=BATCH_SIZE, shuffle = (split == "train") )

            loss_tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == "train":
                model.train()
            else:
                model.eval()
                target_tracker = []
                pred_tracker = []

            for iteration, batch in enumerate(data_loader):

                for k, v in batch.items():
                    batch[k] = v.to(device)

                # Forward pass
                logits, pred_probs = model(batch)

                # loss calculation
                loss = loss_criterion(logits, batch["fit"])   # batch['fit'] are the true labels

                # backward + optimization
                if split == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                loss_tracker["Total Loss"] = torch.cat((loss_tracker["Total Loss"], loss.view(1)))

                if iteration % 100 == 0 or iteration + 1 == len(data_loader):
                    print(f"{split.upper()} Batch Stats {iteration}/{len(data_loader)}, Loss={loss.item() :.2f}")

                if split == "valid":
                    target_tracker.append(batch["fit"].cpu().numpy())
                    pred_tracker.append(pred_probs.cpu().data.numpy())

            print( f"{split.upper()} Epoch {epoch + 1} / {EPOCHS}, Mean Total Loss {torch.mean(loss_tracker['Total Loss'])}")



TRAIN Batch Stats 0/518, Loss=1.06
TRAIN Batch Stats 100/518, Loss=0.81
TRAIN Batch Stats 200/518, Loss=0.88
TRAIN Batch Stats 300/518, Loss=0.73
TRAIN Batch Stats 400/518, Loss=0.75
TRAIN Batch Stats 500/518, Loss=0.78
TRAIN Batch Stats 517/518, Loss=0.79
TRAIN Epoch 1 / 1, Mean Total Loss 0.8127315044403076
VALID Batch Stats 0/65, Loss=0.69
VALID Batch Stats 64/65, Loss=0.80
VALID Epoch 1 / 1, Mean Total Loss 0.7955179214477539
TRAIN Batch Stats 0/518, Loss=0.82
TRAIN Batch Stats 100/518, Loss=0.81
TRAIN Batch Stats 200/518, Loss=0.88
TRAIN Batch Stats 300/518, Loss=0.77
TRAIN Batch Stats 400/518, Loss=0.70
TRAIN Batch Stats 500/518, Loss=0.74
TRAIN Batch Stats 517/518, Loss=0.86
TRAIN Epoch 1 / 1, Mean Total Loss 0.7844616770744324
VALID Batch Stats 0/65, Loss=0.66
VALID Batch Stats 64/65, Loss=0.77
VALID Epoch 1 / 1, Mean Total Loss 0.7659412026405334
TRAIN Batch Stats 0/518, Loss=0.70
TRAIN Batch Stats 100/518, Loss=0.76
TRAIN Batch Stats 200/518, Loss=0.74
TRAIN Batch Stats 300/5

In [95]:
target_tracker = []
pred_tracker = []

print("Preparing test data ...")

data_loader = DataLoader(dataset = datasets['test'], batch_size = BATCH_SIZE, shuffle=False)

print("Evaluating model on test data ...")
model.eval()
with torch.no_grad():

    for iteration, batch in enumerate(data_loader):

        for k, v in batch.items():
            batch[k] = v.to(device)

        # Forward pass
        _, pred_probs = model(batch)

        target_tracker.append(batch["fit"].cpu().numpy())
        pred_tracker.append(pred_probs.cpu().data.numpy())

target_tracker = np.stack(target_tracker[:-1]).reshape(-1)
pred_tracker = np.stack(pred_tracker[:-1], axis=0).reshape(-1, NUM_TARGETS)
precision, recall, f1_score, accuracy, auc = compute_metrics(target_tracker, pred_tracker, averaging = "weighted")

print("-" * 50)
print(f"Metrics:\n Precision = {precision}\n Recall = {recall}\n F1-score = {f1_score}\n Accuracy = {accuracy}\n AUC = {auc}\n ")
print("-" * 50)

Preparing test data ...
Evaluating model on test data ...
--------------------------------------------------
Metrics:
 Precision = 0.53569623951807
 Recall = 0.6873779296875
 F1-score = 0.561165759107278
 Accuracy = 0.6873779296875
 AUC = 0.6634122911456146
 
--------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
model = SFNet(USER_PATHWAY, ITEM_PATHWAY, COMBINED_PATHWAY, EMBED_DIM, NUM_ITEM_EMBED, NUM_USER_EMBED, NUM_CUPSIZE_EMBED, NUM_CATEGORY_EMBED, DROPOUT)
model = model.to(device)

# print("-" * 50)
# print(model)

print("-" * 50)
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of model parameters: {total_params}")
print("-" * 50)

loss_criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.Adam(model.parameters(), lr = LR, weight_decay= WEIGHT_DECAY)

--------------------------------------------------
Number of model parameters: 1464421
--------------------------------------------------


In [47]:
step = 0

tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor

for epoch in range(EPOCHS):

    for d in datasets:
        for split in splits:
            data_loader = DataLoader( dataset=datasets[split], batch_size=BATCH_SIZE, shuffle = (split == "train") )

            loss_tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == "train":
                model.train()
            else:
                model.eval()
                target_tracker = []
                pred_tracker = []

            for iteration, batch in enumerate(data_loader):

                for k, v in batch.items():
                    batch[k] = v.to(device)

                # Forward pass
                logits, pred_probs = model(batch)

                # loss calculation
                loss = loss_criterion(logits, batch["fit"])   # batch['fit'] are the true labels

                # backward + optimization
                if split == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                loss_tracker["Total Loss"] = torch.cat((loss_tracker["Total Loss"], loss.view(1)))

                if iteration % 100 == 0 or iteration + 1 == len(data_loader):
                    print(f"{split.upper()} Batch Stats {iteration}/{len(data_loader)}, Loss={loss.item() :.2f}")

                if split == "valid":
                    target_tracker.append(batch["fit"].cpu().numpy())
                    pred_tracker.append(pred_probs.cpu().data.numpy())

            print( f"{split.upper()} Epoch {epoch + 1} / {EPOCHS}, Mean Total Loss {torch.mean(loss_tracker['Total Loss'])}")



TRAIN Batch Stats 0/518, Loss=1.13
TRAIN Batch Stats 100/518, Loss=0.93
TRAIN Batch Stats 200/518, Loss=0.80
TRAIN Batch Stats 300/518, Loss=0.80
TRAIN Batch Stats 400/518, Loss=0.88


KeyboardInterrupt: 

In [None]:
target_tracker = []
pred_tracker = []

print("Preparing test data ...")

data_loader = DataLoader(dataset = datasets['test'], batch_size = BATCH_SIZE, shuffle=False)

print("Evaluating model on test data ...")
model.eval()
with torch.no_grad():

    for iteration, batch in enumerate(data_loader):

        for k, v in batch.items():
            batch[k] = v.to(device)

        # Forward pass
        _, pred_probs = model(batch)

        target_tracker.append(batch["fit"].cpu().numpy())
        pred_tracker.append(pred_probs.cpu().data.numpy())

target_tracker = np.stack(target_tracker[:-1]).reshape(-1)
pred_tracker = np.stack(pred_tracker[:-1], axis=0).reshape(-1, NUM_TARGETS)
precision, recall, f1_score, accuracy, auc = compute_metrics(target_tracker, pred_tracker, averaging = "weighted")

print("-" * 50)
print(f"Metrics:\n Precision = {precision}\n Recall = {recall}\n F1-score = {f1_score}\n Accuracy = {accuracy}\n AUC = {auc}\n ")
print("-" * 50)