# Clothing Size Prediction from Weight, Age and Height (PyTorch)

In [53]:
# Import statements

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torchsummary import summary
import time
from sklearn.metrics import r2_score

In [54]:
# Read data
# Load the CSV into `data`, print its (rows, columns) shape, and preview the
# first rows as the cell output.
data = pd.read_csv('final_test.csv')
print(data.shape)
data.head()

(119734, 4)


Unnamed: 0,weight,age,height,size
0,62,28.0,172.72,XL
1,59,36.0,167.64,L
2,61,34.0,165.1,M
3,65,27.0,175.26,L
4,62,45.0,172.72,M


In [22]:
# Quick sanity report: the mean of each feature column first, then its dtype.
for column in ('height', 'age', 'weight'):
    print(f'Average {column} in the dataset: ', data[column].mean())
for column in ('height', 'age', 'weight'):
    print(f'dtype of {column.capitalize()}: ', data[column].dtype)

Average height in the dataset:  0.5126305346859098
Average age in the dataset:  0.2908317153485679
Average weight in the dataset:  0.3487439555341827
dtype of Height:  float64
dtype of Age:  float64
dtype of Weight:  float64


In [55]:
def map_size(size):
    """Translate a clothing-size label into its ordinal class index.

    Args:
        size: A size label such as 'XXS', 'S', 'M', 'L', 'XL', 'XXL' or 'XXXL'.

    Returns:
        The position of the label in the ordering above as a float
        (0.0 for 'XXS' through 6.0 for 'XXXL'), or None when `size` is not
        one of the seven known labels.
    """
    ordered_labels = ('XXS', 'S', 'M', 'L', 'XL', 'XXL', 'XXXL')
    for index, label in enumerate(ordered_labels):
        if size == label:
            return float(index)
    return None

In [56]:
# Drop incomplete rows and work on an explicit copy, so the column assignments
# below modify this frame only and cannot trigger SettingWithCopyWarning.
data = data.dropna().copy()
data['weight'] = data['weight'].astype('float64')
# map_size returns None for anything that is not a known size label, so
# applying it to already-encoded numeric codes would wipe the column.
# Encode only while the column is still non-numeric; this keeps the cell
# safe to re-run.
if not pd.api.types.is_numeric_dtype(data['size']):
    data['size'] = data['size'].apply(map_size)


In [57]:
# Preview the frame after the cleaning cell; `size` is expected to hold the
# numeric class codes produced by map_size at this point.
data.head()

Unnamed: 0,weight,age,height,size
0,62.0,28.0,172.72,4.0
1,59.0,36.0,167.64,3.0
2,61.0,34.0,165.1,2.0
3,65.0,27.0,175.26,3.0
4,62.0,45.0,172.72,2.0


In [None]:
def NormalizeData01(data, min_, max_):
    """Min-max scale `data` so that `min_` maps to 0 and `max_` maps to 1.

    Args:
        data: Value(s) to scale; anything supporting `-` and `/` (a scalar,
            NumPy array or pandas Series) is processed element-wise.
        min_: Lower bound of the original range.
        max_: Upper bound of the original range.

    Returns:
        `(data - min_) / (max_ - min_)`. When the range is a scalar zero
        (constant feature) the divisor is replaced by 1, so in-range values
        map to 0 instead of raising ZeroDivisionError or yielding NaN.
    """
    span = max_ - min_
    # Guard only the scalar case; array-like bounds keep their original
    # element-wise behaviour.
    if np.isscalar(span) and span == 0:
        span = 1
    return (data - min_) / span
def NormalizeData(data, min_, max_):
    """Min-max scale `data` so that `min_` maps to -1 and `max_` maps to 1.

    Args:
        data: Value(s) to scale; anything supporting `-` and `/` (a scalar,
            NumPy array or pandas Series) is processed element-wise.
        min_: Lower bound of the original range.
        max_: Upper bound of the original range.

    Returns:
        `2 * (data - min_) / (max_ - min_) - 1`. When the range is a scalar
        zero (constant feature) the divisor is replaced by 1, so in-range
        values map to -1 instead of raising ZeroDivisionError or yielding NaN.
    """
    span = max_ - min_
    # Guard only the scalar case; array-like bounds keep their original
    # element-wise behaviour.
    if np.isscalar(span) and span == 0:
        span = 1
    return (2*((data - min_) / span)) - 1

# Scale every feature column to [0, 1].
# The column minimum/maximum are computed once per column and the scaling is
# applied to the whole Series at once; evaluating min()/max() inside a per-row
# lambda rescans the full column for every row.
# NOTE(review): the bounds come from the full dataset (before any train/test
# split) and are not kept anywhere; confirm whether that is intended.
for column in ('weight', 'age', 'height'):
    col_min, col_max = data[column].min(), data[column].max()
    data[column] = NormalizeData01(data[column], col_min, col_max)



In [None]:
# Feature matrix (weight, age, height) and target vector (size) as NumPy arrays.
X = data[['weight', 'age', 'height']]
Y = data['size']
X = X.values
Y = Y.values

# DATASET 60-20-20 SPLIT PROPORTION TRAIN-VALIDATE-TEST RESPECTIVELY
# The first split holds out 40% of the rows (test_size=0.4); the second split
# halves that hold-out into validation and test (test_size=0.5).
# NOTE(review): for an 80-10-10 split the first test_size would have to be 0.2.

X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, test_size=0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, test_size=0.5, random_state=42)

In [None]:
# CONVERTING DATA TO TENSORS
# torch.as_tensor accepts NumPy arrays as well as tensors, so re-running this
# cell after the names have been rebound to tensors still works
# (torch.from_numpy only accepts ndarrays and raises on a tensor).

X_train = torch.as_tensor(X_train, dtype=torch.float32)

X_val = torch.as_tensor(X_val, dtype=torch.float32)

X_test = torch.as_tensor(X_test, dtype=torch.float32)

Y_train = torch.as_tensor(Y_train, dtype=torch.float32)

Y_val = torch.as_tensor(Y_val, dtype=torch.float32)

# SETTING TENSOR DATASETS
# The test dataset carries features only; Y_test is left as produced by the split.

train_tensor = torch.utils.data.TensorDataset(X_train, Y_train)

validation_tensor = torch.utils.data.TensorDataset(X_val, Y_val)

test_tensor = torch.utils.data.TensorDataset(X_test)

# FINAL DATA LOADERS - SETTING BATCH CONFIGURATION
# One shared batch size; only the training loader shuffles.

BATCH_SIZE = 64

train_loader = torch.utils.data.DataLoader(train_tensor,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)

validation_loader = torch.utils.data.DataLoader(validation_tensor,
                                                batch_size=BATCH_SIZE,
                                                shuffle=False)

test_loader = torch.utils.data.DataLoader(test_tensor,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False)


In [None]:
# MODEL SET-UP

input_size, output_size = 3, 7
hidden_sizes = [64, 64]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fully connected network: a Linear layer between each pair of consecutive
# widths, with a LeakyReLU after every layer except the last one.
layer_widths = [input_size, *hidden_sizes, output_size]
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.LeakyReLU())
layers.pop()  # the output layer emits raw scores, so drop the trailing activation

model = nn.Sequential(*layers)

In [None]:
# XAVIER'S WEIGHTS INITIALIZATION
# Re-draw the weight matrix of every Linear layer with Xavier-uniform
# initialisation (gain 1.0); biases are left as they are.

linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
for layer in linear_layers:
    nn.init.xavier_uniform_(layer.weight, gain=1.0)

In [None]:
# PRE-TRAINING SET-UP

# Schedule, loss and optimiser.
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
model.to(device)

# Bookkeeping consumed and reset by the training loop.
running_loss, steps, best_model_r2 = 0, 0, 0

In [None]:
# TRAINING START


def _run_validation(model, loader, criterion, device):
    """Evaluate `model` on every batch of `loader` without tracking gradients.

    Args:
        model: Network returning one row of class scores per input row.
        loader: Iterable of (inputs, labels) batches.
        criterion: Loss taking (scores, integer class labels).
        device: Device the batches are moved to before the forward pass.

    Returns:
        (mean_loss, accuracy_pct, r2_pct): the per-batch loss averaged over
        the batches, the percentage of rows whose arg-max class equals the
        label, and 100 * R^2 between labels and arg-max classes computed once
        over all rows.
    """
    model.eval()
    total_loss = 0.0
    seen_labels, seen_preds = [], []
    with torch.no_grad():
        for val_inputs, val_labels in loader:
            val_inputs = val_inputs.float().to(device)
            val_labels = val_labels.long().to(device)

            scores = model(val_inputs)
            total_loss += criterion(scores, val_labels).item()

            # r2_score needs one predicted value per row, so reduce the
            # per-class scores to the predicted class index first.
            seen_labels.append(val_labels.cpu())
            seen_preds.append(scores.argmax(dim=1).cpu())
    model.train()

    y_true = torch.cat(seen_labels).numpy()
    y_pred = torch.cat(seen_preds).numpy()
    mean_loss = total_loss / len(loader)
    accuracy_pct = 100.0 * float((y_true == y_pred).mean())
    # Computed over the whole validation set rather than averaged per batch:
    # per-batch R^2 depends on each batch's own label variance.
    r2_pct = 100.0 * r2_score(y_true, y_pred)
    return mean_loss, accuracy_pct, r2_pct


# 5 VALIDATION LOOPS PER EPOCH (at least one training step between validations,
# so small loaders still get validated).
validate_every = max(1, len(train_loader) // 5)

print('Training Started!')
start = time.time()
for e in range(epochs):
    print('Epoch number: ', e + 1)

    for inputs, labels in train_loader:

        # TRAINING LOOP

        inputs = inputs.float().to(device)
        # CrossEntropyLoss expects integer class indices as targets.
        labels = labels.long().to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        steps += 1

        if steps >= validate_every:
            valid_loss, valid_accuracy, valid_r2 = _run_validation(
                model, validation_loader, criterion, device)

            # Keep the checkpoint with the best validation R^2 seen so far.
            if valid_r2 > best_model_r2:
                best_model_r2 = valid_r2
                torch.save(model, 'test.pth')
            print(
                f"Train loss: {running_loss / steps:.4f}.. "
                f"Valid loss: {valid_loss:.4f}.. "
                f"Valid accuracy: {valid_accuracy:.5f}.. "
                f"Valid R2: {valid_r2:.5f}..")

            running_loss = 0
            steps = 0

print('Training finished!')
print('Training lasted: ', time.time() - start)


In [None]:
# Layer-by-layer summary. The input shape (without the batch dimension) and
# the device are passed explicitly: the classic torchsummary `summary`
# requires `input_size`, and its `device` argument defaults to "cuda".
summary(model, (input_size,), device=device.type)

In [44]:
# Smoke test: forward pass on a single sample whose three features are all 0.5.
sample = torch.Tensor([0.5, 0.5, 0.5])
outputs = model(sample.to(device))
