# Title here

In [69]:
# Import statements

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torchsummary import summary
import time
from sklearn.metrics import r2_score

In [70]:
# Read data
data = pd.read_csv('final_test.csv')
print(data.shape)
data.head()

(119734, 4)


Unnamed: 0,weight,age,height,size
0,62,28.0,172.72,XL
1,59,36.0,167.64,L
2,61,34.0,165.1,M
3,65,27.0,175.26,L
4,62,45.0,172.72,M


In [71]:
data = pd.get_dummies(data, columns=['size'])
data.head()

Unnamed: 0,weight,age,height,size_L,size_M,size_S,size_XL,size_XXL,size_XXS,size_XXXL
0,62,28.0,172.72,0,0,0,1,0,0,0
1,59,36.0,167.64,1,0,0,0,0,0,0
2,61,34.0,165.1,0,1,0,0,0,0,0
3,65,27.0,175.26,1,0,0,0,0,0,0
4,62,45.0,172.72,0,1,0,0,0,0,0


In [72]:
print('Average height in the dataset: ',data.height.mean())
print('Average age in the dataset: ',data.age.mean())
print('Average weight in the dataset: ',data.weight.mean())

Average height in the dataset:  165.8057942782486
Average age in the dataset:  34.027310695782454
Average weight in the dataset:  61.75681093089682


In [73]:
data = data.dropna()

In [74]:
X = data[['weight', 'age', 'height']]
Y = data.drop(['weight', 'age', 'height'], axis=1)
X = X.values
Y = Y.values

# DATASET 80-10-10 SPLIT PROPORTION TRAIN-VALIDATE-TEST RESPECTIVELY

X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, test_size=0.5, random_state=42)

In [75]:
# CONVERTING DATA TO TENSORS

X_train = torch.from_numpy(X_train)

X_val = torch.from_numpy(X_val)

X_test = torch.from_numpy(X_test)

Y_train = torch.from_numpy(Y_train)

Y_val = torch.from_numpy(Y_val)

# SETTING TENSOR DATASETS

train_tensor = torch.utils.data.TensorDataset(X_train, Y_train)

validation_tensor = torch.utils.data.TensorDataset(X_val, Y_val)

test_tensor = torch.utils.data.TensorDataset(X_test)

# FINAL DATA LOADERS - SETTING BATCH CONFIGURATION

train_loader = torch.utils.data.DataLoader(train_tensor,
                                           batch_size=64,
                                           shuffle=True)

validation_loader = torch.utils.data.DataLoader(validation_tensor,
                                                batch_size=64,
                                                shuffle=False)

test_loader = torch.utils.data.DataLoader(test_tensor,
                                          batch_size=64,
                                          shuffle=False)


In [76]:
# MODEL SET-UP

input_size = 3
hidden_sizes = [64, 64]
output_size = 7
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.LeakyReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.LeakyReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      )

In [77]:
# XAVIER'S WEIGHTS INITIALIZATION

for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        torch.nn.init.xavier_uniform_(module.weight, gain=1.0)

In [78]:
# PRE-TRAINING SET-UP

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
epochs = 3
criterion = nn.L1Loss()
model.to(device)
running_loss = 0
steps = 0
best_model_r2 = 0

In [79]:
# TRAINING START

print('Training Started!')
start = time.time()
for e in range(epochs):
    print('Epoch number: ', e + 1)

    for inputs, labels in train_loader:

        # TRAINING LOOP

        inputs, labels = inputs.float(), labels.float()
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        steps += 1

        # 10 VALIDATION LOOPS PER EPOCH

        if steps == int(len(train_loader) / 5):
            sanity = []
            model.eval()
            accuracy = 0
            valid_loss = 0

            with torch.no_grad():

                for inputs, labels in validation_loader:

                    # VALIDATION LOOP
                    print('entre')

                    inputs, labels = inputs.float(), labels.float()
                    inputs, labels = inputs.to(device), labels.to(device)

                    outputs = model(inputs)

                    loss_valid = criterion(outputs, labels)
                    valid_loss += loss_valid.item()

                    labels = labels.cpu()
                    outputs = outputs.cpu()
                    accuracy += r2_score(labels, outputs)
                    
                overall_accuracy = (accuracy * 100) / len(validation_loader)
                if overall_accuracy > best_model_r2:
                    best_model_r2 = overall_accuracy
                    torch.save(model, 'clothing_size.pth')
                print(
                    f"Train loss: {running_loss / steps:.4f}.. "
                    f"Valid loss: {valid_loss / len(validation_loader):.4f}.. "
                    f"Valid accuracy: {(accuracy * 100) / len(validation_loader):.5f}..")

            running_loss = 0
            steps = 0
            model.train()

print('Training finished!')
print('Training lasted: ', time.time() - start)


Training Started!
Epoch number:  1
entre


ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [None]:
summary(model)