In [None]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn import metrics

import torch # Libreria
from torch.utils.data import Dataset # Clase u objeto que va a contener la informacion que vamos a utilizar para entrenar y evaluar nuestro algoritmo
from torch.utils.data import DataLoader

## Uso de GPU

In [None]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Show whether CUDA was detected.
print(torch.cuda.is_available())

In [None]:
# Report cuDNN/CUDA details for device 0; nothing is printed when CUDA is unavailable.
if torch.cuda.is_available():
    cudnn_version = torch.backends.cudnn.version()
    device_count = torch.cuda.device_count()
    device_name = torch.cuda.get_device_name(0)
    total_memory_gb = torch.cuda.get_device_properties(0).total_memory/1e9  # bytes -> GB

    print('__CUDNN VERSION:', cudnn_version)
    print('__Number CUDA Devices:', device_count)
    print('__CUDA Device Name:', device_name)
    print('__CUDA Device Total Memory [GB]:', total_memory_gb)

In [None]:
# Show which device string was selected earlier ("cuda:0" or "cpu").
print("Device: ",device)

## Normalizacion

In [None]:
# Min-max scale every column of `final_dataset` into [0, 1].
# NOTE(review): `final_dataset` is not defined in the visible cells; it is assumed to be
# a numeric array with one row per sample, created earlier in the notebook.
col_min = np.min(final_dataset, axis=0)
col_range = np.max(final_dataset, axis=0) - col_min
# A constant column has zero range and would produce NaN (0/0); divide by 1 instead so
# such columns are mapped to 0.
safe_range = np.where(col_range == 0, 1, col_range)
normalized_data = (final_dataset - col_min) / safe_range

# Sanity check: per-column maxima and minima after scaling, then the scaled data.
print(np.max(normalized_data, axis=0))
print(np.min(normalized_data, axis=0))
print(normalized_data)

## CustomDataset y DataLoader

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each row of a feature matrix with its label.

    Args:
        x: 2-D indexable container of features exposing ``.shape`` (one row per sample).
        y: Indexable, sized container holding one label per row of ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, x, y):
        super().__init__()
        # Fail early: a length mismatch would otherwise only surface as an IndexError
        # in the middle of an epoch (or silently ignore surplus labels).
        if x.shape[0] != len(y):
            raise ValueError(
                f"x has {x.shape[0]} samples but y has {len(y)} labels"
            )
        self.x = x
        self.y = y

    def __len__(self):
        """Return the number of samples (rows of ``x``)."""
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x[idx, :], self.y[idx]

In [None]:
# Wrap the training split and sanity-check its size and first sample.
# NOTE(review): x_train / l_train are not defined in the visible cells.
training_set = CustomDataset(x_train, l_train)
print(len(training_set))
print(training_set[0])

In [None]:
# Wrap the test split and sanity-check its size and first sample.
# NOTE(review): x_test / l_test are not defined in the visible cells.
test_set = CustomDataset(x_test, l_test)
print(len(test_set))
print(test_set[0])

El **DataLoader** siempre espera el set de datos, el batch_size (que preferentemente debería ser potencia de 2 para optimizar los cálculos) y, opcionalmente, el shuffle, que mezcla los datos cada vez que comienza una epoch.

In [None]:
# Training batches are reshuffled at the start of every epoch.
training_dataloader = DataLoader(training_set, batch_size = 512, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps per-batch results reproducible.
test_dataloader = DataLoader(test_set, batch_size = 256, shuffle=False)

In [None]:
len(training_dataloader) # Number of batches per epoch (not the size of each batch)

## Regresion Logistica Bivariada

In [None]:
# Model definition
class NNet(torch.nn.Module):
    """Small fully connected binary classifier: 2 -> 10 -> 20 -> 1.

    The two hidden layers use ReLU; the output layer uses a sigmoid so the result
    is a probability in [0, 1], as required by ``torch.nn.BCELoss``.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = torch.nn.Linear(in_features = 2, out_features = 10, bias = True)
        self.relu_1 = torch.nn.ReLU()
        self.linear_2 = torch.nn.Linear(in_features = 10, out_features = 20, bias = True)
        self.relu_2 = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(in_features = 20, out_features = 1, bias = True)
        # Sigmoid (not ReLU) on the output: ReLU is unbounded above and is not a probability.
        self.sigmoid_3 = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the predicted probability for each row of ``x``.

        Args:
            x: Float tensor of shape (batch, 2).

        Returns:
            Tensor of shape (batch, 1) with values in [0, 1].
        """
                                    # x.shape  = batch x 2
        z1 = self.linear_1(x)       # z1.shape = batch x 10
        a1 = self.relu_1(z1)        # a1.shape = batch x 10
        z2 = self.linear_2(a1)      # z2.shape = batch x 20
        a2 = self.relu_2(z2)        # a2.shape = batch x 20
        z3 = self.linear_3(a2)      # z3.shape = batch x 1 -> (-inf, +inf)
        y = self.sigmoid_3(z3)      # y.shape  = batch x 1 -> [0, 1]
        return y

In [None]:
# Instantiate the network and print its layer structure.
nnet = NNet()
print(nnet)

In [None]:
criterion = torch.nn.BCELoss(reduction='sum') # loss function: binary cross-entropy, summed over the batch
optimizer = torch.optim.SGD(nnet.parameters(), lr=0.005) # optimizer: SGD over all parameters of nnet

In [None]:
# Move the model to the selected device.
nnet.to(device)

In [None]:
# Train for 100 epochs; after each epoch print the accumulated batch losses
# divided by the number of training samples.
for epoch in range(100):
    running_loss = 0
    for x, y in training_dataloader:
        # Move the batch to the device; labels become a float column vector.
        x = x.to(device).float()
        y = y.to(device).float().reshape(-1,1)

        # Gradients accumulate across backward() calls, so reset them for every batch.
        optimizer.zero_grad()

        # Forward pass and loss.
        y_hat = nnet(x)
        loss = criterion(y_hat, y)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Statistics.
        running_loss += loss.item()

    print(f"Epoch = {epoch} - loss = {running_loss / len(training_set)}")

In [None]:
# Evaluate the classifier on the whole test set with scikit-learn
# (accuracy, precision, recall, F1).
nnet.eval()  # switch layers such as dropout to inference behaviour
y_true = []
y_pred = []
with torch.no_grad():  # no gradients are needed for evaluation
    for x_tst, y_tst in test_dataloader:
        x_tst = x_tst.to(device).float()

        # Threshold the model output at 0.5. `.cpu()` is required before `.numpy()`
        # whenever the model runs on a GPU.
        y_hat_test = (nnet(x_tst).cpu().numpy() >= 0.5)

        # Accumulate so the metrics cover the full test set instead of one mini-batch.
        y_pred.extend(y_hat_test.ravel().tolist())
        y_true.extend(y_tst.cpu().numpy().ravel().tolist())

print("Accuracy:",metrics.accuracy_score(y_true, y_pred))
print("Precision:",metrics.precision_score(y_true, y_pred))
print("Recall:",metrics.recall_score(y_true, y_pred))
print("F1:",metrics.f1_score(y_true, y_pred))
print()

## Regresion Lineal

In [None]:
# Model definition
class NNet(torch.nn.Module):
    """Fully connected regressor: 2 -> 100 -> 500 -> 800 -> 200 -> 1.

    Every hidden stage applies Linear, then Dropout, then ReLU. The output layer
    is a plain Linear with no activation.
    """

    def __init__(self):
        super().__init__()
        # Hidden stage 1
        self.linear_1 = torch.nn.Linear(2, 100, bias=True)
        self.dropout_1 = torch.nn.Dropout(0.5)
        self.relu_1 = torch.nn.ReLU()
        # Hidden stage 2
        self.linear_2 = torch.nn.Linear(100, 500, bias=True)
        self.dropout_2 = torch.nn.Dropout(0.25)
        self.relu_2 = torch.nn.ReLU()
        # Hidden stage 3
        self.linear_3 = torch.nn.Linear(500, 800, bias=True)
        self.dropout_3 = torch.nn.Dropout(0.25)
        self.relu_3 = torch.nn.ReLU()
        # Hidden stage 4
        self.linear_4 = torch.nn.Linear(800, 200, bias=True)
        self.dropout_4 = torch.nn.Dropout(0.5)
        self.relu_4 = torch.nn.ReLU()
        # Output layer
        self.linear_5 = torch.nn.Linear(200, 1, bias=True)

    def forward(self, x):
        """Run ``x`` through the four hidden stages and the output layer."""
        hidden_stages = (
            (self.linear_1, self.dropout_1, self.relu_1),
            (self.linear_2, self.dropout_2, self.relu_2),
            (self.linear_3, self.dropout_3, self.relu_3),
            (self.linear_4, self.dropout_4, self.relu_4),
        )
        activation = x
        for linear, dropout, relu in hidden_stages:
            activation = relu(dropout(linear(activation)))
        return self.linear_5(activation)

In [None]:
# Instantiate the most recently defined NNet (replaces any previous `nnet`).
nnet = NNet()

In [None]:
# Print the layer structure of the model.
print(nnet)

In [None]:
# Loss: mean squared error averaged over the batch; optimizer: SGD over all parameters of nnet.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(nnet.parameters(), lr=0.005)

In [None]:
# Move the model to the selected device.
nnet.to(device)

In [None]:
# Train the regressor for 100 epochs; after each epoch evaluate the MSE on the test set.
for epoch in range(100):
    running_loss = 0
    # Re-enable dropout: the evaluation phase below switches the model to eval mode,
    # which would otherwise stay active for every later epoch.
    nnet.train()
    for x, y in training_dataloader:
        # Move the batch to the device; targets become a float column vector.
        x = x.to(device).float()
        y = y.to(device).float().reshape(-1,1)

        # Gradients accumulate across backward() calls, so reset them for every batch.
        optimizer.zero_grad()

        # Forward pass and loss.
        y_hat = nnet(x).reshape(-1,1)
        loss = criterion(y_hat, y)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # The criterion returns the batch *mean*; weight it by the batch size so that
        # dividing by len(training_set) below yields the true per-sample loss.
        running_loss += loss.item() * x.shape[0]

    nnet.eval()
    with torch.no_grad():
        nnet_test_scores = []
        truth = []

        for x, y in test_dataloader:
            x = x.to(device).float()
            y = y.to(device).float().reshape(-1,1)

            # Forward pass.
            y_hat = nnet(x)

            # Accumulate (+=) across batches; plain assignment would keep only the last batch.
            truth += list(y.cpu().detach().numpy())
            nnet_test_scores += list(y_hat.cpu().detach().numpy())

        mse = metrics.mean_squared_error(truth, nnet_test_scores)

    print(f"Epoch = {epoch} - loss = {running_loss / len(training_set)} - mse: {mse}")

## Regresion Logistica Bivariada con Embeddings

In [None]:
# Similar treatment for the vendor id variable: show its distinct values and how many there are.
# NOTE(review): `ds` is not defined in the visible cells.
vendor_ids = ds.vendor_id.unique()
print(vendor_ids)
print(len(vendor_ids))

In [None]:
class CustomDatasetWithEmbedding(Dataset):
    """Map-style dataset yielding ``(features, vendor index, label)`` triples.

    Args:
        X: 2-D indexable container of features exposing ``.shape`` (one row per sample).
        vendor_idx: Indexable, sized container with one vendor index per row of ``X``.
        Y: Indexable, sized container with one label per row of ``X``.

    Raises:
        ValueError: If the three inputs do not contain the same number of samples.
    """

    def __init__(self, X, vendor_idx, Y):
        super().__init__()
        # Fail early: a length mismatch would otherwise only surface as an IndexError
        # in the middle of an epoch.
        n_samples = X.shape[0]
        if len(vendor_idx) != n_samples or len(Y) != n_samples:
            raise ValueError(
                f"X has {n_samples} samples, vendor_idx has {len(vendor_idx)} "
                f"and Y has {len(Y)}; all three must match"
            )
        self.vendor_idx = vendor_idx
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, vendor index, label)`` triple at position ``idx``."""
        return self.X[idx, :], self.vendor_idx[idx], self.Y[idx]

In [None]:
# Wrap the train/test splits (features, vendor indices, labels) for the embedding model.
# NOTE(review): X_train, vendor_index_train, y_train and their test counterparts are not
# defined in the visible cells.
training = CustomDatasetWithEmbedding(X_train, vendor_index_train, y_train)
testing = CustomDatasetWithEmbedding(X_test, vendor_index_test, y_test)

In [None]:
# Training batches are reshuffled at the start of every epoch.
training_dataloader = DataLoader(training, batch_size=128, shuffle=True)
# Evaluation gains nothing from shuffling; keep the test order fixed.
test_dataloader = DataLoader(testing, batch_size=128, shuffle=False)

In [None]:
class NNet(torch.nn.Module):
    """Binary classifier that concatenates dense features with a learned vendor embedding.

    Architecture: [features ++ embedding] -> 200 -> 100 -> 1, with ReLU after the two
    hidden layers. The output is a raw logit (no sigmoid is applied here).

    Args:
        number_of_vendors: Number of rows in the embedding table.
        embedding_dim: Size of each vendor embedding vector.
        num_features: Number of dense input features per sample. Defaults to 13,
            the value that was previously hard-coded.
    """

    def __init__(self, number_of_vendors, embedding_dim, num_features=13):
        super().__init__()
        self.embedding = torch.nn.Embedding(num_embeddings=number_of_vendors, embedding_dim=embedding_dim)
        self.linear_1 = torch.nn.Linear(in_features=(num_features + embedding_dim), out_features=200, bias=True)
        self.relu_1 = torch.nn.ReLU()
        self.linear_2 = torch.nn.Linear(in_features=200, out_features=100, bias=True)
        self.relu_2 = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(in_features=100, out_features=1, bias=True)

    def forward(self, x, vendor_idx):
        """Return one logit per sample.

        Args:
            x: Float tensor of shape (batch, num_features).
            vendor_idx: Integer tensor of shape (batch,) with indices into the
                embedding table.

        Returns:
            Tensor of shape (batch, 1) with unnormalized scores.
        """
        vendor_emb = self.embedding(vendor_idx)
        # Concatenate along the feature axis: (batch, num_features + embedding_dim).
        final_input = torch.cat([x, vendor_emb], dim=1)
        z1 = self.linear_1(final_input)
        a1 = self.relu_1(z1)
        z2 = self.linear_2(a1)
        a2 = self.relu_2(z2)
        y = self.linear_3(a2)
        return y

In [None]:
# NOTE(review): `unique` is not defined in the visible cells — presumably the array of
# distinct vendor ids; confirm it exists before this cell runs.
nnet = NNet(number_of_vendors=len(unique), embedding_dim=16)

In [None]:
# Print the layer structure of the model.
print(nnet)

In [None]:
criterion = torch.nn.BCEWithLogitsLoss(reduction='sum') # loss function: BCE on raw logits, summed over the batch
optimizer = torch.optim.Adam(nnet.parameters(), lr=0.01) # optimizer: Adam over all parameters of nnet

In [None]:
nnet.to(device)

# Train for 20 epochs; after each epoch report the per-sample training loss and the test AUC.
for epoch in range(20):

    running_loss = 0
    nnet.train()
    for x, vendor_idx, y in training_dataloader:
        # Every input must live on the same device as the model, including the
        # vendor indices consumed by the embedding layer.
        x = x.to(device).float()
        vendor_idx = vendor_idx.to(device)
        y = y.to(device).float().reshape(-1,1)

        # Gradients accumulate across backward() calls, so reset them for every batch.
        optimizer.zero_grad()

        # Forward pass and loss.
        y_hat = nnet(x, vendor_idx)
        loss = criterion(y_hat, y)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Statistics.
        running_loss += loss.item()

    nnet.eval()
    with torch.no_grad():
        nnet_test_scores = []
        truth = []
        for x, vendor_idx, y in test_dataloader:
            x = x.to(device).float()
            vendor_idx = vendor_idx.to(device)
            y = y.to(device).float().reshape(-1,1)

            # The model outputs logits; convert them to probabilities for the ROC curve.
            y_hat = nnet(x, vendor_idx)
            y_hat = torch.sigmoid(y_hat)

            # `.cpu()` is required before `.numpy()` whenever the tensors live on a GPU.
            truth += list(y.cpu().numpy())
            nnet_test_scores += list(y_hat.cpu().numpy())

        fpr, tpr, thresholds = metrics.roc_curve(truth, nnet_test_scores)
        auc = metrics.auc(fpr, tpr)
        print(f"Epoch = {epoch} | loss = {running_loss / len(training)} | auc = {auc}")

## Clasificacion (Softmax)

In [None]:
class NNet(torch.nn.Module):
    """Fully connected 4-class classifier: 2 -> 10 -> 20 -> 4.

    Each hidden stage applies Linear, then ReLU, then Dropout(p=0.05). The output
    layer returns raw scores; no softmax is applied inside ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Hidden stage 1
        self.linear_1 = torch.nn.Linear(2, 10, bias=True)
        self.activation_1 = torch.nn.ReLU()
        self.dropout_1 = torch.nn.Dropout(0.05)
        # Hidden stage 2
        self.linear_2 = torch.nn.Linear(10, 20, bias=True)
        self.activation_2 = torch.nn.ReLU()
        self.dropout_2 = torch.nn.Dropout(0.05)
        # Output layer
        self.linear_3 = torch.nn.Linear(20, 4, bias=True)

    def forward(self, x):
        """Return the four class scores for every row of the incoming batch ``x``."""
        hidden_1 = self.dropout_1(self.activation_1(self.linear_1(x)))
        hidden_2 = self.dropout_2(self.activation_2(self.linear_2(hidden_1)))
        return self.linear_3(hidden_2)

In [None]:
# Instantiate the classifier defined in the previous cell. Without this, `nnet` would
# still refer to the model from the preceding section and the optimizer would be bound
# to the wrong parameters.
nnet = NNet()

# Loss: cross-entropy on raw class scores, summed over the batch; optimizer: Adam.
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(nnet.parameters(), lr=0.005)

In [None]:
# Move the model to the selected device.
nnet.to(device)