# Entrenamiento

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils import data

from google.colab import drive
import csv
import numpy as np
import pandas as pd
import os
import time

import pickle

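Si se trabaja en local, omitir la siguiente celda (y comentar también la línea `from google.colab import drive` de la celda de imports, ya que ese módulo solo está disponible en Colab).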

In [2]:
# Mount Google Drive at /content/drive (only meaningful when running in Colab).
drive.mount('/content/drive')  # add force_remount=True to force a remount

# Optional cleanup when finished: drive.flush_and_unmount()

Mounted at /content/drive


Lectura de datos: en este caso se trabajó en Colab, por lo que se debe ejecutar la celda anterior (montaje de Drive) y luego modificar las rutas de la celda siguiente (`checkpoint_folder`, `loss_file`, `serv_enc` y `dataset_path`) según la carpeta de Drive en la que estén almacenados los checkpoints, el archivo de loss, el archivo `encoding2.pickle` y los datos. 
En caso de trabajar en local, se deben definir estas variables como *'Carpeta/NombreArchivo.extensión'*.

In [None]:
# Output locations: checkpoint directory and the per-epoch loss log.
checkpoint_folder = 'drive/MyDrive/checkpoint3'
loss_file = 'drive/MyDrive/loss3.txt'
# Pickle file holding the service encoding.
serv_enc = 'drive/MyDrive/encoding2.pickle'

# Input data: training and validation CSVs live inside the dataset folder.
dataset_path = 'drive/MyDrive/401'
train_path, val_path = (
    '{}/{}'.format(dataset_path, csv_name) for csv_name in ('train2.csv', 'val2.csv')
)

A continuación se definen los parámetros e hiperparámetros. Se pueden modificar según se estime conveniente.

- learning_rate: tamaño del paso con el que el optimizador actualiza los pesos de la red.
- epochs: número de épocas de entrenamiento.
- batch_size: número de muestras que contiene cada lote (batch) en que se dividen los datos de entrenamiento.
- shuffle: si se baraja (`True`) o no (`False`) el orden de los datos al construir los lotes.
- log_interval: cada cuántos lotes se imprime el progreso durante el entrenamiento.
- checkpoint_every: cada cuántas épocas se guarda un checkpoint (para luego no tener que correr todo desde cero).
- alpha: hiperparámetro de la función de pérdida personalizada (`CustomRMSELoss`), que pondera el error cuadrático de cada muestra por $e^{-\alpha y}$.

In [4]:
# --- Optimisation settings -------------------------------------------------
learning_rate = 0.001
epochs = 2000
batch_size = 256
shuffle = True

# --- Bookkeeping: progress print cadence (batches) and checkpoint cadence (epochs)
log_interval = 100
checkpoint_every = 1

# --- Weighting factor passed to the custom loss
alpha = 0.001

# --- Device selection and seeding for reproducible runs
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
torch.manual_seed(0)
if use_cuda:
    torch.cuda.manual_seed(0)

In [5]:
# Create the checkpoint directory (and any missing parents). exist_ok=True keeps
# the cell safe to re-run and removes the gap between "check" and "create" that
# the previous exists()/makedirs() pair had.
os.makedirs(checkpoint_folder, exist_ok=True)

In [6]:
class Net(nn.Module):
    """Fully connected network: input_size -> 16 -> 32 -> 64 -> 128 -> 10 -> 1.

    Each of the first five linear layers is followed by ReLU and dropout
    (p=0.25); the sixth layer is applied without activation.
    """

    def __init__(self, input_size=16):
        super().__init__()

        drop_prob = 0.25
        self.dropout1 = nn.Dropout(drop_prob)
        self.dropout2 = nn.Dropout(drop_prob)
        self.dropout3 = nn.Dropout(drop_prob)
        self.dropout4 = nn.Dropout(drop_prob)
        self.dropout5 = nn.Dropout(drop_prob)

        # Layer names fc1..fc6 are the keys used in saved state dicts.
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 64)
        self.fc4 = nn.Linear(64, 128)
        self.fc5 = nn.Linear(128, 10)
        self.fc6 = nn.Linear(10, 1)

    def forward(self, x):
        """Run ``x`` through the five hidden stages and the output layer."""
        hidden_stages = (
            (self.fc1, self.dropout1),
            (self.fc2, self.dropout2),
            (self.fc3, self.dropout3),
            (self.fc4, self.dropout4),
            (self.fc5, self.dropout5),
        )
        activations = x
        for linear, dropout in hidden_stages:
            activations = dropout(F.relu(linear(activations)))
        return self.fc6(activations)

In [7]:
class RMSELoss(nn.Module):
    """Root-mean-squared error computed as ``sqrt(MSE(yhat, y) + eps)``."""

    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        # Added under the square root before it is taken.
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        mean_squared_error = self.mse(yhat, y)
        return torch.sqrt(mean_squared_error + self.eps)

class CustomRMSELoss(nn.Module):
    """RMSE in which each squared error is weighted by ``exp(-alpha * y)``.

    The loss is ``sqrt(mean(exp(-alpha * y) * (yhat - y)**2) + eps)``.
    """

    def __init__(self, alpha, eps=1e-6):
        super().__init__()
        # Kept as an attribute; forward() does not use it.
        self.mse = nn.MSELoss()
        self.eps = eps
        self.alpha = alpha

    def forward(self, yhat, y):
        """Return the scalar weighted RMSE of ``yhat`` against ``y``."""
        # NOTE(review): yhat and y are combined element-wise, so mismatched
        # shapes would broadcast silently - confirm callers pass equal shapes.
        weights = torch.exp(-self.alpha * y)
        squared_error = (yhat - y) ** 2
        weighted_mse = torch.mean(weights * squared_error)
        return torch.sqrt(weighted_mse + self.eps)

In [8]:
class MyDataset(data.Dataset):
    """Map-style dataset serving ``(features, target)`` pairs read from a CSV.

    The target is the ``Diferencia`` column. The features are the first
    ``n_features`` columns that remain after the ``Servicio`` column has been
    removed, cast to float32.

    Attributes:
        dataset: the DataFrame returned by ``pd.read_csv``.
        length: number of rows in ``dataset``.
        servs_encoding: object unpickled from ``serv_enc``.
    """

    def __init__(self, csv_path, serv_enc, n_features=15):
        """Read the CSV and the pickled encoding, then pre-build the tensors.

        Args:
            csv_path: path of the comma-separated file to read.
            serv_enc: path of the pickle file holding the service encoding.
            n_features: how many leading (non-``Servicio``) columns are used
                as model input.

        Raises:
            KeyError: if the CSV has no ``Diferencia`` column.
            ValueError: if a selected feature column cannot be cast to float32.
        """
        self.dataset = pd.read_csv(csv_path, delimiter=',')
        self.length = self.dataset.shape[0]
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # serv_enc at files that come from a trusted source.
        with open(serv_enc, 'rb') as handle:
            self.servs_encoding = pickle.load(handle)

        # The previous per-row `row.drop(columns='Servicio')` had no effect:
        # Series.drop ignores `columns`. Dropping on the DataFrame really
        # removes the column; errors='ignore' keeps CSVs without it working.
        # NOTE(review): if 'Diferencia' sits within the first n_features
        # remaining columns it is also fed to the model - confirm column order.
        feature_frame = self.dataset.drop(columns='Servicio', errors='ignore').iloc[:, :n_features]

        # Convert once here instead of slicing the DataFrame on every
        # __getitem__ call, which dominated the data-loading time.
        feature_array = np.ascontiguousarray(feature_frame.to_numpy(dtype=np.float32, copy=True))
        target_array = self.dataset['Diferencia'].to_numpy(dtype=np.float32, copy=True)
        self._features = torch.from_numpy(feature_array)
        self._targets = torch.from_numpy(target_array)

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.length

    def __getitem__(self, index):
        """Return ``(x, y)`` for row ``index``: a 1-D float32 feature tensor
        and a 0-dim float32 target tensor."""
        return self._features[index], self._targets[index]

In [9]:
def save_model(net, optimizer, EPOCH, PATH, LOSS):
    """Write a training checkpoint to ``PATH`` using ``torch.save``.

    The stored dictionary has the keys ``epoch``, ``model_state_dict``,
    ``optimizer_state_dict`` and ``loss``.

    Args:
        net: model whose ``state_dict()`` is stored.
        optimizer: optimizer whose ``state_dict()`` is stored.
        EPOCH: value stored under ``epoch``.
        PATH: destination handed to ``torch.save``.
        LOSS: value stored under ``loss``.
    """
    checkpoint = dict(
        epoch=EPOCH,
        model_state_dict=net.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=LOSS,
    )
    torch.save(checkpoint, PATH)

def load_model(PATH, optimizer, eval=False):
    """Build a fresh ``Net`` and restore model/optimizer state from a checkpoint.

    Args:
        PATH: checkpoint file containing the keys ``model_state_dict``,
            ``optimizer_state_dict``, ``epoch`` and ``loss``.
        optimizer: optimizer to restore. It is re-pointed, in place, at the
            parameters of the newly built model before its state is loaded.
        eval: if true the model is returned in evaluation mode, otherwise in
            training mode.

    Returns:
        Tuple ``(model, optimizer, epoch, loss)`` with the model moved to the
        notebook-level ``device``.
    """
    model = Net(input_size=15)

    # map_location lets a checkpoint written on a GPU be opened on a machine
    # without CUDA (and vice versa).
    checkpoint = torch.load(PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)

    # The optimizer handed in still holds the parameters of the model it was
    # created for. Without re-binding, optimizer.step() would keep updating
    # that old model and the one returned here would never learn.
    optimizer.param_groups.clear()
    optimizer.state.clear()
    optimizer.add_param_group({'params': list(model.parameters())})
    # Loaded after the model is on `device` so the restored optimizer state is
    # matched to the parameters' device.
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    if eval:
        model.eval()
    else:
        model.train()
    return model, optimizer, epoch, loss

In [10]:
# Training split: order follows the `shuffle` setting.
# pin_memory is only useful (and only warning-free) when CUDA is available.
train_dataset = MyDataset(train_path, serv_enc)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle,
                               num_workers=4, pin_memory=use_cuda)

# Validation split: never shuffled. The validation error is an average of
# per-batch losses, so a fixed batch composition keeps it comparable across epochs.
val_dataset = MyDataset(val_path, serv_enc)
val_loader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=4, pin_memory=use_cuda)

In [11]:
# Model with 15 input features, moved to the selected device.
net = Net(input_size=15)
net = net.to(device)

# Adam optimizer over every model parameter.
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# Loss function: RMSE weighted by exp(-alpha * y).
# Alternatives that can be swapped in: nn.MSELoss() or RMSELoss().
criterion = CustomRMSELoss(alpha=alpha)

In [12]:
def train_model(train_loader, val_loader, net, optimizer, criterion, epochs, checkpoint_folder, epoch=None, log_interval=100, checkpoint_every=1, loss_path=None):
    """Train ``net``, checkpointing and logging train/validation loss per epoch.

    Args:
        train_loader: iterable of ``(x, y)`` training batches; must expose
            ``.dataset`` and ``len()`` (as a ``DataLoader`` does).
        val_loader: validation batches handed to ``eval_model``.
        net: model to optimise.
        optimizer: optimizer stepping ``net``'s parameters.
        criterion: loss called as ``criterion(flattened_output, y)``.
        epochs: exclusive upper bound of the epoch range.
        checkpoint_folder: directory receiving ``<epoch>-<timestamp>.pt`` files.
        epoch: last finished epoch when resuming (training restarts at
            ``epoch + 1``); ``None`` starts at epoch 0.
        log_interval: print progress every this many batches.
        checkpoint_every: save a checkpoint every this many epochs.
        loss_path: file that receives one ``epoch, train_loss, val_error`` line
            per epoch. Defaults to the notebook-level ``loss_file``.

    Returns:
        List with, for each epoch run, the sum of that epoch's batch losses.
    """
    if loss_path is None:
        loss_path = loss_file
    # Send batches to wherever the model actually lives.
    target_device = next(net.parameters()).device
    # Taken from the loader itself rather than from a notebook global.
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    first_epoch = 0 if epoch is None else epoch + 1
    epoch_loss = []
    for epoch in range(first_epoch, epochs):
        # Make sure dropout is active even if the model arrived in eval mode.
        net.train()
        running_loss = 0
        # Stays NaN only when the loader yields no batches at all.
        last_loss = float('nan')
        for batch_idx, (x, y) in enumerate(train_loader):
            x = x.to(target_device)
            y = y.to(target_device)
            optimizer.zero_grad()
            net_out = net(x)
            # Flatten the model output to 1-D before comparing with y.
            loss = criterion(torch.reshape(net_out, (-1,)), y)
            loss.backward()
            optimizer.step()

            last_loss = loss.item()
            running_loss += last_loss
            if batch_idx % log_interval == 0:
                print('\rTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(x), n_samples,
                    100. * batch_idx / n_batches, last_loss), end='')
        if epoch % checkpoint_every == 0:
            # The checkpoint records the loss of the epoch's final batch.
            save_model(net, optimizer, epoch, '{}/{}-{}.pt'.format(checkpoint_folder, epoch, time.time()), last_loss)

        error = eval_model(val_loader, criterion, net)
        with open(loss_path, 'a+') as loss_:
            loss_.write('{}, {}, {}\n'.format(epoch, running_loss, error))
        epoch_loss.append(running_loss)
    return epoch_loss

def eval_model(dataloader, criterion, model):
    """Return the mean of the per-batch losses of ``model`` over ``dataloader``.

    The model is switched to evaluation mode for the pass and put back into
    whichever mode (train/eval) it was in before the call. Gradients are not
    tracked.

    Args:
        dataloader: iterable yielding ``(x, y)`` tensor batches.
        criterion: loss called as ``criterion(flattened_output, y)``.
        model: module to evaluate.

    Returns:
        Unweighted mean of the batch losses (every batch counts equally,
        whatever its size). NaN if ``dataloader`` yields nothing.
    """
    # Evaluate on the device the model lives on; CPU if it has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    error = []
    try:
        with torch.no_grad():
            for x, y in dataloader:
                x = x.to(target_device)
                y = y.to(target_device)
                net_out = model(x)
                # Flatten the model output to 1-D before comparing with y.
                loss = criterion(torch.reshape(net_out, (-1,)), y)
                error.append(loss.detach().cpu().numpy())
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    return np.mean(error)

In [None]:
# To resume from a saved checkpoint, uncomment the two lines below so that
# `epoch` holds the last finished epoch; otherwise training starts at epoch 0.
#   file_ = '12-1608224067.6807644.pt'
#   net, optimizer, epoch, loss = load_model('{}/{}'.format(checkpoint_folder, file_), optimizer, eval=False)
epoch = None
train_model(
    train_loader,
    val_loader,
    net,
    optimizer,
    criterion,
    epochs,
    checkpoint_folder,
    epoch=epoch,
    log_interval=log_interval,
    checkpoint_every=checkpoint_every,
)

