## Boston Dataset

In [6]:
from torch.utils.data import random_split
import pandas as pd
import torch
from torch.utils.data import Dataset

import warnings
warnings.filterwarnings('ignore')

In [2]:
class StandardScaler:
    """Feature-wise standardisation for PyTorch tensors.

    Values are mapped to ``(values - mean) / (std + epsilon)``. The statistics
    are reduced over every dimension except the last one, so the tensor may
    have any shape as long as the features are its last dimension.
    """

    def __init__(self, mean=None, std=None, epsilon=1e-7):
        """Create a scaler, optionally with precomputed statistics.

        :param mean: The mean of the features. Set by :meth:`fit` when omitted.
        :param std: The standard deviation of the features. Set by :meth:`fit` when omitted.
        :param epsilon: Added to ``std`` to avoid a Division-By-Zero.
        """
        self.mean = mean
        self.std = std
        self.epsilon = epsilon

    def _require_fitted(self):
        """Fail with a clear message instead of a ``None`` arithmetic TypeError."""
        if self.mean is None or self.std is None:
            raise RuntimeError(
                "StandardScaler has no statistics yet: call fit() or pass mean/std."
            )

    def fit(self, values):
        """Compute and store the per-feature mean and standard deviation.

        :param values: Tensor whose last dimension indexes the features.
        :return: ``self``, so that calls can be chained.
        """
        # Reduce over all leading dimensions; only the feature axis survives.
        dims = list(range(values.dim() - 1))
        self.mean = torch.mean(values, dim=dims)
        self.std = torch.std(values, dim=dims)
        return self

    def transform(self, values):
        """Standardise ``values`` with the stored statistics.

        :raises RuntimeError: If the scaler has not been fitted.
        """
        self._require_fitted()
        return (values - self.mean) / (self.std + self.epsilon)

    def inverse_transform(self, values):
        """Undo :meth:`transform`, mapping scaled values back to original units.

        :raises RuntimeError: If the scaler has not been fitted.
        """
        self._require_fitted()
        return values * (self.std + self.epsilon) + self.mean

    def fit_transform(self, values):
        """Fit on ``values`` and return them standardised."""
        return self.fit(values).transform(values)

In [3]:
class BostonDataset(Dataset):
  """Boston housing data as standardised ``(features, target)`` tensor pairs.

  The source file is read as whitespace-separated text without a header row.
  The 13 feature columns and the ``MEDV`` target are each standardised with
  their own scaler, which is kept on the instance (``x_scaler`` / ``y_scaler``)
  so that scaled values can later be related back to the original units.

  Note: the scaling statistics are computed over the whole file, i.e. before
  any train/validation split is made from this dataset.
  """

  COLUMNS = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
             'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
  TARGET = 'MEDV'

  def __init__(self, src_file, root_dir, transform=None):
    """Load and standardise the data.

    :param src_file: Path of the whitespace-separated data file.
    :param root_dir: Stored on the instance; not otherwise used by this class.
    :param transform: Optional callable applied to each ``(features, target)`` sample.
    """
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    frame = pd.read_csv(src_file, sep=r'\s+', names=self.COLUMNS)
    features = frame.drop(columns=[self.TARGET])
    target = frame[[self.TARGET]]

    x_tensor = torch.tensor(features.values)
    y_tensor = torch.tensor(target.values).type(torch.float32)

    # One scaler per tensor: reusing a single instance would overwrite the
    # feature statistics as soon as the target is fitted.
    self.x_scaler = StandardScaler()
    self.y_scaler = StandardScaler()
    x_scaled = self.x_scaler.fit_transform(x_tensor).type(torch.float32)
    y_scaled = self.y_scaler.fit_transform(y_tensor).type(torch.float32)

    # Remember where the features end instead of hard-coding the column count.
    self.n_features = x_scaled.shape[1]
    self.data = torch.cat((x_scaled, y_scaled), 1)
    self.root_dir = root_dir
    self.transform = transform

  def __len__(self):
    """Number of rows in the dataset."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return the ``(features, target)`` pair(s) at ``idx``."""
    if torch.is_tensor(idx):
      idx = idx.tolist()
    preds = self.data[idx, :self.n_features]
    spcs = self.data[idx, self.n_features:]
    sample = (preds, spcs)
    if self.transform:
      sample = self.transform(sample)
    return sample

In [4]:
# Load the data and check that the dataset yields a sample
dataset = BostonDataset("data/housing.csv",".")
display(dataset[0])

(tensor([-0.4194,  0.2845, -1.2866, -0.2723, -0.1441,  0.4133, -0.1199,  0.1401,
         -0.9819, -0.6659, -1.4576,  0.4406, -1.0745]),
 tensor([0.1595]))

### División de Train y test

In [7]:
from torch.utils.data import random_split

# Fixed seed so the train/validation partition and the training shuffle order
# are the same on every Restart & Run All.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

lonxitudeDataset = len(dataset)
tamTrain = int(lonxitudeDataset * TRAIN_FRACTION)
tamVal = lonxitudeDataset - tamTrain
print(f"Tam dataset: {lonxitudeDataset} train: {tamTrain} tamVal: {tamVal}")

data_rng = torch.Generator().manual_seed(SPLIT_SEED)
train_set, val_set = random_split(dataset, [tamTrain, tamVal], generator=data_rng)

train_ldr = torch.utils.data.DataLoader(train_set, batch_size=2,
    shuffle=True, drop_last=False, generator=data_rng)
# num_workers=0: the data is a small in-memory tensor, and worker processes
# cannot import a Dataset class defined in a notebook cell on platforms that
# start workers with "spawn".
validation_loader = torch.utils.data.DataLoader(val_set, batch_size=4,
    shuffle=False, num_workers=0)

Tam dataset: 506 train: 404 tamVal: 102


### Creación del Modelo

In [8]:
import torch.nn.functional as F
import torch.nn as nn

class Model(nn.Module):
    """Three-layer fully connected regressor: ``entradas -> 100 -> 50 -> 1``.

    ReLU is applied after each of the two hidden layers; the output layer is
    linear.
    """

    def __init__(self, entradas):
        """Build the layers.

        :param entradas: Number of input features.
        """
        super().__init__()
        first_width, second_width = 100, 50
        self.layer1 = nn.Linear(in_features=entradas, out_features=first_width)
        self.layer2 = nn.Linear(in_features=first_width, out_features=second_width)
        self.layer3 = nn.Linear(in_features=second_width, out_features=1)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        activation = x
        for hidden in (self.layer1, self.layer2):
            activation = F.relu(hidden(activation))
        return self.layer3(activation)

### Instanciamos el modelo

In [9]:
# Seed PyTorch's global RNG before the layers are created so that the random
# weight initialisation is the same on every fresh run.
torch.manual_seed(42)

model     = Model(13)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# reduction='sum': the loss of a batch is the summed (not averaged) squared error.
loss_fn   = nn.MSELoss(reduction='sum')
display(model)

Model(
  (layer1): Linear(in_features=13, out_features=100, bias=True)
  (layer2): Linear(in_features=100, out_features=50, bias=True)
  (layer3): Linear(in_features=50, out_features=1, bias=True)
)

### Prueba de una iteración de entrenamiento

In [10]:
# Smoke test: pull a single batch from the training loader, run it through
# the model and compute the loss once, without taking an optimisation step.
entradaProba,dest = next(iter(train_ldr))
print("Entrada:")
display(entradaProba)
print("Desexada:")
display(dest)
# Forward pass on the sampled batch
saida = model(entradaProba) 
print("Saída:")
display(saida)
# Last expression of the cell, so the loss tensor is shown as the cell output
loss_fn(saida, dest)

Entrada:


tensor([[-0.4038,  0.4561, -0.7692, -0.2723, -1.0675,  0.2880, -1.9743,  1.7104,
         -0.2928, -0.4642,  0.2977,  0.4338, -0.9457],
        [ 0.6656, -0.4872,  1.0150, -0.2723,  1.0727,  0.1357,  0.9601, -0.8676,
          1.6596,  1.5294,  0.8058, -3.2417,  1.6002]])

Desexada:


tensor([[ 0.2030],
        [-1.4171]])

Saída:


tensor([[0.0999],
        [0.1636]], grad_fn=<AddmmBackward0>)

tensor(2.5090, grad_fn=<MseLossBackward0>)

### Función de Entrenamiento

In [11]:
def train_one_epoch(epoch_index, tb_writer):
    """Run one optimisation pass over ``train_ldr``.

    Relies on the notebook-level ``model``, ``optimizer``, ``loss_fn`` and
    ``train_ldr``. ``epoch_index`` and ``tb_writer`` are accepted but are not
    used by the body.

    :return: Sum of the per-batch loss values divided by the number of batches.
    """
    accumulated = 0.
    for features, targets in train_ldr:
        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass.
        batch_loss = loss_fn(model(features), targets)
        batch_loss.backward()

        # Apply the parameter update.
        optimizer.step()

        accumulated += batch_loss.item()

    n_batches = len(train_ldr)
    return accumulated / n_batches



In [13]:
from torchmetrics import MeanSquaredError, MeanAbsoluteError, R2Score
from torch.utils.tensorboard import SummaryWriter

In [17]:
EPOCHS = 100
tb = SummaryWriter()

for epoch in range(EPOCHS):
    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch, tb)

    # Switch to evaluation mode once, then score the validation split with
    # fresh metric objects so nothing carries over between epochs.
    model.eval()
    mean_squared_error = MeanSquaredError()
    mean_absolute_error = MeanAbsoluteError()
    r2Score = R2Score()
    with torch.no_grad():
        for entradas, saidas in validation_loader:
            voutputs = model(entradas)
            mean_squared_error(voutputs, saidas)
            mean_absolute_error(voutputs, saidas)
            r2Score(voutputs, saidas)
    errorMedio = mean_squared_error.compute()
    errorAbsolute = mean_absolute_error.compute()
    r2 = r2Score.compute()

    # Log to TensorBoard. The training loss goes under "train"; the metrics
    # above come from validation_loader, so they are tagged "val".
    tb.add_scalar("Loss/train", avg_loss, epoch)
    tb.add_scalar("MSE/val", errorMedio, epoch)
    tb.add_scalar("MAE/val", errorAbsolute, epoch)
    tb.add_scalar("R^2/val", r2, epoch)

# Flush pending events to disk and release the event file.
tb.close()

#### Nota: tras ejecutar la celda anterior se crea la carpeta "runs" en el directorio de trabajo.

### Para ver los gráficos:
#### En la consola, ejecuta: tensorboard --logdir runs/
#### TensorBoard mostrará una dirección HTTP en la que se puede visualizar el entrenamiento en tiempo real:
#### http://localhost:6006/