# Aprendizaje Profundo - C07 Clase 3


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Dataset

In [2]:
# Load the wine dataset.
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount cell is visible in this notebook -- confirm.
DATA_PATH = "/content/drive/MyDrive/Trabajo AP/class_7_wine_dataset_v2.csv"
df = pd.read_csv(DATA_PATH)

In [3]:
# `type` is categorical, so it is expanded into one-hot columns (prefix "type_").
# dtype=float keeps the dummies numeric: recent pandas versions return bool
# dummies by default, and mixing bool with float columns makes the later
# `to_numpy()` call produce an object array.
type_dummies = pd.get_dummies(df['type'], prefix="type", dtype=float)
# Drop dummy columns left over from a previous execution of this cell, so that
# re-running it does not append duplicated columns to `df`.
df = pd.concat(
    [df.drop(columns=type_dummies.columns, errors="ignore"), type_dummies],
    axis=1,
)

In [4]:
# Turn the task into binary classification:
# label 0 when quality <= 6, label 1 in every other case.
df["quality_label"] = (~(df["quality"] <= 6)).astype("int64")

In [5]:
# Keep only the rows that have no missing value in any column.
ds = df.dropna(axis=0, how="any")

In [6]:
# Inputs: drop every non-input column. Both target columns (`quality` and
# `quality_label`) must be removed so the output never leaks into the input.
x = ds.drop(columns=['Unnamed: 0', 'type', 'vendor_id', 'quality', 'quality_label'])
# Output: the binary quality label.
y = ds['quality_label']

In [7]:
# Min-max normalization: rescale every feature column to the [0, 1] range.
# np.asarray (instead of .to_numpy()) keeps this cell re-runnable once x and y
# are already arrays, and the explicit float dtype avoids object arrays.
x = np.asarray(x, dtype=np.float64)
y = np.asarray(y)

x_min = np.min(x, axis=0)
x_range = np.max(x, axis=0) - x_min
# A constant column has zero range; divide by 1 there so it maps to 0
# instead of producing NaN.
x_norm = (x - x_min) / np.where(x_range == 0, 1.0, x_range)
# NOTE(review): min/max are computed over the whole dataset, before the
# train/validation split done later in MyDataModule.setup, so validation rows
# influence the scaling.

## Pytorch Lightning

In [8]:
# Pytorch lightning no viene por defecto en google colab
!pip install pytorch_lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-1.8.1-py3-none-any.whl (798 kB)
[K     |████████████████████████████████| 798 kB 20.2 MB/s 
[?25hCollecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.10.2-py3-none-any.whl (529 kB)
[K     |████████████████████████████████| 529 kB 64.3 MB/s 
Collecting lightning-utilities==0.3.*
  Downloading lightning_utilities-0.3.0-py3-none-any.whl (15 kB)
Collecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 7.1 MB/s 
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115940 sha256=20d7d7b96025b673745b284f05a1e125bc1ebe868681a813dc2d1f30620ca65c
  Stored in directory: /root/.cache/pip/wheels/8a/67/fb/2e8a12fa16661b9d5af1f654bd199366799740a85c64981226
Successful

In [9]:
import torch
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import torchmetrics

In [10]:
# Remove the logs of old experiments. TensorBoard is pointed at
# lightning_logs/ at the end of the notebook.
!rm -rf lightning_logs

In [11]:
class MyDataset(Dataset):
    """Dataset that pairs each feature row with its label."""

    def __init__(self, x, y):
        """Keep references to the features `x` and the labels `y`."""
        self.x, self.y = x, y

    def __len__(self):
        """Return the number of samples, i.e. the size of the first axis of `x`."""
        n_samples = self.x.shape[0]
        return n_samples

    def __getitem__(self, idx):
        """Return the `(features, label)` pair stored at position `idx`."""
        sample = (self.x[idx], self.y[idx])
        return sample

In [12]:
# LightningDataModule is used to centralize the transformations the dataset
# needs, perform the train/validation split and create the DataLoaders.
class MyDataModule(pl.LightningDataModule):
    """Hold the full arrays, split them 85/15 and serve the DataLoaders.

    Args:
        x: Feature array; its first axis indexes the samples.
        y: Label array with one entry per sample.
        batch_size: Number of samples per batch in both DataLoaders.
        seed: Seed for the train/validation shuffle. Pass ``None`` to get a
            different random split on every run.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same number of samples.
    """

    # Fraction of the samples assigned to the training split.
    _TRAIN_FRACTION = 0.85

    def __init__(self, x, y, batch_size, seed=42):
        super().__init__()
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x and y must have the same number of samples, "
                f"got {x.shape[0]} and {y.shape[0]}"
            )
        self.x = x.astype(np.float32)
        self.y = y.astype(np.float32)
        self.batch_size = batch_size
        self.seed = seed
        # Filled in by setup(); None means the split has not been made yet.
        self.train_x = self.train_y = None
        self.valid_x = self.valid_y = None

    def setup(self, stage: "str | None" = None):
        """Create the train/validation split (only once)."""
        # setup() may run more than once on the same instance (presumably once
        # per Trainer stage: fit, validate, ...). Re-shuffling on each call
        # would move validation samples into the training split, so the split
        # is made a single time.
        if self.train_x is not None:
            return
        rng = np.random.default_rng(self.seed)
        idx = rng.permutation(self.x.shape[0])
        n_train = int(self._TRAIN_FRACTION * len(idx))
        train_idx, valid_idx = idx[:n_train], idx[n_train:]
        self.train_x = self.x[train_idx]
        self.train_y = self.y[train_idx]
        self.valid_x = self.x[valid_idx]
        self.valid_y = self.y[valid_idx]

    def train_dataloader(self):
        """Return the training DataLoader (reshuffled every epoch)."""
        train_split = MyDataset(self.train_x, self.train_y)
        return DataLoader(train_split, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation DataLoader (fixed order)."""
        val_split = MyDataset(self.valid_x, self.valid_y)
        return DataLoader(val_split, batch_size=self.batch_size)

In [13]:
# Wrap the normalized features and the labels, served in batches of 64 samples.
dataModule = MyDataModule(x_norm, y, batch_size=64)

### Arquitectura NNet PyTorch Lightning
La red neuronal en PyTorch Lightning se define extendiendo `pl.LightningModule`, que a su vez extiende `torch.nn.Module`.

In [14]:
class NNet(pl.LightningModule):
    """Fully connected binary classifier: in_features -> 200 -> 100 -> 1.

    The last layer is followed by a sigmoid, so `forward` returns one
    probability per sample with shape (batch, 1).

    Args:
        in_features: Number of input features per sample.
        lr: Learning rate of the Adam optimizer.
    """

    def __init__(self, in_features=13, lr=0.001):
        super().__init__()
        self.lr = lr
        self.linear_1 = torch.nn.Linear(in_features=in_features, out_features=200, bias=True)
        self.relu_1 = torch.nn.ReLU()
        self.linear_2 = torch.nn.Linear(in_features=200, out_features=100, bias=True)
        self.relu_2 = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(in_features=100, out_features=1, bias=True)
        self.output = torch.nn.Sigmoid()
        self.train_acc = self._binary_accuracy()
        self.valid_acc = self._binary_accuracy()

    @staticmethod
    def _binary_accuracy():
        """Build an accuracy metric that works across torchmetrics versions."""
        try:
            # torchmetrics >= 0.11 requires the `task` argument.
            return torchmetrics.Accuracy(task="binary")
        except (TypeError, ValueError):
            # Older releases do not know `task`; use the legacy constructor.
            return torchmetrics.Accuracy()

    def forward(self, x):
        """Return the predicted probability for each row of `x`."""
        x = self.linear_1(x)
        x = self.relu_1(x)
        x = self.linear_2(x)
        x = self.relu_2(x)
        x = self.linear_3(x)
        x = self.output(x)
        return x

    def configure_optimizers(self):
        """Return the Adam optimizer over all the parameters of the network."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def _shared_step(self, batch, accuracy):
        """Compute the loss and the batch accuracy for one (x, y) batch."""
        x_batch, y_batch = batch
        x_batch = x_batch.float()
        # Targets are reshaped to (batch, 1) to match the network output.
        y_batch = y_batch.float().reshape(-1, 1)
        # Call the module itself (not .forward) so that module hooks run.
        y_hat = self(x_batch)
        loss = F.binary_cross_entropy(y_hat, y_batch)
        # Probabilities are thresholded at 0.5 to obtain hard 0/1 predictions.
        acc = accuracy((y_hat >= 0.5).int(), y_batch.int())
        return loss, acc

    def training_step(self, train_batch, batch_idx):
        """Run one training batch, log epoch-level loss/accuracy, return the loss."""
        loss, acc = self._shared_step(train_batch, self.train_acc)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("train_acc", acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Run one validation batch, log epoch-level loss/accuracy, return the loss."""
        loss, acc = self._shared_step(val_batch, self.valid_acc)
        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("val_acc", acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss

In [15]:
# Instantiate the network
nnet = NNet()

In [16]:
# Train the network for 20 epochs.
# accelerator='auto' uses the GPU when one is available and falls back to the
# CPU otherwise, so the cell also runs on runtimes without a GPU.
trainer = pl.Trainer(max_epochs=20, accelerator='auto', devices=1)
trainer.fit(nnet, datamodule=dataModule)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type     | Params
---------------------------------------
0 | linear_1  | Linear   | 2.8 K 
1 | relu_1    | ReLU     | 0     
2 | linear_2  | Linear   | 20.1 K
3 | relu_2    | ReLU     | 0     
4 | linear_3  | Linear   | 101   
5 | output    | Sigmoid  | 0     
6 | train_acc | Accuracy | 0     
7 | valid_acc | Accuracy | 0     
---------------------------------------
23.0 K    Trainable params
0         Non-trainable params
23.0 K    Total params
0.092     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


In [None]:
# Launch TensorBoard to inspect the results of the "experiments"
%load_ext tensorboard
%tensorboard --logdir lightning_logs/