<a href="https://colab.research.google.com/github/RodrigoOBC/Notebook_MarcineLearning/blob/master/Rede_Neural_predizer_vendas_bicicleta.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch import optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn import metrics
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import time
import os


import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Training hyperparameters and DataLoader settings, gathered in a single dict.
args = dict(
    epoch_num=200,      # Number of epochs.
    lr=5e-5,            # Learning rate.
    weight_decay=5e-4,  # L2 penalty (regularization).
    num_workers=3,      # Number of DataLoader workers.
    batch_size=20,      # Batch size.
)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
args['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(args['device'])

cuda


In [3]:
# Download the UCI Bike Sharing archive and extract it into the working directory.
# -nc: do not download again when the zip already exists, so re-running this cell
#      does not leave numbered duplicates (Bike-Sharing-Dataset.zip.1, ...).
# -o:  overwrite already-extracted files instead of stopping at unzip's
#      "replace file?" prompt when the cell is re-run.
! wget -nc https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip
! unzip -o Bike-Sharing-Dataset.zip


--2020-12-12 22:23:48--  https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 279992 (273K) [application/x-httpd-php]
Saving to: ‘Bike-Sharing-Dataset.zip’


2020-12-12 22:23:49 (996 KB/s) - ‘Bike-Sharing-Dataset.zip’ saved [279992/279992]

Archive:  Bike-Sharing-Dataset.zip
  inflating: Readme.txt              
  inflating: day.csv                 
  inflating: hour.csv                


In [5]:
# Load the hourly table (hour.csv) that was extracted from the downloaded archive.
df = pd.read_csv('hour.csv')
# Preview the first rows to check that the file was parsed as expected.
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [6]:
# Fix the torch RNG seed so the shuffled row order below is the same on every run.
torch.manual_seed(1)
# Random permutation of the row positions 0..len(df)-1, as a plain Python list.
indices = torch.randperm(len(df)).tolist()

In [7]:
# 80/20 split: the first 80% of the shuffled positions form the training set,
# the remaining positions form the test set.
train_size = int(0.8 * len(df))
train_idx, test_idx = indices[:train_size], indices[train_size:]
df_train = df.iloc[train_idx]
df_test = df.iloc[test_idx]

In [9]:
# Show a few test rows, then write both splits to CSV without the index column.
display(df_test.head())
for frame, path in ((df_train, 'bike_train.csv'), (df_test, 'bike_test.csv')):
  frame.to_csv(path, index=False)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
12663,12664,2012-06-16,2,1,6,20,0,6,0,2,0.66,0.6212,0.47,0.194,123,229,352
1801,1802,2011-03-20,1,0,3,18,0,0,0,1,0.38,0.3939,0.4,0.3582,58,98,156
16567,16568,2012-11-28,4,1,11,1,0,3,1,2,0.26,0.2576,0.75,0.2239,0,12,12
8817,8818,2012-01-08,1,1,1,5,0,0,0,2,0.32,0.3333,0.49,0.1045,0,2,2
2608,2609,2011-04-23,2,0,4,14,0,6,0,1,0.58,0.5455,0.78,0.3582,182,209,391


In [11]:
class Bicicletas(Dataset):
  """Map-style dataset over a bike-sharing CSV file.

  Each item is a ``(features, target)`` pair of float32 tensors: the features
  are the row's columns at positions 2 to 13, the target is the row's last
  column (kept as a one-element tensor).
  """

  def __init__(self, csv_path, scaler_feat=None, scaler_label=None):
    """Read ``csv_path`` and keep its contents as a NumPy array.

    ``scaler_feat`` and ``scaler_label`` are accepted but currently unused.
    """
    table = pd.read_csv(csv_path)
    self.dados = table.to_numpy()

  def __len__(self):
    """Return the number of rows in the loaded table."""
    return len(self.dados)

  def __getitem__(self, idx):
    """Return the ``(features, target)`` tensors for row ``idx``."""
    row = self.dados[idx]

    # Cast each slice to float32 before wrapping it as a tensor.
    features = torch.from_numpy(row[2:14].astype(np.float32))
    label = torch.from_numpy(row[-1:].astype(np.float32))

    return features, label

In [18]:
# Wrap the saved CSV splits as Dataset objects.
# NOTE(review): these assignments rebind df_train / df_test, which held the
# DataFrame splits until this cell runs.
df_train = Bicicletas('bike_train.csv')
df_test = Bicicletas('bike_test.csv')


In [19]:
# Inspect the first training item: a (features, target) pair of tensors.
df_train[0]

(tensor([ 4.0000,  1.0000, 11.0000, 19.0000,  0.0000,  4.0000,  1.0000,  1.0000,
          0.3800,  0.3939,  0.2700,  0.3582]), tensor([373.]))

In [21]:
# Both loaders share the batch size and worker count; only the training
# loader reshuffles its items.
loader_kwargs = dict(batch_size=args['batch_size'],
                     num_workers=args['num_workers'])
train_loader = DataLoader(df_train, shuffle=True, **loader_kwargs)
test_loader = DataLoader(df_test, shuffle=False, **loader_kwargs)

In [22]:
# Pull a single batch from the test loader and print the tensor shapes.
batch = next(iter(test_loader), None)
if batch is not None:
  dado, rotulo = batch
  print('## Dimensionalidade do batch ##')
  print(dado.size(), rotulo.size())

## Dimensionalidade do batch ##
torch.Size([20, 12]) torch.Size([20, 1])


In [24]:
class MLP(nn.Module):
  """Fully connected network: two hidden Linear+ReLU layers and a Linear+ReLU head."""

  def __init__(self, input_size, hidden_size, out_size):
    """Build the layers.

    Args:
      input_size: number of input features.
      hidden_size: width of both hidden layers.
      out_size: number of output units.
    """
    super().__init__()

    hidden_layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
    ]
    self.features = nn.Sequential(*hidden_layers)

    # The head also ends in a ReLU, so the network's outputs are never negative.
    head_layers = [nn.Linear(hidden_size, out_size), nn.ReLU()]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, X):
    """Run ``X`` through the hidden stack and then the output head."""
    return self.classifier(self.features(X))

In [26]:
# Take the input width from the feature tensor of the first training item.
first_features = df_train[0][0]
input_size = first_features.size(0)
hidden_size = 128
out_size = 1

# Build the network and move it to the configured device.
Rede_nn = MLP(input_size, hidden_size, out_size)
Rede_nn = Rede_nn.to(args['device'])
Rede_nn

MLP(
  (features): Sequential(
    (0): Linear(in_features=12, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
  )
  (classifier): Sequential(
    (0): Linear(in_features=128, out_features=1, bias=True)
    (1): ReLU()
  )
)

In [28]:
# L1 (mean absolute error) loss, moved to the configured device.
# treinamento() and testar_rede() read the loss from the global name
# `criterion`, so it is bound under that name; `criterio` is kept as an alias
# of the same object for any code that refers to the original spelling.
criterion = nn.L1Loss().to(args['device'])
criterio = criterion

# Adam over all network parameters, using the learning rate and L2 penalty from args.
optimizer = optim.Adam(Rede_nn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

In [36]:
def treinamento(train_loader, net, epoch):
  """Train ``net`` for one pass over ``train_loader`` and report the loss.

  Relies on the notebook-level globals ``args`` (for the device),
  ``criterion`` and ``optimizer``.

  Args:
    train_loader: iterable yielding ``(dado, rotulo)`` batches.
    net: model to optimise; it is switched to training mode.
    epoch: epoch number, used only in the printed summary.

  Returns:
    The mean of the per-batch losses over this pass.
  """
  net.train()
  start = time.time()
  epoch_loss  = []

  for batch in train_loader:

    dado, rotulo = batch
    dado = dado.to(args['device'])
    rotulo = rotulo.to(args['device'])

    # backward() adds to the stored gradients, so clear the previous batch's
    # gradients first; otherwise every step uses the sum over all batches so far.
    optimizer.zero_grad()

    ypred = net(dado)
    loss = criterion(ypred, rotulo)
    # Keep only a detached CPU copy of the scalar loss for the epoch statistics.
    epoch_loss.append(loss.detach().cpu())

    loss.backward()
    optimizer.step()

  epoch_loss = np.asarray(epoch_loss)
  end = time.time()
  print('#################### Treino ####################')
  print(f'Epoch {epoch}, Loss: {epoch_loss.mean():.4f} +/- {epoch_loss.std():.4f}, Time: {end-start:.2f}')

  return epoch_loss.mean()

In [38]:
def testar_rede(test_loader, net, epoch):
  """Evaluate ``net`` on ``test_loader`` and report the loss.

  Relies on the notebook-level globals ``args`` (for the device) and
  ``criterion``. The model is put in eval mode and no gradients are tracked.

  Args:
    test_loader: iterable yielding ``(dado, rotulo)`` batches.
    net: model to evaluate.
    epoch: epoch number, used only in the printed summary.

  Returns:
    The mean of the per-batch losses over this pass.
  """
  net.eval()
  start = time.time()
  epoch_loss = []

  with torch.no_grad():
    for dado, rotulo in test_loader:
      # Move the batch to the configured device.
      dado, rotulo = dado.to(args['device']), rotulo.to(args['device'])

      # Forward pass and loss for this batch.
      loss = criterion(net(dado), rotulo)
      epoch_loss.append(loss.detach().cpu())

  epoch_loss = np.asarray(epoch_loss)
  end = time.time()

  print('********** Teste **********')
  print(f'Epoch {epoch}, Loss: {epoch_loss.mean():.4f} +/- {epoch_loss.std():.4f}, Time: {end-start:.2f}')

  return epoch_loss.mean()