In [1]:
import torch
from torch import nn, optim
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
args = {
    'batch_size':20,
    'num_workers':40,
    'num_classes': 10,
    'lr': 1e-4,
    'weight_decay':5e-4,
    'num_epochs':30
}

if torch.cuda.is_available():
  args['device'] = torch.device('cuda')
else:
  args['device'] = torch.device('cuda')

print(args['device'])

cuda


In [3]:
df = pd.read_csv('hour.csv')
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [6]:
torch.manual_seed(1)

indices = torch.randperm(len(df)).tolist()
train_size = int(0.8 * len(df))

df_train = df.iloc[indices[:train_size]]
df_test = df.iloc[indices[train_size:]]

print(len(df_train), len(df_test))

df_train.to_csv('bike_train.csv', index=False)
df_test.to_csv('bike_test.csv', index=False)

13903 3476


In [16]:
class Bicicletinha(Dataset):

  def __init__(self, csv_path):
    self.dados = pd.read_csv(csv_path).to_numpy()


  def __getitem__(self, idx):
    sample = self.dados[idx][2:14]
    label = self.dados[idx][-1:]

    #converter pra tensor

    sample = torch.from_numpy(sample.astype(np.float32))
    label = torch.from_numpy(label.astype(np.float32))

    return sample, label

  def __len__(self):
    return len(self.dados)

In [17]:
train_set = Bicicletinha('bike_train.csv')
test_set = Bicicletinha('bike_test.csv')

dado, rotulo = train_set[0]
print(dado)
print(rotulo)

tensor([ 4.0000,  1.0000, 11.0000, 19.0000,  0.0000,  4.0000,  1.0000,  1.0000,
         0.3800,  0.3939,  0.2700,  0.3582])
tensor([373.])


In [18]:
train_loader = DataLoader(train_set,
                          batch_size=args['batch_size'],
                          shuffle=True,
                          num_workers=args['num_workers'])

test_loader = DataLoader(test_set,
                          batch_size=args['batch_size'],
                          shuffle=True,
                          num_workers=args['num_workers'])

  cpuset_checked))


In [20]:
for batch in train_loader:
  dado,rotulo = batch
  print(dado.size(), rotulo.size())
  break

  cpuset_checked))


torch.Size([20, 12]) torch.Size([20, 1])


In [21]:
class MLP(nn.Module):

  def __init__(self, input_size, hidden_size, out_size):
    super(MLP, self).__init__()

    self.features = nn.Sequential(nn.Linear(input_size, hidden_size),
                                  nn.ReLU(),
                                  nn.Linear(hidden_size, hidden_size),
                                  nn.ReLU()
                                  )
    self.out = nn.Linear(hidden_size, out_size)
    
  def forward(self, X):

    feature = self.features(X)
    output = self.out(feature)

    return output

input_size = 12
hidden_size = 128
out_size = 1

net = MLP(input_size, hidden_size, out_size).to(args['device'])

In [22]:
criterion = nn.L1Loss().to(args['device'])
optimizer = optim.Adam(net.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

In [25]:
def train(train_loader, net, epoch):  
    net.train()
    
    epoch_loss = []
    for batch in train_loader:

      dado, rotulo = batch

      dado = dado.to(args['device'])
      rotulo = rotulo.to(args['device'])

      pred = net(dado)
      loss = criterion(pred, rotulo)
      epoch_loss.append(loss.cpu().data)

      loss.backward()
      optimizer.step()

    epoch_loss = np.asarray(epoch_loss)

    print(f'Epoca: {epoch}\n Loss: {epoch_loss.mean()} \n +/-: {epoch_loss.std()}')

In [26]:
def test(test_loader, net, epoch):  
  net.eval()
  with torch.no_grad():

    epoch_loss = []
    for batch in train_loader:

      dado, rotulo = batch

      dado = dado.to(args['device'])
      rotulo = rotulo.to(args['device'])

      pred = net(dado)
      loss = criterion(pred, rotulo)
      epoch_loss.append(loss.cpu().data)

    epoch_loss = np.asarray(epoch_loss)

    print(f'Epoca: {epoch}\n Loss: {epoch_loss.mean()} \n +/-: {epoch_loss.std()}')

In [27]:
for epoch in range(args['num_epochs']):
  train(train_loader, net, epoch)
  test(test_loader, net, epoch)

  cpuset_checked))


Epoca: 0
 Loss: 148.37576293945312 
 +/-: 39.8237190246582
Epoca: 0
 Loss: 129.7957763671875 
 +/-: 23.76120376586914
Epoca: 1
 Loss: 122.68828582763672 
 +/-: 30.46988868713379
Epoca: 1
 Loss: 116.0938491821289 
 +/-: 27.691028594970703
Epoca: 2
 Loss: 121.51791381835938 
 +/-: 28.29955291748047
Epoca: 2
 Loss: 125.84698486328125 
 +/-: 32.52687072753906
Epoca: 3
 Loss: 119.206787109375 
 +/-: 29.663169860839844
Epoca: 3
 Loss: 112.49465942382812 
 +/-: 27.31487274169922
Epoca: 4
 Loss: 115.67063903808594 
 +/-: 28.59526252746582
Epoca: 4
 Loss: 117.82762145996094 
 +/-: 24.166690826416016
Epoca: 5
 Loss: 111.41590118408203 
 +/-: 26.997655868530273
Epoca: 5
 Loss: 106.41204071044922 
 +/-: 24.88933563232422
Epoca: 6
 Loss: 110.1707534790039 
 +/-: 27.376914978027344
Epoca: 6
 Loss: 106.31611633300781 
 +/-: 28.182289123535156
Epoca: 7
 Loss: 105.945068359375 
 +/-: 26.80251121520996
Epoca: 7
 Loss: 110.7352066040039 
 +/-: 28.973508834838867
Epoca: 8
 Loss: 103.24319458007812 
 +/-: 