In [1]:
import torch
from torch import nn, optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

args = {
    'batch_size': 20,
    'num_workers': 4,
    'num_classes': 10,
    'lr': 1e-4,
    'weight_decay': 5e-4,
    'num_epochs': 30
}

if torch.cuda.is_available():
  args['device'] = torch.device('cuda')
else:
  args['device'] = torch.device('cpu')

print(args['device'])

cpu


In [2]:
! wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip
!unzip bike+sharing+dataset.zip

--2024-02-23 17:54:50--  https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘bike+sharing+dataset.zip’

bike+sharing+datase     [  <=>               ] 273.43K  1.02MB/s    in 0.3s    

2024-02-23 17:54:51 (1.02 MB/s) - ‘bike+sharing+dataset.zip’ saved [279992]

Archive:  bike+sharing+dataset.zip
  inflating: Readme.txt              
  inflating: day.csv                 
  inflating: hour.csv                


In [3]:
!ls


bike+sharing+dataset.zip  day.csv  hour.csv  Readme.txt  sample_data


In [4]:
df = pd.read_csv('hour.csv')
print(len(df))
df.head()

17379


Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [5]:
torch.manual_seed(1)
indices = torch.randperm(len(df)).tolist()

train_size = int(0.8*len(df))
df_train = df.iloc[indices[:train_size]]
df_test = df.iloc[indices[train_size:]]

print(len(df_train), len(df_test))

df_train.to_csv('bike_train.csv', index=False)
df_test.to_csv('bike_test.csv', index=False)

!ls

13903 3476
bike+sharing+dataset.zip  bike_test.csv  bike_train.csv  day.csv  hour.csv  Readme.txt	sample_data


In [6]:
class bicicreta(Dataset):
  def __init__(self, csv_path):

    self.dados = pd.read_csv(csv_path).to_numpy()

  def __getitem__(self,idx):

    sample = self.dados[idx][2:14]
    label = self.dados[idx][-1:]

    sample = torch.from_numpy(sample.astype(np.float32))
    label = torch.from_numpy(label.astype(np.float32))

    return sample, label

  def __len__(self):

    return len(self.dados)

In [7]:
train_set = bicicreta('bike_train.csv')
test_set = bicicreta('bike_test.csv')

dado, rotulo = train_set[0]

print(rotulo)
print(dado)

tensor([373.])
tensor([ 4.0000,  1.0000, 11.0000, 19.0000,  0.0000,  4.0000,  1.0000,  1.0000,
         0.3800,  0.3939,  0.2700,  0.3582])


In [8]:
train_loader = DataLoader(train_set,
                          batch_size=args['batch_size'],
                          shuffle=True,
                          num_workers=args['num_workers'])

test_loader = DataLoader(test_set,
                          batch_size=args['batch_size'],
                          shuffle=True,
                          num_workers=args['num_workers'])



In [9]:
for batch in train_loader:

  dado, rotulo = batch
  print(dado.size(), rotulo.size())
  break


torch.Size([20, 12]) torch.Size([20, 1])


In [10]:
class MLP(nn.Module):

  def __init__(self, input_size, hidden_size, out_size):
    super(MLP, self).__init__()

    self.features  = nn.Sequential(
                      nn.Linear(input_size, hidden_size),
                      nn.ReLU(),
                      nn.Linear(hidden_size, hidden_size),
                      nn.ReLU()
                    )
    self.out     = nn.Linear(hidden_size, out_size)

  def forward(self, X):

    feature = self.features(X)
    output  = self.out(feature)

    return output

input_size  = len(train_set[0][0])
hidden_size = 128
out_size    = 1 #classes

torch.manual_seed(42)
net = MLP(input_size, hidden_size, out_size).to(args['device']) #cast na GPU

In [11]:
criterion = nn.L1Loss().to(args['device'])
optimizer = optim.Adam(net.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])


In [12]:
def treino(train_loader, net, epoch):

  net.train()
  epoch_loss = []
  for batch in train_loader:

    dado, rotulo = batch
    # Cast na GPU
    dado   = dado.to(args['device'])
    rotulo = rotulo.to(args['device'])
    # Forward
    pred = net(dado)
    loss = criterion(pred, rotulo)
    epoch_loss.append(loss.cpu().data)
    # Backward
    loss.backward()
    optimizer.step()
  epoch_loss = np.asarray(epoch_loss)
  print("Epoca %d, Loss: %.4f +\- %.4f" % (epoch, epoch_loss.mean(), epoch_loss.std()))


In [13]:
def test(test_loader, net, epoch):

  net.eval()
  with torch.no_grad():
    epoch_loss = []
    for batch in train_loader:

      dado, rotulo = batch
      # Cast na GPU
      dado   = dado.to(args['device'])
      rotulo = rotulo.to(args['device'])
      # Forward
      pred = net(dado)
      loss = criterion(pred, rotulo)
      epoch_loss.append(loss.cpu().data)

    epoch_loss = np.asarray(epoch_loss)
    print("Epoca %d, Loss: %.4f +\- %.4f" % (epoch, epoch_loss.mean(), epoch_loss.std()))

In [14]:
for epoch in range(args['num_epochs']):
  treino(train_loader, net, epoch)
  test(test_loader, net, epoch)



Epoca 0, Loss: 149.3162 +\- 38.0812
Epoca 0, Loss: 130.5572 +\- 23.6419
Epoca 1, Loss: 123.9384 +\- 30.3576
Epoca 1, Loss: 115.8362 +\- 26.2342
Epoca 2, Loss: 120.9584 +\- 28.0806
Epoca 2, Loss: 123.9800 +\- 30.9021
Epoca 3, Loss: 117.3581 +\- 28.2725
Epoca 3, Loss: 115.4157 +\- 29.2404
Epoca 4, Loss: 114.4151 +\- 27.8733
Epoca 4, Loss: 109.1239 +\- 24.5350
Epoca 5, Loss: 109.6804 +\- 29.2849
Epoca 5, Loss: 113.5745 +\- 23.5999
Epoca 6, Loss: 104.1306 +\- 25.9364
Epoca 6, Loss: 105.3206 +\- 22.7964
Epoca 7, Loss: 102.0591 +\- 25.5752
Epoca 7, Loss: 105.4553 +\- 22.6840
Epoca 8, Loss: 99.9840 +\- 26.4504
Epoca 8, Loss: 95.7948 +\- 22.4558
Epoca 9, Loss: 99.5769 +\- 25.7261
Epoca 9, Loss: 95.0867 +\- 27.0889
Epoca 10, Loss: 97.2715 +\- 23.7601
Epoca 10, Loss: 103.4008 +\- 30.1548
Epoca 11, Loss: 98.1435 +\- 26.0308
Epoca 11, Loss: 93.5941 +\- 21.6881
Epoca 12, Loss: 95.1613 +\- 27.3116
Epoca 12, Loss: 90.8056 +\- 21.6964
Epoca 13, Loss: 95.8310 +\- 23.6462
Epoca 13, Loss: 99.8983 +\- 28.