## DT Level 3 Day-2 Bike Sharing Demand

## 1. Import Required Libraries

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

## 2. Loading Data

In [2]:
train = pd.read_csv('../../Day2/Bike Sharing Demand Data/train.csv', parse_dates=['datetime'])
test = pd.read_csv('../../Day2/Bike Sharing Demand Data/test.csv', parse_dates=['datetime'])

In [3]:
print(train.shape)
train.head()


(10886, 12)


Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


In [4]:
print(test.shape)
test.head()

(6493, 9)


Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.88,56,11.0014
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.88,56,11.0014


## 3. Data Pre-processing


### 3-1 datetime 

In [5]:
train['year']=train['datetime'].dt.year
train['month']=train['datetime'].dt.month
train['day']=train['datetime'].dt.day
train['hour']=train['datetime'].dt.hour

test['year']=test['datetime'].dt.year
test['month']=test['datetime'].dt.month
test['day']=test['datetime'].dt.day
test['hour']=test['datetime'].dt.hour

In [6]:
train.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count,year,month,day,hour
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16,2011,1,1,0
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40,2011,1,1,1
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32,2011,1,1,2
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13,2011,1,1,3
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1,2011,1,1,4


In [7]:
test.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027,2011,1,20,0
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,1
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,2
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,3
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,4


### 3-2 Feature selection

In [8]:
train.columns

Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count',
       'year', 'month', 'day', 'hour'],
      dtype='object')

In [9]:
train_feature_name =['season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed','year','month','day','hour','count']

test_feature_name =['season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed','year','month','day','hour',]

In [10]:
train = train[train_feature_name]
test = test[test_feature_name]


In [11]:
print(train.shape)
train.head()

(10886, 13)


Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour,count
0,1,0,0,1,9.84,14.395,81,0.0,2011,1,1,0,16
1,1,0,0,1,9.02,13.635,80,0.0,2011,1,1,1,40
2,1,0,0,1,9.02,13.635,80,0.0,2011,1,1,2,32
3,1,0,0,1,9.84,14.395,75,0.0,2011,1,1,3,13
4,1,0,0,1,9.84,14.395,75,0.0,2011,1,1,4,1


In [12]:
print(test.shape)
test.head()

(6493, 12)


Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,1,0,1,1,10.66,11.365,56,26.0027,2011,1,20,0
1,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,1
2,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,2
3,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,3
4,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,4


### 3-3 Conver to Tensor

In [13]:
train = Variable(torch.FloatTensor(train.values))
test = Variable(torch.FloatTensor(test.values))

In [14]:
print(train.size())
train

torch.Size([10886, 13])


tensor([[    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             0.0000,    16.0000],
        [    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             1.0000,    40.0000],
        [    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             2.0000,    32.0000],
        ...,
        [    4.0000,     0.0000,     1.0000,  ...,    19.0000,
            21.0000,   168.0000],
        [    4.0000,     0.0000,     1.0000,  ...,    19.0000,
            22.0000,   129.0000],
        [    4.0000,     0.0000,     1.0000,  ...,    19.0000,
            23.0000,    88.0000]])

In [15]:
print(test.size())
test.data

torch.Size([6493, 12])


tensor([[    1.0000,     0.0000,     1.0000,  ...,     1.0000,
            20.0000,     0.0000],
        [    1.0000,     0.0000,     1.0000,  ...,     1.0000,
            20.0000,     1.0000],
        [    1.0000,     0.0000,     1.0000,  ...,     1.0000,
            20.0000,     2.0000],
        ...,
        [    1.0000,     0.0000,     1.0000,  ...,    12.0000,
            31.0000,    21.0000],
        [    1.0000,     0.0000,     1.0000,  ...,    12.0000,
            31.0000,    22.0000],
        [    1.0000,     0.0000,     1.0000,  ...,    12.0000,
            31.0000,    23.0000]])

In [16]:
test.shape[0]

6493

In [17]:
train[:,:-1]

tensor([[    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             1.0000,     0.0000],
        [    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             1.0000,     1.0000],
        [    1.0000,     0.0000,     0.0000,  ...,     1.0000,
             1.0000,     2.0000],
        ...,
        [    4.0000,     0.0000,     1.0000,  ...,    12.0000,
            19.0000,    21.0000],
        [    4.0000,     0.0000,     1.0000,  ...,    12.0000,
            19.0000,    22.0000],
        [    4.0000,     0.0000,     1.0000,  ...,    12.0000,
            19.0000,    23.0000]])

### 3-4 Setting Dataloader


In [36]:
class dataset(Dataset):
    def __init__(self):
        self.len = train.shape[0]
        self.size = train.size()
        self.x= train[:,0:-1]
        self.y=train[:,[-1]]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.len
    

dataset = dataset()
train_loader = DataLoader(dataset=dataset, batch_size=5, shuffle=True)

## 4. Model

In [45]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.layers = nn.Sequential(
                                nn.Linear(12,20),
                                nn.ReLU(),
                                nn.Linear(20,10),
                                nn.ReLU(),
                                nn.Linear(10,5),
                                nn.ReLU(),
                                nn.Linear(5,5),
                                nn.ReLU(),
                                nn.Linear(5,1))
        
    def forward(self, x):
        out = self.layers(x)
        return out
        
model =Model()

In [46]:
model

Model(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=20, bias=True)
    (1): ReLU()
    (2): Linear(in_features=20, out_features=10, bias=True)
    (3): ReLU()
    (4): Linear(in_features=10, out_features=5, bias=True)
    (5): ReLU()
    (6): Linear(in_features=5, out_features=5, bias=True)
    (7): ReLU()
    (8): Linear(in_features=5, out_features=1, bias=True)
  )
)

In [52]:
loss_func = nn.MSELoss(size_average=False)
optimizer = optim.SGD(model.parameters(), lr=0.001)

## 5. Training

In [54]:
for epoch in range(50):
    for i, [x, y] in enumerate(train_loader):
        x, y = Variable(x), Variable(y)
        
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        loss.backward()
        optimizer.step()
    
        if i % 100 == 0:
            print(i, loss)
    

0 tensor(74337.0156)
100 tensor(67977.1641)
200 tensor(65561.5000)
300 tensor(1.00000e+05 *
       5.1651)
400 tensor(1.00000e+05 *
       1.5522)
500 tensor(65905.3750)
600 tensor(1.00000e+05 *
       1.3124)
700 tensor(1.00000e+05 *
       1.0587)
800 tensor(1.00000e+05 *
       1.1571)
900 tensor(78773.0469)
1000 tensor(70741.7031)
1100 tensor(1.00000e+05 *
       3.3143)
1200 tensor(51508.9375)
1300 tensor(78765.8203)
1400 tensor(1.00000e+05 *
       1.2128)
1500 tensor(1.00000e+05 *
       3.0360)
1600 tensor(1.00000e+05 *
       1.0425)
1700 tensor(1.00000e+05 *
       1.6811)
1800 tensor(1.00000e+05 *
       1.3493)
1900 tensor(1.00000e+05 *
       1.4644)
2000 tensor(1.00000e+05 *
       1.1047)
2100 tensor(1.00000e+05 *
       1.6772)
0 tensor(3.3012e+05)
100 tensor(47040.7344)
200 tensor(72455.3828)
300 tensor(75913.8984)
400 tensor(23825.9824)
500 tensor(1.00000e+05 *
       1.4673)
600 tensor(1.00000e+05 *
       2.7488)
700 tensor(64088.1719)
800 tensor(1.00000e+05 *
     

1300 tensor(1.00000e+05 *
       3.0615)
1400 tensor(90221.2969)
1500 tensor(1.5650e+05)
1600 tensor(1.00000e+05 *
       1.0876)
1700 tensor(4.7211e+05)
1800 tensor(68896.5781)
1900 tensor(30307.6016)
2000 tensor(89342.2656)
2100 tensor(1.00000e+05 *
       3.3023)
0 tensor(95577.4375)
100 tensor(1.00000e+05 *
       1.4615)
200 tensor(43233.0312)
300 tensor(1.00000e+05 *
       4.4910)
400 tensor(1.00000e+05 *
       1.3827)
500 tensor(93782.0547)
600 tensor(1.00000e+05 *
       2.3251)
700 tensor(1.00000e+05 *
       1.3193)
800 tensor(1.00000e+05 *
       1.9645)
900 tensor(1.00000e+05 *
       1.4873)
1000 tensor(1.00000e+05 *
       1.1906)
1100 tensor(60904.1562)
1200 tensor(1.00000e+05 *
       1.2504)
1300 tensor(43566.2031)
1400 tensor(1.00000e+05 *
       1.2408)
1500 tensor(1.00000e+05 *
       2.8174)
1600 tensor(1.00000e+05 *
       1.2707)
1700 tensor(1.00000e+05 *
       1.1535)
1800 tensor(1.00000e+05 *
       1.1518)
1900 tensor(70250.9766)
2000 tensor(45995.7695)
210

2000 tensor(1.00000e+05 *
       1.2588)
2100 tensor(1.00000e+05 *
       1.6524)
0 tensor(1.00000e+05 *
       4.7848)
100 tensor(1.00000e+05 *
       2.8516)
200 tensor(1.00000e+05 *
       1.7921)
300 tensor(1.00000e+05 *
       1.5545)
400 tensor(1.00000e+05 *
       3.0192)
500 tensor(83626.1875)
600 tensor(1.00000e+05 *
       2.7868)
700 tensor(1.00000e+05 *
       1.2042)
800 tensor(81552.6797)
900 tensor(1.00000e+05 *
       2.7200)
1000 tensor(1.00000e+05 *
       1.4158)
1100 tensor(81729.9844)
1200 tensor(55058.5000)
1300 tensor(1.00000e+05 *
       1.2119)
1400 tensor(1.00000e+05 *
       1.0845)
1500 tensor(1.00000e+05 *
       1.2604)
1600 tensor(1.00000e+05 *
       1.4214)
1700 tensor(68458.1406)
1800 tensor(90647.4375)
1900 tensor(1.00000e+05 *
       4.9680)
2000 tensor(19675.2676)
2100 tensor(65941.5078)
0 tensor(1.00000e+05 *
       1.3143)
100 tensor(60136.9609)
200 tensor(1.00000e+05 *
       3.8571)
300 tensor(1.00000e+05 *
       1.9159)
400 tensor(1.00000e+05 

500 tensor(1.00000e+05 *
       1.9482)
600 tensor(1.00000e+05 *
       1.8474)
700 tensor(1.00000e+05 *
       1.0600)
800 tensor(55675.6602)
900 tensor(1.00000e+05 *
       1.2855)
1000 tensor(1.00000e+05 *
       1.0928)
1100 tensor(1.00000e+05 *
       1.0755)
1200 tensor(1.00000e+05 *
       1.0435)
1300 tensor(1.00000e+05 *
       1.5275)
1400 tensor(1.00000e+05 *
       7.3121)
1500 tensor(1.00000e+05 *
       5.6911)
1600 tensor(85718.2656)
1700 tensor(70851.3828)
1800 tensor(1.00000e+05 *
       1.3026)
1900 tensor(99295.8438)
2000 tensor(1.00000e+05 *
       1.3403)
2100 tensor(1.00000e+05 *
       1.0154)
0 tensor(1.00000e+05 *
       1.8120)
100 tensor(1.00000e+05 *
       2.4627)
200 tensor(1.00000e+05 *
       7.2722)
300 tensor(1.00000e+05 *
       1.9946)
400 tensor(1.00000e+05 *
       1.2870)
500 tensor(82067.0625)
600 tensor(1.00000e+05 *
       2.7459)
700 tensor(1.00000e+05 *
       2.3772)
800 tensor(56026.9883)
900 tensor(88459.5625)
1000 tensor(1.00000e+05 *
   

800 tensor(1.00000e+05 *
       3.0315)
900 tensor(1.00000e+05 *
       2.0813)
1000 tensor(66390.2656)
1100 tensor(1.00000e+05 *
       1.8026)
1200 tensor(93330.8281)
1300 tensor(89316.9219)
1400 tensor(1.00000e+05 *
       1.6672)
1500 tensor(1.00000e+05 *
       1.6028)
1600 tensor(1.00000e+05 *
       2.6589)
1700 tensor(60672.0352)
1800 tensor(78129.7969)
1900 tensor(32280.8340)
2000 tensor(1.00000e+05 *
       1.4008)
2100 tensor(1.00000e+05 *
       1.9480)
0 tensor(1.00000e+05 *
       1.7698)
100 tensor(1.00000e+05 *
       1.0570)
200 tensor(1.00000e+05 *
       5.7018)
300 tensor(1.00000e+05 *
       1.2724)
400 tensor(79584.6016)
500 tensor(1.00000e+05 *
       5.4801)
600 tensor(68467.6562)
700 tensor(49571.2852)
800 tensor(1.00000e+05 *
       1.3754)
900 tensor(1.00000e+05 *
       3.0750)
1000 tensor(31588.0938)
1100 tensor(70342.5625)
1200 tensor(1.00000e+05 *
       2.1341)
1300 tensor(1.00000e+05 *
       3.5916)
1400 tensor(1.00000e+05 *
       3.1852)
1500 tensor(

In [80]:
predictions=model(test)
predictions= predictions.data.numpy()
predictions=pd.DataFrame(predictions)

In [81]:
submit= pd.read_csv('../../Day2/Bike Sharing Demand Data/submission.csv')
submit['count'] = predictions

In [82]:
submit.to_csv("../../Day2/Bike Sharing Demand Data/submission.csv", index=False)