In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable 
from torch.utils.data import DataLoader, Dataset

In [2]:
x_train = pd.read_csv('../data/x_train_std_1009.csv')
x_test = pd.read_csv('../data/x_test_std_1009.csv')

In [3]:
y_train = pd.read_csv('../data/y_train_1009.csv')
ID_test = pd.read_csv('../data/ID_test_1009.csv')

In [4]:
x_train.shape, x_test.shape,y_train.shape, ID_test.shape

((903653, 55), (804684, 55), (903653, 1), (804684, 1))

In [5]:
x_train = Variable(torch.FloatTensor(x_train.values))
y_train = Variable(torch.FloatTensor(y_train.values))
y_train = y_train.view(-1,1)
x_test= Variable(torch.FloatTensor(x_test.values))

In [6]:
x_test.size()

torch.Size([804684, 55])

In [7]:
x_train.size()

torch.Size([903653, 55])

In [8]:
y_train.size()

torch.Size([903653, 1])

In [9]:
class dataset(Dataset):
    def __init__(self):
        self.len = x_train.size()[0]
        self.x=x_train
        self.y=y_train
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.len

dataset = dataset()
train_loader = DataLoader(dataset=dataset, batch_size=100, num_workers=2)

In [10]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.layers = nn.Sequential(
                                nn.Linear(55,100),
                                nn.ReLU(),
                                nn.Linear(100,150),
                                nn.ReLU(),
                                nn.Linear(150,200),
                                nn.ReLU(),
                                nn.Linear(200,150),
                                nn.ReLU(),
                                nn.Linear(150,100),
                                nn.ReLU(),
                                nn.Linear(100,50),
                                nn.ReLU(),
                                nn.Linear(50,1),
        )
    def forward(self,x):
        out = self.layers(x)
        return out

model = Model()

In [11]:
model

Model(
  (layers): Sequential(
    (0): Linear(in_features=55, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=150, bias=True)
    (3): ReLU()
    (4): Linear(in_features=150, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=150, bias=True)
    (7): ReLU()
    (8): Linear(in_features=150, out_features=100, bias=True)
    (9): ReLU()
    (10): Linear(in_features=100, out_features=50, bias=True)
    (11): ReLU()
    (12): Linear(in_features=50, out_features=1, bias=True)
  )
)

In [12]:
loss_func = nn.MSELoss(size_average=False)
optimizer = optim.SGD(model.parameters(), lr=0.000001)

In [24]:
for epoch in range(3):
    for i, [x, y] in enumerate(train_loader):
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        loss.backward()
        optimizer.step()
    
        if i % 1000 == 0:
            print(epoch, i, loss)  

0 0 tensor(1.00000e-02 *
       1.6139)
0 1000 tensor(1.00000e-02 *
       2.6415)
0 2000 tensor(1.00000e-03 *
       7.0290)
0 3000 tensor(0.1227)
0 4000 tensor(1.00000e-02 *
       2.1846)
0 5000 tensor(1.00000e-03 *
       9.5296)
0 6000 tensor(1.00000e-03 *
       3.4960)
0 7000 tensor(1.00000e-03 *
       7.8686)
0 8000 tensor(1.00000e-03 *
       3.9318)
0 9000 tensor(1.00000e-02 *
       1.0819)
1 0 tensor(1.00000e-02 *
       1.1497)
1 1000 tensor(1.00000e-02 *
       1.6879)
1 2000 tensor(1.00000e-03 *
       9.5245)
1 3000 tensor(0.1150)
1 4000 tensor(1.00000e-02 *
       1.9302)
1 5000 tensor(1.00000e-03 *
       8.3531)
1 6000 tensor(1.00000e-03 *
       3.4639)
1 7000 tensor(1.00000e-03 *
       7.4067)
1 8000 tensor(1.00000e-03 *
       3.7039)
1 9000 tensor(1.00000e-02 *
       1.0318)
2 0 tensor(1.00000e-02 *
       1.0443)
2 1000 tensor(1.00000e-02 *
       1.6991)
2 2000 tensor(1.00000e-03 *
       8.5068)
2 3000 tensor(1.00000e-02 *
       9.3452)
2 4000 tensor(1.000

In [26]:
torch.save(model.state_dict(),'./DNN.pth')

In [13]:
model=Model()
model.load_state_dict(torch.load('./Dnn.pth'))

In [38]:
out = model(x_test)
print(out)
out.size()

tensor([[ 2.5099e-03],
        [ 8.5264e-03],
        [ 1.5822e-02],
        ...,
        [ 3.5140e-01],
        [ 7.3089e-02],
        [ 4.0721e-02]])


torch.Size([804684, 1])

In [40]:
out = out.view(-1)

In [41]:
out

tensor([ 2.5099e-03,  8.5264e-03,  1.5822e-02,  ...,  3.5140e-01,
         7.3089e-02,  4.0721e-02])

In [42]:
import numpy as np

In [43]:
result = out.data.numpy()

In [45]:
result = pd.Series(result)

In [17]:
test = pd.read_csv('../data/test.csv', dtype={'fullVisitorId': str} )

In [49]:
ID=test['fullVisitorId']

In [50]:
submission = pd.concat([ID, result], axis=1)

In [60]:
submission = submission.rename(index = str, columns={0:'PredictedLogRevenue'})

In [63]:
submission= submission.groupby(by='fullVisitorId').sum().reset_index()

In [64]:
submission.shape

(617242, 2)

In [65]:
submission .to_csv('../data/submission_DNN_1009.csv', index=False)