In [1]:
import torch as t
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

In [6]:
class traindata(Dataset):
    def __init__(self):
        xy = np.loadtxt('train.csv', dtype='int', delimiter=',')
        self.y_data = t.from_numpy(xy[:, 11])
        xy = np.delete(xy, 11, axis=1)
        self.x_data = t.from_numpy(xy)
        self.len = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [7]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fn1 = nn.Linear(43, 20)
        self.fn2 = nn.Linear(20, 10)
        self.fn3 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = x.view(x.size()[0], -1)
        x = F.sigmoid(self.fn1(x))
        x = F.sigmoid(self.fn2(x))
        x = F.sigmoid(self.fn3(x))
        return x

In [8]:
net = Net()
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.000003, momentum=0.9)

In [9]:
mytraindata = traindata()
trainloader = DataLoader(dataset = mytraindata, batch_size = 32, shuffle = True, num_workers = 0)

In [12]:
for epoch in range(6):
    

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        
        
        inputs, labels = data
        inputs = Variable(inputs).float()
        labels = Variable(labels).float()
 
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.data.item()
        if i % 4000 == 3999:
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/200))
            running_loss = 0.0
print('Finished')


[1,  4000] loss: 3.346
[1,  8000] loss: 3.332
[1, 12000] loss: 3.318
[1, 16000] loss: 3.317
[1, 20000] loss: 3.279
[2,  4000] loss: 3.279
[2,  8000] loss: 3.263
[2, 12000] loss: 3.249
[2, 16000] loss: 3.245
[2, 20000] loss: 3.235
[3,  4000] loss: 3.212
[3,  8000] loss: 3.215
[3, 12000] loss: 3.227
[3, 16000] loss: 3.220
[3, 20000] loss: 3.217
[4,  4000] loss: 3.203
[4,  8000] loss: 3.194
[4, 12000] loss: 3.178
[4, 16000] loss: 3.187
[4, 20000] loss: 3.190
[5,  4000] loss: 3.178
[5,  8000] loss: 3.202
[5, 12000] loss: 3.169
[5, 16000] loss: 3.154
[5, 20000] loss: 3.171
[6,  4000] loss: 3.188
[6,  8000] loss: 3.155
[6, 12000] loss: 3.145
[6, 16000] loss: 3.170
[6, 20000] loss: 3.158
Finished


In [15]:
class Testdata(Dataset):
    def __init__(self):
        xy = np.loadtxt('test.csv', dtype='int', delimiter=',')
        
        self.x_data = t.from_numpy(xy)
        self.len = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x_data[index]
    
    def __len__(self):
        return self.len

In [16]:
mytestdata = Testdata()
testloader = DataLoader(dataset = mytestdata, batch_size = 1, shuffle = False, num_workers = 0)

In [17]:
pred = t.Tensor(200000, 1)
for i, data in enumerate(testloader, 0):
    x = data
    outputs = net(Variable(x).float())
    pred[i] = outputs.data.item()
    

In [18]:
pred[pred>0.5]

tensor([])

In [19]:
idd = t.arange(800000, 1000000).view(-1, 1)
print(idd.shape, pred.shape)
pred = t.cat([idd, pred], dim = 1)
pred[10,:]

torch.Size([200000, 1]) torch.Size([200000, 1])


tensor([8.0001e+05, 2.2335e-01])

In [20]:
np.savetxt('pred.csv', pred, fmt = '%.4f', delimiter=',')