In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [2]:
df = pd.read_csv('./train_data.csv')

In [3]:
class Classifiction(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(18,100),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(100,80),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(80,60),
            nn.ReLU(),
            nn.Linear(60,24),
            nn.Sigmoid()
        )
    def forward(self, x):
        x = self.net(x)
        return x

In [4]:
dataset = df.values[:,0:18]
target = df.values[:,18:19]
target = target.reshape(target.shape[0])
target = torch.from_numpy(target).long()
dataset = torch.from_numpy(dataset).float()

In [5]:
train_data = dataset[0:1000000,:]
test_data = dataset[1000000:1130000,:]
train_target = target[0:1000000]
test_target = target[1000000:1130000]

In [6]:
trainset = torch.utils.data.TensorDataset(train_data, train_target)
trainloader = torch.utils.data.DataLoader(
    dataset = trainset,
    batch_size = 10000,
    drop_last = True,
    shuffle = True
)
testset = torch.utils.data.TensorDataset(test_data, test_target)
testloader = torch.utils.data.DataLoader(
    dataset = testset,
    batch_size = 10000,
    drop_last = True,
    shuffle = True
)

In [7]:
def train(model, train_loader, optimizer, criterion):
    model.train()
    train_loss = 0
    train_acc = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        output = torch.max(output,1)[1]
        train_acc += torch.eq(output, target).sum().item()
    return train_loss, train_acc

In [8]:
def test(model, test_loader):
    model.eval()
    test_acc = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            output = torch.max(output,1)[1]
            test_acc += torch.eq(output, target).sum().item()
    return test_acc 

In [9]:
net = Classifiction()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), 5e-3)

for epochs in range(20):
    loss = 0
    train_acc = 0
    test_acc = 0
    loss, train_acc = train(net, trainloader, optimizer, criterion)
    test_acc = test(net, testloader)
    train_acc = train_acc/1000000
    test_acc = test_acc/130000
    print(loss)
    print(train_acc)
    print(test_acc)

297.476167678833
0.291906
0.32911538461538464
286.1917586326599
0.431084
0.32911538461538464
284.143625497818
0.443827
0.32911538461538464
283.11486768722534
0.446987
0.32911538461538464
282.39137506484985
0.448491
0.32911538461538464
281.74310660362244
0.449325
0.32911538461538464
280.8954770565033
0.449764
0.32911538461538464
280.5708680152893
0.449917
0.32911538461538464
280.4181146621704
0.450142
0.32911538461538464
280.43808221817017
0.450323
0.32911538461538464
280.2769238948822
0.450346
0.32911538461538464
279.11492347717285
0.450349
0.32911538461538464
275.73455119132996
0.450271
0.32911538461538464
275.4878237247467
0.450337
0.32911538461538464
275.5074653625488
0.450481
0.32911538461538464
275.5204019546509
0.450549
0.32911538461538464
275.5150213241577
0.450563
0.32911538461538464
275.47487115859985
0.450549
0.32911538461538464
275.4791216850281
0.450605
0.32911538461538464
275.4493775367737
0.450602
0.32911538461538464


In [10]:
test_data = pd.read_csv('./pre_test.csv')
index = test_data.columns.values.tolist()[19:]
index = np.array(index)

In [11]:
test_data = test_data.values[:,1:19]
test_data = torch.from_numpy(test_data).float()

In [12]:
net.eval()
with torch.no_grad():
    out = net(test_data)
    preds = np.argsort(out, axis = 1)
    preds = np.fliplr(preds)[:, :7]
    test_id = np.array(pd.read_csv('./santander-product-recommendation/test_ver2.csv/test_ver2.csv', usecols = ['ncodpers'])['ncodpers'])
    final_preds = [" ".join(list(index[pred])) for pred in preds]
    out_df = pd.DataFrame({'ncodpers': test_id, 'added_products': final_preds})
    out_df.to_csv('./submission.csv', index = False)