## Settings

In [1]:
# Make modules located one directory above this notebook importable
import sys
sys.path.append('..')

In [2]:
# Automatically pick up changes made to imported modules
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
from inputs import load_data
# XGB
from models import XGB
# Torch
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from models import TrainingDataset, Torch, train_model, validate_model, \
                   TestDataset, predict_test

## XGB

In [4]:
# Unpack the three objects returned by the project's load_data helper.
# NOTE(review): going by the names these are presumably training features,
# training targets and test features — confirm against inputs.load_data.
train_x, train_y, test_x = load_data()

In [8]:
# No hyper-parameters are supplied: None is handed straight to the XGB wrapper.
# NOTE(review): presumably the wrapper then falls back to its own defaults —
# confirm in models.XGB.
params = None
model = XGB(params)

In [9]:
# Training and validation inputs are both bound to the complete training set
# (the former [:80] / [80:] hold-out slices are disabled).
# NOTE(review): anything measured on va_x / va_y is therefore measured on the
# same rows the model is fitted on — confirm this is intended.
tr_x, va_x = train_x, train_x
tr_y, va_y = train_y, train_y

In [15]:
# Fit the XGB wrapper on the training pair, passing the validation pair alongside.
# early_stopping is forwarded as early_stopping_rounds and verbose=False is passed as-is.
# NOTE(review): presumably fitting stops after 5 rounds without improvement on the
# validation pair and per-round logging is silenced — confirm in models.XGB.fit.
early_stopping = 5
model.fit(tr_x, tr_y, va_x, va_y,
          early_stopping_rounds=early_stopping,
          verbose=False)

In [11]:
# Predict for the test inputs with the XGB wrapper fitted above.
preds = model.predict(test_x)

In [12]:
# Assemble the submission table: column 0 is the test index, column 1 the prediction.
# Both pieces are turned into (n, 1) column vectors and stacked side by side, so
# the resulting array (and therefore both columns) shares a single dtype.
index = test_x.index.to_numpy().reshape(-1, 1)
preds = preds.reshape(-1, 1)
submission = pd.DataFrame(np.hstack((index, preds)))

In [13]:
# Write the two-column table to xgb.csv without the DataFrame index or a header row.
submission.to_csv('xgb.csv', index=False, header=False)

## Torch

In [40]:
def transform_labels(y):
    """Map a raw class label to a contiguous zero-based class index.

    The raw labels handled are 1, 2, 3, 5, 6 and 7 (there is no 4); they are
    mapped, in that order, to the indices 0 through 5.

    Args:
        y: Raw label. It is compared with ``==`` against each known label, so
            any scalar-like value that supports that comparison is accepted.

    Returns:
        int: The class index in ``range(6)`` corresponding to ``y``.

    Raises:
        ValueError: If ``y`` is not one of the known raw labels.
    """
    raw_labels = (1, 2, 3, 5, 6, 7)
    for class_index, raw_label in enumerate(raw_labels):
        if y == raw_label:
            return class_index
    # Falling through used to return None implicitly, which only failed later
    # (far from the cause) when the target was batched or fed to the loss.
    raise ValueError(
        f'Unexpected label {y!r}; expected one of {raw_labels}'
    )

In [41]:
# Build the training dataset, supplying transform_labels as its target_transform.
dataset = TrainingDataset(target_transform=transform_labels)

In [42]:
# Randomly split the dataset into a training part and a validation part.
train_ratio = 0.8
train_size = int(train_ratio * len(dataset))
# Remainder goes to validation (the name test_size is kept for compatibility).
test_size = len(dataset) - train_size
# A dedicated, seeded generator makes the split identical on every run, so the
# reported validation metrics are reproducible after Restart & Run All.
split_seed = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

In [43]:
batch_size = 10

# Both loaders are configured identically apart from the subset they wrap.
loader_options = {'batch_size': batch_size}
train_dataloader = DataLoader(train_dataset, **loader_options)
val_dataloader = DataLoader(val_dataset, **loader_options)

# Sanity check: report the tensor shapes (and target dtype) of the first
# validation batch, if there is one.
first_batch = next(iter(val_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print('Shape of X', X.shape)
    print('Shape of y', y.shape, y.dtype)

Shape of X torch.Size([10, 9])
Shape of y torch.Size([10]) torch.int64


In [49]:
# Network dimensions: 9 inputs (the validation batch printed above has shape
# [10, 9]) and 6 outputs (transform_labels produces the class indices 0-5).
# NOTE: this rebinds `model`, which held the XGB wrapper in the XGB section.
input_size = 9
output_size = 6
model = Torch(input_size, output_size)

In [50]:
# Cross-entropy loss on the model outputs, optimised with SGD over all model
# parameters at a learning rate of 1e-3.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [51]:
# Run a fixed number of epochs; each one prints a header, calls train_model on
# the training loader and then validate_model on the validation loader.
# NOTE(review): the recorded output shows per-batch "loss:" lines followed by an
# accuracy / average-loss summary per epoch — presumably printed by train_model
# and validate_model respectively; confirm in models.
epochs = 5
for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train_model(train_dataloader, model, loss_fn, optimizer)
    validate_model(val_dataloader, model, loss_fn)
    print('\n')
print('Done!')

Epoch 1
-------------------------------
loss: 2.547250  [    0/   85]
loss: 1.713688  [   20/   85]
loss: 1.771018  [   40/   85]
loss: 1.452805  [   60/   85]
loss: 2.468002  [   80/   85]
Test Error: 
Accuracy: 18.2%, Avg loss: 0.231427 



Epoch 2
-------------------------------
loss: 1.284566  [    0/   85]
loss: 1.528397  [   20/   85]
loss: 1.799482  [   40/   85]
loss: 1.338935  [   60/   85]
loss: 1.777973  [   80/   85]
Test Error: 
Accuracy: 50.0%, Avg loss: 0.218241 



Epoch 3
-------------------------------
loss: 1.151711  [    0/   85]
loss: 1.480496  [   20/   85]
loss: 1.792628  [   40/   85]
loss: 1.337609  [   60/   85]
loss: 1.776502  [   80/   85]
Test Error: 
Accuracy: 50.0%, Avg loss: 0.218181 



Epoch 4
-------------------------------
loss: 1.151702  [    0/   85]
loss: 1.480107  [   20/   85]
loss: 1.790741  [   40/   85]
loss: 1.336833  [   60/   85]
loss: 1.775420  [   80/   85]
Test Error: 
Accuracy: 50.0%, Avg loss: 0.218159 



Epoch 5
--------------------

In [52]:
# Wrap the test dataset in a loader (same batch_size as the training loaders)
# and pass it with the Torch model to predict_test.
# NOTE: this rebinds `preds`, which held the XGB predictions in the XGB section.
test_dataset = TestDataset()
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
preds = predict_test(test_dataloader, model)

In [53]:
# Display the collected predictions (the recorded output is a list of 6-element tensors).
# NOTE(review): in the recorded output the last four scores of every row are
# exactly 0 — worth checking the output layer / activation in models.Torch.
preds

[tensor([1.6872, 1.7993, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6753, 1.8407, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.7023, 1.7446, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6979, 1.8294, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.7027, 1.7427, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.7040, 1.7476, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6936, 1.7428, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6874, 1.8413, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6979, 1.8317, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6924, 1.8773, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.7018, 1.8316, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.7207, 1.7922, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6183, 1.8668, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6882, 1.8465, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6175, 1.7174, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6860, 1.8071, 0.0000, 0.0000, 0.0000, 0.0000]),
 tensor([1.6923, 1.8602, 0.0000, 0.0000, 0.0000, 0.0000]