## Settings

In [1]:
# Make modules that live one directory up importable from this notebook
import sys
sys.path.append('..')

In [2]:
# Automatically pick up changes made to imported modules
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
from inputs import load_data
# XGB
from models import XGB
# Torch
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from models import TrainingDataset, Torch, transform_labels, restore_labels, train_model, validate_model, \
                   TestDataset, predict_test

## XGB

In [15]:
# Load the training inputs, training targets and test inputs for the XGB section
train_x, train_y, test_x = load_data()

In [8]:
# No explicit hyper-parameters are supplied: the XGB wrapper receives params=None.
# NOTE(review): presumably the wrapper falls back to its own defaults — confirm in models.
params = None
model = XGB(params)

In [9]:
# NOTE(review): the hold-out slicing is commented out, so the "validation" data
# (va_x, va_y) is exactly the training data (tr_x, tr_y). Anything in the fit
# cell that monitors the validation set is presumably monitoring training data.
tr_x = train_x#[:80]
va_x = train_x#[80:]
tr_y = train_y#[:80]
va_y = train_y#[80:]

In [15]:
# Fit the XGB wrapper on the training data, handing it the validation data,
# early_stopping_rounds=5 and verbose=False.
early_stopping = 5
model.fit(tr_x, tr_y, va_x, va_y,
          early_stopping_rounds=early_stopping,
          verbose=False)

In [11]:
# Predict on the test inputs with the fitted XGB wrapper
preds = model.predict(test_x)

In [12]:
# Build the two-column submission table: test-row index, then prediction.
index = test_x.index.to_numpy().reshape(-1, 1)
preds = preds.reshape(-1, 1)
# Assemble the frame column by column so each column keeps its own dtype.
# np.concatenate would coerce both columns to one common dtype (e.g. integer
# ids turn into floats when they sit next to float predictions).
submission = pd.DataFrame({0: index.ravel(), 1: preds.ravel()})

In [13]:
# Write the XGB submission to xgb.csv without a header row or the frame's index
submission.to_csv('xgb.csv', index=False, header=False)

## Torch

In [57]:
# Training dataset whose targets are passed through transform_labels
dataset = TrainingDataset(target_transform = transform_labels)

In [58]:
# Hold out 5% of the training dataset for validation.
train_ratio = 0.95
train_size = int(train_ratio * len(dataset))
# Despite its name, test_size is the size of the validation split.
test_size = len(dataset) - train_size
# Seed the split so the same samples end up in the validation set on every
# run; without a generator the split differs on each kernel restart.
split_seed = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

In [59]:
batch_size = 10

# Shuffle the training batches every epoch: with a fixed order, SGD sees the
# exact same sequence of mini-batches in each epoch. The validation loader
# keeps a fixed order because it is only used for evaluation.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Sanity check: print shape/dtype of the first validation batch only.
for X, y in val_dataloader:
    print('Shape of X', X.shape)
    print('Shape of y', y.shape, y.dtype)
    break

Shape of X torch.Size([6, 9])
Shape of y torch.Size([6]) torch.int64


In [60]:
# input_size matches the second dimension of the X batch printed by the
# dataloader check ([6, 9]).
# NOTE(review): output_size = 6 is presumably the number of classes produced
# by transform_labels — confirm against models.
# NOTE(review): `model` is reused here; it previously held the XGB wrapper.
input_size = 9
output_size = 6
model = Torch(input_size, output_size)

In [61]:
# Cross-entropy loss and plain SGD (learning rate 1e-3) over the model's parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [62]:
# Run 5 epochs; each epoch calls train_model on the training loader and then
# validate_model on the validation loader.
epochs = 5
for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train_model(train_dataloader, model, loss_fn, optimizer)
    validate_model(val_dataloader, model, loss_fn)
print('Done!')

Epoch 1
-------------------------------
loss: 5.613350  [    0/  101]
loss: 5.642447  [   20/  101]
loss: 2.586769  [   40/  101]
loss: 1.434297  [   60/  101]
loss: 1.036397  [   80/  101]
loss: 3.417500  [  100/  101]
Test Error: 
Accuracy: 33.3%, Avg loss: 0.250084 

Epoch 2
-------------------------------
loss: 1.788128  [    0/  101]
loss: 1.334235  [   20/  101]
loss: 1.270487  [   40/  101]
loss: 0.999954  [   60/  101]
loss: 1.153728  [   80/  101]
loss: 2.629848  [  100/  101]
Test Error: 
Accuracy: 16.7%, Avg loss: 0.286036 

Epoch 3
-------------------------------
loss: 1.749056  [    0/  101]
loss: 1.366430  [   20/  101]
loss: 1.291510  [   40/  101]
loss: 1.002410  [   60/  101]
loss: 1.153626  [   80/  101]
loss: 2.619985  [  100/  101]
Test Error: 
Accuracy: 16.7%, Avg loss: 0.282554 

Epoch 4
-------------------------------
loss: 1.743028  [    0/  101]
loss: 1.364628  [   20/  101]
loss: 1.290856  [   40/  101]
loss: 1.000938  [   60/  101]
loss: 1.152751  [   80/  10

In [63]:
# Wrap the test dataset in a loader (same batch_size as training) and run
# predict_test with the Torch model.
test_dataset = TestDataset()
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
preds = predict_test(test_dataloader, model)

In [64]:
# Apply restore_labels to every prediction and collect the results in a NumPy array.
# Note: `preds` is overwritten, so this cell should be run once per prediction pass.
preds = np.array([restore_labels(p) for p in preds])

In [65]:
# preds

In [66]:
# Only the test inputs are needed here, for their index.
_, _, test_x = load_data()
# Build the two-column submission table: test-row index, then prediction.
index = test_x.index.to_numpy().reshape(-1, 1)
preds = preds.reshape(-1, 1)
# Assemble the frame column by column so each column keeps its own dtype.
# np.concatenate would coerce both columns to one common dtype.
submission = pd.DataFrame({0: index.ravel(), 1: preds.ravel()})

In [67]:
# Write the Torch submission to torch.csv without a header row or the frame's index
submission.to_csv('torch.csv', index=False, header=False)

## Keras

In [32]:
# Reload the data for the Keras section (train_y is overwritten further down)
train_x, train_y, test_x = load_data()

In [33]:
from keras import Sequential, Input
from keras.utils import to_categorical
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

In [40]:
# The class count is taken as the largest label value + 1
# (assumes labels are non-negative integers starting at 0 — TODO confirm).
num_classes = train_y.max()+1
# NOTE(review): this overwrites train_y with its one-hot encoding, so the cell
# is not re-runnable on its own; re-run the load_data() cell before re-running it.
train_y = to_categorical(train_y, num_classes=num_classes)

In [42]:
# One input per column of train_x.
input_shape = (train_x.shape[1],)
# Small fully connected classifier: one hidden layer of 32 ReLU units and one
# output unit per class.
# NOTE(review): `model` is reused here; it previously held other models.
model = Sequential([
    Input(shape=input_shape),
    Dense(32, activation='relu'),
    # The output layer must emit a probability distribution over the classes
    # for categorical_crossentropy; ReLU outputs are unnormalised and can be
    # all zero, so softmax is used instead.
    Dense(num_classes, activation='softmax')
])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 32)                320       
_________________________________________________________________
dense_13 (Dense)             (None, 8)                 264       
Total params: 584
Trainable params: 584
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Adam optimizer with a learning rate of 1e-3
# NOTE(review): `optimizer` is reused here; it previously held the torch SGD optimizer.
learning_rate = 1e-3
optimizer = Adam(learning_rate=learning_rate)

In [44]:
batch_size = 10
epochs = 5

# Compile with categorical cross-entropy (train_y is one-hot encoded by the
# to_categorical cell) and track accuracy; validation_split=0.05 is passed to
# fit so part of the supplied data is held out for validation.
# The returned History object is the cell's displayed output.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs, validation_split=0.05)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ffd490368d0>