In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils import data
import torch.nn.functional as F
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Relative locations of the two CSV inputs read by this notebook.
READ_PATH1 = "../../Dataset/Home/dataTrain.csv"
READ_PATH2 = "../../Dataset/Home/dataA.csv"

# Read each file into its own DataFrame (READ_PATH1 -> df_train, READ_PATH2 -> df_test).
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (READ_PATH1, READ_PATH2))

In [3]:
# Preview the first five rows of the training frame.
df_train.head(n=5)

Unnamed: 0,id,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f38,f39,f40,f41,f42,f43,f44,f45,f46,label
0,81167,0,1,mid,0,0,0,153,0,0,...,0,0,0,0,0,0,0,624,1539,0
1,50408,1,1,mid,0,0,21,0,0,0,...,0,0,0,0,0,0,0,186,366,0
2,9114,0,0,high,36,36,120,0,0,0,...,0,0,0,0,0,0,0,24,48,1
3,53228,1,1,low,0,0,0,0,0,0,...,0,0,0,0,0,0,3,3,9,0
4,56280,1,1,mid,9,51,294,0,0,0,...,0,0,0,0,0,0,0,42,141,0


In [4]:
# Preview the first five rows of the test frame.
df_test.head(n=5)

Unnamed: 0,id,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46
0,1,1,1,mid,0,0,0,0,0,0,...,147,0,0,0,0,0,0,6,51,168
1,2,0,1,mid,0,0,0,0,0,0,...,111,0,0,0,0,0,6,21,219,459
2,3,0,1,high,0,0,0,0,0,0,...,102,0,0,0,0,0,0,0,18,27
3,4,0,1,mid,0,0,0,0,0,0,...,78,0,0,0,0,0,0,0,9,9
4,5,1,1,low,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,78,243


In [5]:
# Features: every column except the row identifier and the target.
X_train = df_train.drop(columns=["id", "label"])
# Target: the "label" column.
Y_train = df_train["label"]

In [6]:
# Test features: everything except the row identifier.
X_test = df_test.drop(columns=["id"])

In [7]:
# One-hot encode the categorical column "f3".
#
# The category levels are taken from the training frame only and then applied
# to both frames, so train and test always end up with the same dummy columns
# in the same order. Encoding each frame independently would silently produce
# different columns if the test set lacked a level or if its first (dropped)
# level differed from the training set's.
f3_dtype = pd.CategoricalDtype(sorted(X_train["f3"].dropna().unique()))


def _one_hot_f3(frame):
    """Return ``frame`` with "f3" replaced by one-hot columns (first level dropped).

    Values of "f3" that are not among the training levels become missing when
    cast to ``f3_dtype`` and are therefore encoded as an all-zero row.
    """
    # dtype=int keeps the encoded frame purely numeric; boolean dummies mixed
    # with integer features would make ``.values`` an object array.
    dummies = pd.get_dummies(frame["f3"].astype(f3_dtype), drop_first=True, dtype=int)
    # Use plain string labels so the concatenated frame has an ordinary column index.
    dummies.columns = dummies.columns.astype(str)
    return pd.concat((frame, dummies), axis=1).drop(["f3"], axis=1)


X_train = _one_hot_f3(X_train)
X_test = _one_hot_f3(X_test)

In [8]:
class Net(nn.Module):
    """Fully connected network with tanh activations.

    The network is an input layer, followed by ``num_hidden_layer`` hidden
    layers of equal width, followed by a linear output layer.

    Args:
        num_input: Number of features in each input sample.
        num_output: Number of values (logits) produced per sample.
        num_hidden_layer: Number of ``num_neuron -> num_neuron`` hidden layers
            applied after the input layer.
        num_neuron: Output width of the input layer and width of every hidden layer.
    """

    def __init__(self, num_input=1, num_output=1, num_hidden_layer=9, num_neuron=20):
        super(Net, self).__init__()
        self.input_layer = nn.Linear(num_input, num_neuron)
        self.hidden_layer = nn.ModuleList([nn.Linear(num_neuron, num_neuron) for _ in range(num_hidden_layer)])
        self.output_layer = nn.Linear(num_neuron, num_output)
        self.act = torch.tanh

    def forward(self, x):
        """Return the raw output-layer values (no final activation) for ``x``."""
        out_each = self.act(self.input_layer(x))
        for layer in self.hidden_layer:
            out_each = self.act(layer(out_each))
        out = self.output_layer(out_each)
        return out

    def predict(self, x):
        """Return hard 0/1 labels for a batch of samples.

        Softmax is taken over dimension 1 of the forward output and only the
        first two columns are compared: a sample is labelled 0 when column 0
        is strictly greater than column 1, and 1 otherwise (ties go to 1).

        Args:
            x: Batch of inputs accepted by ``forward``; the forward output must
                have at least two columns.

        Returns:
            A 1-D ``torch.int64`` tensor on the CPU with one label per sample.
            It carries no autograd history.
        """
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            pred = F.softmax(self.forward(x), dim=1)
            # Vectorised form of "0 if p0 > p1 else 1" for every row at once.
            labels = (~(pred[:, 0] > pred[:, 1])).long()
        return labels.cpu()

In [9]:
# Seed the split, the weight initialisation and the batch shuffling so a
# fresh "Restart & Run All" reproduces the same training run.
SEED = 42
torch.manual_seed(SEED)

# Convert the feature frame and the labels to tensors. The explicit float32
# cast also covers the case where the one-hot columns are boolean, which would
# otherwise turn ``.values`` into an object array that torch cannot convert.
X_train = torch.from_numpy(X_train.values.astype(np.float32))
Y_train = torch.LongTensor(Y_train.values)

# Hold out 20% of the rows for validation.
x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=SEED)

train_dataset = data.TensorDataset(x_train, y_train)
train_dataloader = data.DataLoader(train_dataset, batch_size=2000, shuffle=True)

# 47 input features, 2 output logits, 4 hidden layers of 64 units each.
model = Net(47, 2, 4, 64)
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr)
epoch = 10000
loss_func = nn.CrossEntropyLoss()

# Not used by this cell; retained in case other cells read them.
mse_x, mse_y = [], []
l2_error_x, l2_error_y = [], []
best_error = 1e9

# Best validation accuracy seen so far; a checkpoint is written only when it improves.
best_acc = 0.0
for i in tqdm(range(epoch)):
    # One pass over the training set in mini-batches.
    for input_x, input_y in train_dataloader:
        y_predict = model(input_x)
        loss = loss_func(y_predict, input_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Evaluate on the held-out rows every 10 epochs.
    if (i + 1) % 10 == 0:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            ans = model.predict(x_val)
        acc = (ans == y_val).sum().item() / len(y_val)
        print('Accuracy: %f' % acc)
        if acc > best_acc:
            # Record the new best so later, worse epochs do not overwrite the checkpoint.
            best_acc = acc
            state = {
                "state_dict": model.state_dict()
            }
            torch.save(state, "../../Result/Home/NN/best_model.pkl")

  0%|          | 10/10000 [00:04<1:13:43,  2.26it/s]

Accuracy: 0.815699


  0%|          | 20/10000 [00:08<1:13:26,  2.27it/s]

Accuracy: 0.822964


  0%|          | 30/10000 [00:12<1:16:02,  2.19it/s]

Accuracy: 0.824969


  0%|          | 40/10000 [00:17<1:14:00,  2.24it/s]

Accuracy: 0.827975


  0%|          | 43/10000 [00:18<1:11:04,  2.33it/s]


KeyboardInterrupt: 