In [2]:
%matplotlib inline
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils import data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# データ
train_data = pd.read_csv("./dataset/NKI_RS_train_data.csv",index_col=0)
train_label = pd.read_csv("./dataset/NKI_RS_train_label.csv",index_col = 0)
test_data = pd.read_csv("./dataset/NKI_RS_test_data.csv", index_col = 0)
train_label = train_label["sex"]

In [5]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 5, shuffle=True, random_state = 0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # 訓練用の関数
# from train_eval_loop import train_valid_loop
# データをtensor型にかえる関数
from datapreparation import df2tensor

In [6]:
# modelのimport
from models_NN import Net_deeper_elu, Net_deeper_elu2, Net_deeper_mish
# rvsd2...ReLUをELUに変更
# deeper_mish... Net_deeperのReLUをELUに変更

# 訓練/検証用関数,train loss基準でEarly stop
from train_eval_loop_lossbassed import train_valid_loop_lossbased

In [7]:
# nky君のモデル
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_deeper_elu().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価, earlystoppipngによっていちばんauc_scoreの高かった
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        # この辺のfoldごとの最良モデルの読み込みをもう少しうまく、簡潔に書けないか
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 48th epoch.
Best auc score in validation is 0.7606823979591836
train loss:0.64525,train acc:0.63272, valid loss:0.67704, valid acc:0.61111, auc_score:0.71524
train loss:0.32740,train acc:0.98765, valid loss:0.61043, valid acc:0.67284, auc_score:0.70727
train loss:0.32355,train acc:0.99074, valid loss:0.61423, valid acc:0.67284, auc_score:0.76068
train loss:0.32194,train acc:0.99228, valid loss:0.61715, valid acc:0.67284, auc_score:0.70823
train loss:0.32041,train acc:0.99383, valid loss:0.62393, valid acc:0.64815, auc_score:0.75781
--------------------This is 2th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 62th epoch.
Best auc score in validation is 0.8279655612244897
train loss:0.64359,train acc:0.61111, valid loss:0.67433, valid acc:0.63580, auc_score:0.71237
train loss:0.32124,train acc:0.99383, valid loss:0.57091, valid acc:0.72840, auc_score:0.79257

In [8]:
model = Net_deeper_elu().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
test_data_tensor = torch.tensor(np.array(test_data.astype('f')))
prediction_deeper_elu = model.forward(test_data_tensor.to(device))
# print(bn_prediction)
print(prediction_deeper_elu[:,1])
df_submission = pd.read_csv("./dataset/sample_submission.csv",header = 0)
df_submission['sex'] = prediction_deeper_elu[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_deeper_elu.csv")

tensor([8.3489e-01, 8.4105e-01, 9.9968e-01, 5.8382e-04, 9.9856e-01, 3.6883e-02,
        6.7456e-01, 7.3420e-02, 9.9945e-01, 9.9560e-01, 9.8923e-01, 9.9536e-01,
        9.3438e-01, 9.0294e-01, 9.7370e-01, 5.3641e-01, 3.6033e-03, 1.4421e-01,
        3.5614e-01, 9.6844e-01, 9.8661e-01, 1.1420e-01, 3.8427e-02, 7.5880e-01,
        9.9646e-01, 7.9554e-02, 6.0484e-01, 1.6634e-01, 3.8393e-03, 8.9741e-01,
        6.0413e-01, 2.6333e-02, 4.3738e-01, 9.2383e-01, 3.4997e-06, 9.9754e-01,
        3.8307e-01, 9.9389e-01, 9.6546e-01, 3.2168e-03, 9.9105e-01, 1.8054e-03,
        1.2829e-02, 2.7576e-01, 2.1496e-02, 8.4228e-02, 9.8797e-01, 9.9861e-01,
        9.5973e-01, 9.9672e-01, 1.2004e-01, 3.1759e-01, 9.8623e-01, 9.6161e-01,
        9.5716e-01, 1.7287e-02, 9.9784e-01, 9.9787e-01, 3.2432e-04, 2.7872e-02,
        1.6335e-01, 2.3910e-03, 4.7581e-04, 7.3343e-01, 5.6915e-01, 7.3455e-03,
        8.8785e-01, 9.9974e-01, 1.2870e-02, 9.9586e-01, 1.1359e-01, 9.9792e-01,
        6.3772e-02, 9.9966e-01, 9.4792e-

In [9]:
# elu ordered(linear -> batch norm -> elu -> dropout)
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_deeper_elu2().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価, earlystoppipngによっていちばんauc_scoreの高かった
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        # この辺のfoldごとの最良モデルの読み込みをもう少しうまく、簡潔に書けないか
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 35th epoch.
Best auc score in validation is 0.7598852040816327
train loss:0.65546,train acc:0.61111, valid loss:0.67166, valid acc:0.64198, auc_score:0.70153
train loss:0.31781,train acc:0.99846, valid loss:0.61189, valid acc:0.69136, auc_score:0.65242
train loss:0.31509,train acc:1.00000, valid loss:0.60083, valid acc:0.69136, auc_score:0.75989
train loss:0.31471,train acc:1.00000, valid loss:0.60878, valid acc:0.69136, auc_score:0.71158
--------------------This is 2th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 34th epoch.
Best auc score in validation is 0.8066007653061225
train loss:0.65288,train acc:0.59877, valid loss:0.68569, valid acc:0.59259, auc_score:0.68288
train loss:0.31935,train acc:0.99691, valid loss:0.56532, valid acc:0.73457, auc_score:0.78284
train loss:0.31798,train acc:0.99691, valid loss:0.56589, valid acc:0.73457, auc_score:0.76738

In [10]:
model = Net_deeper_elu2().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
test_data_tensor = torch.tensor(np.array(test_data.astype('f')))
prediction_deeper_elu2 = model.forward(test_data_tensor.to(device))
df_submission = pd.read_csv("./dataset/sample_submission.csv",header = 0)
df_submission['sex'] = prediction_deeper_elu2[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_deeper_elu2.csv")

In [11]:
# mishを使用
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_deeper_mish().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価, earlystoppipngによっていちばんauc_scoreの高かった
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        # この辺のfoldごとの最良モデルの読み込みをもう少しうまく、簡潔に書けないか
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 50th epoch.
Best auc score in validation is 0.7745535714285715
train loss:0.63901,train acc:0.64043, valid loss:0.68759, valid acc:0.59877, auc_score:0.68240
train loss:0.31831,train acc:0.99691, valid loss:0.59601, valid acc:0.67284, auc_score:0.74011
train loss:0.31761,train acc:0.99691, valid loss:0.59520, valid acc:0.67901, auc_score:0.77041
train loss:0.31746,train acc:0.99691, valid loss:0.59481, valid acc:0.67901, auc_score:0.77455
train loss:0.31742,train acc:0.99691, valid loss:0.59473, valid acc:0.69136, auc_score:0.75207
train loss:0.31744,train acc:0.99691, valid loss:0.59490, valid acc:0.69136, auc_score:0.74665
--------------------This is 2th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 52th epoch.
Best auc score in validation is 0.8042091836734694
train loss:0.65364,train acc:0.63272, valid loss:0.68768, valid acc:0.62346, auc_score:0.72417

In [12]:
model = Net_deeper_mish().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
test_data_tensor = torch.tensor(np.array(test_data.astype('f')))
prediction_deeper_mish = model.forward(test_data_tensor.to(device))
print(prediction_deeper_elu[:,1])
df_submission['sex'] = prediction_deeper_mish[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_deeper_mish.csv")

tensor([8.3489e-01, 8.4105e-01, 9.9968e-01, 5.8382e-04, 9.9856e-01, 3.6883e-02,
        6.7456e-01, 7.3420e-02, 9.9945e-01, 9.9560e-01, 9.8923e-01, 9.9536e-01,
        9.3438e-01, 9.0294e-01, 9.7370e-01, 5.3641e-01, 3.6033e-03, 1.4421e-01,
        3.5614e-01, 9.6844e-01, 9.8661e-01, 1.1420e-01, 3.8427e-02, 7.5880e-01,
        9.9646e-01, 7.9554e-02, 6.0484e-01, 1.6634e-01, 3.8393e-03, 8.9741e-01,
        6.0413e-01, 2.6333e-02, 4.3738e-01, 9.2383e-01, 3.4997e-06, 9.9754e-01,
        3.8307e-01, 9.9389e-01, 9.6546e-01, 3.2168e-03, 9.9105e-01, 1.8054e-03,
        1.2829e-02, 2.7576e-01, 2.1496e-02, 8.4228e-02, 9.8797e-01, 9.9861e-01,
        9.5973e-01, 9.9672e-01, 1.2004e-01, 3.1759e-01, 9.8623e-01, 9.6161e-01,
        9.5716e-01, 1.7287e-02, 9.9784e-01, 9.9787e-01, 3.2432e-04, 2.7872e-02,
        1.6335e-01, 2.3910e-03, 4.7581e-04, 7.3343e-01, 5.6915e-01, 7.3455e-03,
        8.8785e-01, 9.9974e-01, 1.2870e-02, 9.9586e-01, 1.1359e-01, 9.9792e-01,
        6.3772e-02, 9.9966e-01, 9.4792e-