In [1]:
%matplotlib inline 
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils import data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# データ
train_data = pd.read_csv("./dataset/NKI_RS_train_data.csv",index_col=0)
train_label = pd.read_csv("./dataset/NKI_RS_train_label.csv",index_col = 0)
test_data = pd.read_csv("./dataset/NKI_RS_test_data.csv", index_col = 0)
train_label = train_label["sex"]

In [3]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 5, shuffle=True, random_state = 0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # 訓練用の関数
# from train_eval_loop import train_valid_loop
# データをtensor型にかえる関数
from datapreparation import df2tensor

In [4]:
# modelのimport
from models_NN import Net_deeper, Net_deeper2, Net_nky, Net_nky2, Net_deeper_elu, Net_deeper_elu2, Net_deeper_mish
# rvsd2...ReLUをELUに変更
# deeper_mish... Net_deeperのReLUをELUに変更

# 訓練/検証用関数,train loss基準でEarly stop
from train_eval_loop_lossbassed import train_valid_loop_lossbased

In [5]:
# いちばん単純なモデル
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_deeper().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価, earlystoppipngによっていちばんauc_scoreの高かった
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        # この辺のfoldごとの最良モデルの読み込みをもう少しうまく、簡潔に書けないか
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 45th epoch.
Best auc score in validation is 0.7209821428571429
train loss:0.65390,train acc:0.62037, valid loss:0.69616, valid acc:0.39506, auc_score:0.66247
train loss:0.31667,train acc:0.99846, valid loss:0.61905, valid acc:0.66667, auc_score:0.58147
train loss:0.31460,train acc:1.00000, valid loss:0.62005, valid acc:0.65432, auc_score:0.72098
train loss:0.31449,train acc:1.00000, valid loss:0.62075, valid acc:0.66049, auc_score:0.67650
train loss:0.31448,train acc:1.00000, valid loss:0.62114, valid acc:0.64815, auc_score:0.69627
--------------------This is 2th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 49th epoch.
Best auc score in validation is 0.7637117346938775
train loss:0.67513,train acc:0.57099, valid loss:0.73442, valid acc:0.39506, auc_score:0.68264
train loss:0.31557,train acc:1.00000, valid loss:0.59525, valid acc:0.68519, auc_score:0.66837

In [9]:
model = Net_deeper().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
test_data_tensor = torch.tensor(np.array(test_data.astype('f')))
prediction_deeper = model.forward(test_data_tensor.to(device))
# print(bn_prediction)
print(prediction_deeper[:,1])
df_submission = pd.read_csv("./dataset/sample_submission.csv",header = 0)
df_submission['sex'] = prediction_deeper[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_deeper.csv")

tensor([4.7456e-01, 9.8483e-01, 9.9209e-01, 4.3316e-02, 9.8578e-01, 3.6843e-01,
        9.7884e-01, 3.4610e-01, 9.8681e-01, 9.8632e-01, 9.9123e-01, 9.8585e-01,
        8.5624e-01, 9.5454e-01, 9.7807e-01, 8.6344e-01, 4.3302e-01, 7.7475e-01,
        9.2072e-01, 9.3118e-01, 9.3594e-01, 5.1186e-01, 8.4925e-01, 8.8699e-01,
        9.8686e-01, 1.1044e-01, 3.3469e-01, 2.4487e-01, 6.1847e-01, 9.3910e-01,
        4.9001e-01, 6.2445e-01, 1.8359e-01, 8.6194e-01, 3.7070e-04, 9.8903e-01,
        2.3598e-01, 9.6812e-01, 9.4321e-01, 2.8989e-01, 9.8535e-01, 6.4283e-01,
        1.3371e-01, 5.1883e-01, 8.6709e-01, 2.1564e-01, 9.8754e-01, 9.9645e-01,
        9.6631e-01, 9.8857e-01, 7.6193e-01, 2.5030e-01, 9.3519e-01, 9.3118e-01,
        9.3099e-01, 5.3924e-02, 9.8720e-01, 9.8569e-01, 2.7300e-02, 9.7733e-01,
        8.1877e-01, 1.0120e-01, 2.7151e-03, 9.6695e-01, 6.4341e-02, 1.6251e-02,
        9.8250e-01, 9.9713e-01, 1.8451e-01, 9.9521e-01, 5.5333e-01, 9.8303e-01,
        8.1353e-02, 9.9153e-01, 1.7365e-

In [10]:
# linear -> batch norm -> relu -> dropout
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_deeper2().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価, earlystoppipngによっていちばんauc_scoreの高かった
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        # この辺のfoldごとの最良モデルの読み込みをもう少しうまく、簡潔に書けないか
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 35th epoch.
Best auc score in validation is 0.7380420918367346
train loss:0.69197,train acc:0.53858, valid loss:0.68846, valid acc:0.60494, auc_score:0.58099
train loss:0.32387,train acc:1.00000, valid loss:0.61292, valid acc:0.68519, auc_score:0.63122
train loss:0.31812,train acc:1.00000, valid loss:0.60615, valid acc:0.68519, auc_score:0.68351
train loss:0.31694,train acc:1.00000, valid loss:0.60880, valid acc:0.67901, auc_score:0.69372
--------------------This is 2th fold.10epochごとの結果を以下に出力--------------------
Early Stopping is working in 34th epoch.
Best auc score in validation is 0.7648278061224489
train loss:0.69845,train acc:0.52932, valid loss:0.69516, valid acc:0.39506, auc_score:0.54273
train loss:0.32284,train acc:1.00000, valid loss:0.59669, valid acc:0.67901, auc_score:0.62803
train loss:0.31772,train acc:1.00000, valid loss:0.60586, valid acc:0.67901, auc_score:0.69467

In [12]:
model = Net_deeper2().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
prediction_deeper2 = model.forward(test_data_tensor.to(device))
# print(bn_prediction)
print(prediction_deeper2[:,1])
df_submission['sex'] = prediction_deeper2[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_deeper2.csv")

tensor([0.9344, 0.9855, 0.9910, 0.0326, 0.9873, 0.4371, 0.9886, 0.5073, 0.9934,
        0.9900, 0.9917, 0.9890, 0.9624, 0.9457, 0.9933, 0.7855, 0.3873, 0.9350,
        0.8859, 0.9800, 0.9259, 0.4475, 0.9139, 0.7324, 0.9910, 0.0563, 0.4162,
        0.3049, 0.3359, 0.9735, 0.8429, 0.4289, 0.3921, 0.9762, 0.0057, 0.9940,
        0.0861, 0.9928, 0.7180, 0.0444, 0.9937, 0.4845, 0.0347, 0.1527, 0.8303,
        0.1766, 0.9893, 0.9955, 0.9843, 0.9876, 0.6300, 0.2154, 0.9499, 0.9785,
        0.6369, 0.1117, 0.9622, 0.9667, 0.0679, 0.8967, 0.8619, 0.0822, 0.0266,
        0.9902, 0.0919, 0.0321, 0.9888, 0.9982, 0.0948, 0.9964, 0.2233, 0.9912,
        0.0808, 0.9959, 0.7012, 0.9242, 0.0212, 0.9905, 0.9850, 0.9901, 0.8925],
       device='cuda:0', grad_fn=<SelectBackward>)


In [13]:
# nkyくんのモデル
# linear -> batch norm -> relu -> dropout
num = 0
n_epoch = 500
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"--------------------This is {num + 1}th fold.10epochごとの結果を以下に出力--------------------")
    # modelの初期化
    model = Net_nky().to(device)
    optimizer = optim.SGD(model.parameters(), lr = 1e-2, momentum = 0.9, weight_decay = 5e-3)
    best_score = 0
    # 分割した訓練データと検証データをtensor型に変換
    trainloader, validloader, valid_data, valid_label = df2tensor(
        train_data.iloc[train_idx], train_label.iloc[train_idx],
        train_data.iloc[valid_idx], train_label.iloc[valid_idx], batch_size = 64)
    # モデルの訓練と評価
    train_acc_list, train_loss_list, valid_acc_list, valid_loss_list, auc_score_list = train_valid_loop_lossbased(
        trainloader, validloader, valid_data, valid_label, model, n_epoch, optimizer)
    auc_score = np.max(auc_score_list)
    if auc_score > best_score:
        best_score = auc_score
        best_model = model.load_state_dict(torch.load("model.pth"))
    for a,b,c,d,e in zip(
        train_loss_list, train_acc_list,
        valid_loss_list, valid_acc_list, auc_score_list):
        print(f"train loss:{a:.5f},train acc:{b:.5f}, valid loss:{c:.5f}, valid acc:{d:.5f}, auc_score:{e:.5f}")
    num += 1

--------------------This is 1th fold.10epochごとの結果を以下に出力--------------------


RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
調べてもあまりリファレンスの見つからないエラーで動かない。要再調査

In [None]:
model = Net_deeper2().to(device)
model.eval()
model.load_state_dict(torch.load("model.pth"))
prediction_nky = model.forward(test_data_tensor.to(device))
# print(bn_prediction)
print(prediction_deeper2[:,1])
df_submission['sex'] = prediction_nky[:, 1].to('cpu').detach().numpy().copy()
df_submission.to_csv("./predicted_label/DNN_nky.csv")