In [1]:
%matplotlib inline 
import torch
from torch import nn, optim
from torch.utils import data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [2]:
# データの読み込み
train_data = pd.read_csv("NKI_RS_train_data.csv",index_col=0)
train_label = pd.read_csv("NKI_RS_train_label.csv",index_col = 0)
test_data = pd.read_csv("NKI_RS_test_data.csv", index_col = 1)

In [3]:
train_label = train_label["sex"]
train_label

subject_id
A00054153    0
A00040383    1
A00035337    1
A00064053    1
A00035840    1
            ..
A00037377    1
A00057862    1
A00059527    1
A00061790    1
A00039159    1
Name: sex, Length: 810, dtype: int64

In [4]:
learning_rate = 1e-2
loss_fn = nn.CrossEntropyLoss()

In [5]:
# modelのロード
from model_test import Net
model = Net()
model

Net(
  (net): Sequential(
    (0): Linear(in_features=6670, out_features=3000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3000, out_features=1500, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1500, out_features=750, bias=True)
    (5): ReLU()
    (6): Linear(in_features=750, out_features=100, bias=True)
    (7): ReLU()
    (8): Linear(in_features=100, out_features=20, bias=True)
    (9): ReLU()
    (10): Linear(in_features=20, out_features=2, bias=True)
  )
)

Dropoutを組み込んだモデルでテスト

In [6]:
from CV_train_eval import cross_val

In [7]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 5, shuffle=True, random_state = 0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num = 0
n_epochs = 100

for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"This is {num + 1}th fold.--------------------")
    num += 1
    # foldごとのモデルの初期化
    model = Net().to(device)
    # foldごとにモデルの最適化手法のパラメータ引数をちゃんと設定してあげる必要があった
    optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 5e-3)

    # データの準備-------------------------------------------------------------
    # train_idx, valid_idxにはインデックスの値が格納されている。
    _train_data = train_data.iloc[train_idx]
    _train_label = train_label.iloc[train_idx]
    _valid_data = train_data.iloc[valid_idx]
    _valid_label = train_label.iloc[valid_idx]
    # データの準備
    # 訓練データをtensor型に変換------------
    train_data_tensor = torch.tensor(np.array(_train_data.astype('f'))) # データのみ
    train_label_tensor = torch.tensor(_train_label)
    # DataLoaderに渡す
    train_tensor = data.TensorDataset(train_data_tensor, train_label_tensor)
    trainloader = data.DataLoader(train_tensor, batch_size = 64)
    # valid
    valid_data_tensor = torch.tensor(np.array(_valid_data.astype('f'))) # データのみ
    valid_label_tensor = torch.tensor(_valid_label)
    # print(valid_data_tensor.size(),valid_label_tensor.size())
    # print("---------")
    valid_tensor = data.TensorDataset(valid_data_tensor, valid_label_tensor)
    validloader = data.DataLoader(valid_tensor, batch_size = 64)
    # ここまででデータの準備完了------------------------------------------------
    # ここから学習
    
    for epoch in range(n_epochs):
        train_loss = 0
        train_acc = 0
        valid_loss = 0
        valid_acc = 0
        auc_score = 0
        
        model.train() # 学習モード <-これテキストとかにあった気がしないがなんだ？？
        # dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        batch_count = 0
        for xt, yt in trainloader: # ミニバッチずつ計算
            # データをgpuに <-これであってる？？
            xt = xt.to(device)
            yt = yt.to(device)
            
            y_pred = model.forward(xt) # モデルにミニバッチをぶち込む
            loss = loss_fn(y_pred, yt) # 予測結果の損失計算
            # print(f"{batch_count + 1}回目のミニバッチごとのロス:{loss}")
            train_loss += loss.item() * xt.size(0) # ミニバッチのサイズを掛ける
            train_acc += (y_pred.max(1)[1] == yt).sum().item()

            optimizer.zero_grad() # 勾配情報の初期化
            loss.backward() # 誤差逆伝搬
            optimizer.step() # 重みの更新
            # print(batch_count)
            batch_count += 1
        # print(batch_count)
        
        avg_train_loss = train_loss / len(trainloader.dataset)
        avg_train_acc = train_acc / len(trainloader.dataset)
        
        # validation
        model.eval() # <- dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        with torch.no_grad(): # validationにおいては勾配を計算する必要がない
            for xv, yv in validloader:
                xv = xv.to(device)
                yv = yv.to(device)
                y_pred = model(xv)
                loss = loss_fn(y_pred, yv)
                valid_loss += loss.item() * xv.size(0) # ミニバッチのサイズを掛ける
                valid_acc += (y_pred.max(1)[1] == yv).sum().item()
                

            avg_valid_loss = valid_loss / len(validloader.dataset)
            avg_valid_acc = valid_acc / len(validloader.dataset)
            avg_auc_score = auc_score / len(validloader.dataset)
        if epoch == 0 or (epoch + 1) % 50 == 0:
            print(f"epoch:{epoch + 1},train_loss:{avg_train_loss:.5f}, train_acc:{avg_train_acc:.5f}, val_loss:{avg_valid_loss:.5f},val_acc:{avg_valid_acc:.5f}")
        
# 1epochを経ても重みが更新できていない
# そもそもlossの値が小さすぎるような気がする

This is 1th fold.--------------------
epoch:1,train_loss:0.68682, train_acc:0.60802, val_loss:0.68525,val_acc:0.60494
epoch:50,train_loss:0.66940, train_acc:0.60802, val_loss:0.67084,val_acc:0.60494
epoch:100,train_loss:0.66943, train_acc:0.60802, val_loss:0.67084,val_acc:0.60494
This is 2th fold.--------------------
epoch:1,train_loss:0.69959, train_acc:0.39198, val_loss:0.69641,val_acc:0.39506
epoch:50,train_loss:0.66941, train_acc:0.60802, val_loss:0.67090,val_acc:0.60494
epoch:100,train_loss:0.66943, train_acc:0.60802, val_loss:0.67090,val_acc:0.60494
This is 3th fold.--------------------
epoch:1,train_loss:0.69266, train_acc:0.52778, val_loss:0.69044,val_acc:0.60494
epoch:50,train_loss:0.66960, train_acc:0.60802, val_loss:0.67093,val_acc:0.60494
epoch:100,train_loss:0.66964, train_acc:0.60802, val_loss:0.67095,val_acc:0.60494
This is 4th fold.--------------------
epoch:1,train_loss:0.69132, train_acc:0.60648, val_loss:0.68938,val_acc:0.61111
epoch:50,train_loss:0.67035, train_acc:

In [8]:
from model import Net_simple, Net_dp, Net_bn
model_dp_test = Net_dp()
model_dp_test

Net_dp(
  (net): Sequential(
    (0): Linear(in_features=6670, out_features=3000, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=3000, out_features=1500, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=1500, out_features=750, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=750, out_features=100, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=100, out_features=20, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.2, inplace=False)
    (15): Linear(in_features=20, out_features=2, bias=True)
  )
)

In [9]:
# dropoutを入れたやつ
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits = 5, shuffle=True, random_state = 0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num = 0
n_epochs = 100
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"This is {num + 1}th fold.--------------------")
    num += 1
    # foldごとのモデルの初期化
    model = Net_dp().to(device)
    # foldごとにモデルの最適化手法のパラメータ引数をちゃんと設定してあげる必要があった
    optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 5e-3)

    # データの準備-------------------------------------------------------------
    # train_idx, valid_idxにはインデックスの値が格納されている。
    _train_data = train_data.iloc[train_idx]
    _train_label = train_label.iloc[train_idx]
    _valid_data = train_data.iloc[valid_idx]
    _valid_label = train_label.iloc[valid_idx]
    # データの準備
    # 訓練データをtensor型に変換------------
    train_data_tensor = torch.tensor(np.array(_train_data.astype('f'))) # データのみ
    train_label_tensor = torch.tensor(_train_label)
    # DataLoaderに渡す
    train_tensor = data.TensorDataset(train_data_tensor, train_label_tensor)
    trainloader = data.DataLoader(train_tensor, batch_size = 64)
    # valid
    valid_data_tensor = torch.tensor(np.array(_valid_data.astype('f'))) # データのみ
    valid_label_tensor = torch.tensor(_valid_label)
    # print(valid_data_tensor.size(),valid_label_tensor.size())
    # print("---------")
    valid_tensor = data.TensorDataset(valid_data_tensor, valid_label_tensor)
    validloader = data.DataLoader(valid_tensor, batch_size = 64)
    # ここまででデータの準備完了------------------------------------------------
    # ここから学習
    
    for epoch in range(n_epochs):
        train_loss = 0
        train_acc = 0
        valid_loss = 0
        valid_acc = 0
        auc_score = 0
        
        model.train() # 学習モード <-これテキストとかにあった気がしないがなんだ？？
        # dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        batch_count = 0
        for xt, yt in trainloader: # ミニバッチずつ計算
            # データをgpuに <-これであってる？？
            xt = xt.to(device)
            yt = yt.to(device)
            
            y_pred = model.forward(xt) # モデルにミニバッチをぶち込む
            loss = loss_fn(y_pred, yt) # 予測結果の損失計算
            # print(f"{batch_count + 1}回目のミニバッチごとのロス:{loss}")
            train_loss += loss.item() * xt.size(0) # ミニバッチのサイズを掛ける
            train_acc += (y_pred.max(1)[1] == yt).sum().item()

            optimizer.zero_grad() # 勾配情報の初期化
            loss.backward() # 誤差逆伝搬
            optimizer.step() # 重みの更新
            # print(batch_count)
            batch_count += 1
        # print(batch_count)
        
        avg_train_loss = train_loss / len(trainloader.dataset)
        avg_train_acc = train_acc / len(trainloader.dataset)
        
        # validation
        model.eval() # <- dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        with torch.no_grad(): # validationにおいては勾配を計算する必要がない
            for xv, yv in validloader:
                xv = xv.to(device)
                yv = yv.to(device)
                y_pred = model(xv)
                loss = loss_fn(y_pred, yv)
                valid_loss += loss.item() * xv.size(0) # ミニバッチのサイズを掛ける
                valid_acc += (y_pred.max(1)[1] == yv).sum().item()
                

            avg_valid_loss = valid_loss / len(validloader.dataset)
            avg_valid_acc = valid_acc / len(validloader.dataset)
            avg_auc_score = auc_score / len(validloader.dataset)
        if epoch == 0 or (epoch + 1) % 50 == 0:
            print(f"epoch:{epoch + 1},train_loss:{avg_train_loss:.5f}, train_acc:{avg_train_acc:.5f}, val_loss:{avg_valid_loss:.5f},val_acc:{avg_valid_acc:.5f}")

This is 1th fold.--------------------
epoch:1,train_loss:0.70691, train_acc:0.39198, val_loss:0.70297,val_acc:0.39506
epoch:50,train_loss:0.66939, train_acc:0.60802, val_loss:0.67088,val_acc:0.60494
epoch:100,train_loss:0.66952, train_acc:0.60802, val_loss:0.67088,val_acc:0.60494
This is 2th fold.--------------------
epoch:1,train_loss:0.70659, train_acc:0.39198, val_loss:0.70281,val_acc:0.39506
epoch:50,train_loss:0.66982, train_acc:0.60802, val_loss:0.67093,val_acc:0.60494
epoch:100,train_loss:0.66948, train_acc:0.60802, val_loss:0.67094,val_acc:0.60494
This is 3th fold.--------------------
epoch:1,train_loss:0.70238, train_acc:0.39198, val_loss:0.69912,val_acc:0.39506
epoch:50,train_loss:0.66811, train_acc:0.60802, val_loss:0.67087,val_acc:0.60494
epoch:100,train_loss:0.67027, train_acc:0.60802, val_loss:0.67088,val_acc:0.60494
This is 4th fold.--------------------
epoch:1,train_loss:0.69562, train_acc:0.40278, val_loss:0.69258,val_acc:0.61111
epoch:50,train_loss:0.66985, train_acc:

In [10]:
num = 0
n_epochs = 5
for train_idx, valid_idx in skf.split(train_data, train_label):
    print(f"This is {num + 1}th fold.--------------------")
    num += 1
    # foldごとのモデルの初期化
    model = Net_bn().to(device)
    # foldごとにモデルの最適化手法のパラメータ引数をちゃんと設定してあげる必要があった
    optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 5e-3)

    # データの準備-------------------------------------------------------------
    # train_idx, valid_idxにはインデックスの値が格納されている。
    _train_data = train_data.iloc[train_idx]
    _train_label = train_label.iloc[train_idx]
    _valid_data = train_data.iloc[valid_idx]
    _valid_label = train_label.iloc[valid_idx]
    # データの準備
    # 訓練データをtensor型に変換------------
    train_data_tensor = torch.tensor(np.array(_train_data.astype('f'))) # データのみ
    train_label_tensor = torch.tensor(_train_label)
    # DataLoaderに渡す
    train_tensor = data.TensorDataset(train_data_tensor, train_label_tensor)
    trainloader = data.DataLoader(train_tensor, batch_size = 64)
    # valid
    valid_data_tensor = torch.tensor(np.array(_valid_data.astype('f'))) # データのみ
    valid_label_tensor = torch.tensor(_valid_label)
    # print(valid_data_tensor.size(),valid_label_tensor.size())
    # print("---------")
    valid_tensor = data.TensorDataset(valid_data_tensor, valid_label_tensor)
    validloader = data.DataLoader(valid_tensor, batch_size = 64)
    # ここまででデータの準備完了------------------------------------------------
    # ここから学習
    
    for epoch in range(n_epochs):
        train_loss = 0
        train_acc = 0
        valid_loss = 0
        valid_acc = 0
        auc_score = 0
        
        model.train() # 学習モード <-これテキストとかにあった気がしないがなんだ？？
        # dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        batch_count = 0
        for xt, yt in trainloader: # ミニバッチずつ計算
            # データをgpuに <-これであってる？？
            xt = xt.to(device)
            yt = yt.to(device)
            
            y_pred = model.forward(xt) # モデルにミニバッチをぶち込む
            loss = loss_fn(y_pred, yt) # 予測結果の損失計算
            # print(f"{batch_count + 1}回目のミニバッチごとのロス:{loss}")
            train_loss += loss.item() * xt.size(0) # ミニバッチのサイズを掛ける
            _, y_pred = torch.max(y_pred, 1)
            train_acc += torch.sum(y_pred == yt)
            optimizer.zero_grad() # 勾配情報の初期化
            loss.backward() # 誤差逆伝播
            optimizer.step() # 重みの更新
            # print(batch_count)
            batch_count += 1
        # print(batch_count)
        
        avg_train_loss = train_loss / len(trainloader.dataset)
        avg_train_acc = train_acc / len(trainloader.dataset)
        
        # validation
        model.eval() # <- dropoutやbatch normのふるまいが変わる。今回はNNに組み込んでいないから必要ないと思われる
        with torch.no_grad(): # validationにおいては勾配を計算する必要がない
            for xv, yv in validloader:
                xv = xv.to(device)
                yv = yv.to(device)
                y_pred = model(xv)
                loss = loss_fn(y_pred, yv)
                valid_loss += loss.item() * xv.size(0) # ミニバッチのサイズを掛ける
                _, y_pred = torch.max(y_pred, 1)
                valid_acc += torch.sum(y_pred == yv)
                

            avg_valid_loss = valid_loss / len(validloader.dataset)
            avg_valid_acc = valid_acc / len(validloader.dataset)
            avg_auc_score = auc_score / len(validloader.dataset)
        print(f"epoch:{epoch + 1},train_loss:{avg_train_loss:.5f}, train_acc:{avg_train_acc:.5f}, val_loss:{avg_valid_loss:.5f},val_acc:{avg_valid_acc:.5f}")
        # if epoch == 0 or (epoch + 1) % 10 == 0:
        #     print(f"epoch:{epoch + 1},train_loss:{avg_train_loss:.5f}, train_acc:{avg_train_acc:.5f}, val_loss:{avg_valid_loss:.5f},val_acc:{avg_valid_acc:.5f}")

This is 1th fold.--------------------
epoch:1,train_loss:0.66745, train_acc:0.59105, val_loss:0.68919,val_acc:0.60494
epoch:2,train_loss:0.47526, train_acc:0.91667, val_loss:0.70049,val_acc:0.41975
epoch:3,train_loss:0.38710, train_acc:0.98765, val_loss:0.66435,val_acc:0.63580
epoch:4,train_loss:0.34860, train_acc:0.99846, val_loss:0.67228,val_acc:0.60494
epoch:5,train_loss:0.33344, train_acc:1.00000, val_loss:0.64697,val_acc:0.63580
This is 2th fold.--------------------
epoch:1,train_loss:0.66652, train_acc:0.60340, val_loss:0.69238,val_acc:0.52469
epoch:2,train_loss:0.49320, train_acc:0.92901, val_loss:0.67285,val_acc:0.69136
epoch:3,train_loss:0.39594, train_acc:0.98611, val_loss:0.62312,val_acc:0.70370
epoch:4,train_loss:0.34810, train_acc:1.00000, val_loss:0.60226,val_acc:0.68519
epoch:5,train_loss:0.33129, train_acc:1.00000, val_loss:0.59804,val_acc:0.69753
This is 3th fold.--------------------
epoch:1,train_loss:0.66356, train_acc:0.58179, val_loss:0.68495,val_acc:0.60494
epoch:

# 過学習に対処するには？

train_accの挙動は気にしなくてよい。valid_accが下がらなければ過学習ではない。
[double descent][https://www.acceluniverse.com/blog/developers/2020/01/deep-double-descent-where-bigger-models-and-more-data-hurt.html]