In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim

import optuna

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc,matthews_corrcoef, precision_recall_curve,roc_auc_score



データ読み取り

In [12]:
df=pd.read_csv('../../data/learning_data.csv',index_col=0)

X=df.drop(columns='dengue',axis=1).values
y=df['dengue'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1,random_state=42)

#torchテンソルに変換
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)


In [13]:
class DNN_3(nn.Module):
    def __init__(self, input_dim,hidden_units,dropout1,dropout2,dropout3,dropout4):
        super(DNN_3, self).__init__()
        self.layer1 = nn.Linear(input_dim,hidden_units)
        self.layer2 = nn.Linear(hidden_units,hidden_units)
        self.layer3 = nn.Linear(hidden_units, hidden_units)
        self.layer4 = nn.Linear(hidden_units, hidden_units)
        self.output_layer = nn.Linear(hidden_units, 1)

        self.dropout1 = nn.Dropout(dropout1)
        self.dropout2 = nn.Dropout(dropout2)
        self.dropout3 = nn.Dropout(dropout3)
        self.dropout4 = nn.Dropout(dropout4)


        self.bn1 = nn.BatchNorm1d(hidden_units)
        self.bn2 = nn.BatchNorm1d(hidden_units)
        self.bn3 = nn.BatchNorm1d(hidden_units)
        self.bn4 = nn.BatchNorm1d(hidden_units)

        
        
    def forward(self, x):
        x = torch.relu(self.bn1(self.layer1(x)))
        x = self.dropout1(x)
        
        x = torch.relu(self.bn2(self.layer2(x)))
        x = self.dropout2(x)

        x = torch.relu(self.bn3(self.layer3(x)))
        x = self.dropout3(x)

        x = torch.relu(self.bn4(self.layer4(x)))
        x = self.dropout4(x)

        x = torch.sigmoid(self.output_layer(x)) 
        return x
    

学習データセットの作成

In [14]:
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

Optunaの設定

In [15]:
def objective(trial):
    hidden_units = trial.suggest_int("hidden_units", 4, 512, step=4)
    dropout1 = trial.suggest_float("dropout1", 0.1, 0.5, step=0.05)
    dropout2 = trial.suggest_float("dropout2", 0.1, 0.5, step=0.05)
    dropout3 = trial.suggest_float("dropout3", 0.1, 0.5, step=0.05)
    dropout4 = trial.suggest_float("dropout4", 0.1, 0.5, step=0.05)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
    weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
    

    model = DNN_3(input_dim=X_train_tensor.shape[1], hidden_units=hidden_units, dropout1=dropout1,dropout2=dropout2,dropout3=dropout3,dropout4=dropout4).to(device)

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

    
    model.eval()
    val_true, val_pred, val_prob = [], [], []
    with torch.no_grad():
        for X_val, y_val in val_loader:
            X_val, y_val = X_val.to(device), y_val.to(device)
            val_outputs = model(X_val).squeeze()
            predictions = (val_outputs >= 0.5).float()
            val_true.extend(y_val.cpu().numpy())
            val_pred.extend(predictions.cpu().numpy())
            val_prob.extend(val_outputs.cpu().numpy())


    accuracy = accuracy_score(val_true, val_pred)
    precision = precision_score(val_true, val_pred)
    recall = recall_score(val_true, val_pred)
    f1 = f1_score(val_true, val_pred)
    mcc = matthews_corrcoef(val_true, val_pred)
    specificity = recall_score(val_true, val_pred, pos_label=0)

    # ログ
    print(f'Accuracy: {accuracy * 100:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Matthews Correlation Coefficient: {mcc:.4f}')
    print(f'Specificity: {specificity:.4f}')

    return f1


In [None]:
# 使用可能なGPUの数を取得
num_gpus = torch.cuda.device_count()

if num_gpus == 0:
    print("使用可能なGPUはありません。")
else:
    print(f"使用可能なGPUの数: {num_gpus}")
    for i in range(num_gpus):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"  メモリ使用状況: {torch.cuda.memory_allocated(i) / 1024**2:.2f} MB / {torch.cuda.get_device_properties(i).total_memory / 1024**2:.2f} MB")
        print(f"  CUDA対応バージョン: {torch.cuda.get_device_properties(i).major}.{torch.cuda.get_device_properties(i).minor}")


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

study = optuna.create_study(direction="maximize")  
study.optimize(objective, n_trials=100)


print("Best Parameters: ", study.best_params)
print("Best Validation F1: ", study.best_value)


[I 2024-11-28 15:44:51,722] A new study created in memory with name: no-name-8523f4d1-f170-48e3-9185-9470ec4447d2
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:45:45,361] Trial 0 finished with value: 0.5132075471698113 and parameters: {'hidden_units': 344, 'dropout1': 0.1, 'dropout2': 0.25, 'dropout3': 0.1, 'dropout4': 0.1, 'learning_rate': 7.337099987833251e-05, 'weight_decay': 0.0004624435969868201}. Best is trial 0 with value: 0.5132075471698113.


Accuracy: 63.25%
Precision: 0.4892
Recall: 0.5397
F1 Score: 0.5132
Matthews Correlation Coefficient: 0.2198
Specificity: 0.6844


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:46:37,369] Trial 1 finished with value: 0.4351464435146444 and parameters: {'hidden_units': 248, 'dropout1': 0.25, 'dropout2': 0.15000000000000002, 'dropout3': 0.4, 'dropout4': 0.1, 'learning_rate': 0.0001531878856797872, 'weight_decay': 0.005345524835241412}. Best is trial 0 with value: 0.5132075471698113.


Accuracy: 61.54%
Precision: 0.4602
Recall: 0.4127
F1 Score: 0.4351
Matthews Correlation Coefficient: 0.1454
Specificity: 0.7289


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:47:28,416] Trial 2 finished with value: 0.0 and parameters: {'hidden_units': 500, 'dropout1': 0.4, 'dropout2': 0.25, 'dropout3': 0.25, 'dropout4': 0.4, 'learning_rate': 0.0884615177421434, 'weight_decay': 1.5441636953050647e-05}. Best is trial 0 with value: 0.5132075471698113.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:48:20,358] Trial 3 finished with value: 0.0 and parameters: {'hidden_units': 404, 'dropout1': 0.35, 'dropout2': 0.4, 'dropout3': 0.2, 'dropout4': 0.1, 'learning_rate': 0.0005224524456086983, 'weight_decay': 0.03249029720986691}. Best is trial 0 with value: 0.5132075471698113.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


[I 2024-11-28 15:49:10,290] Trial 4 finished with value: 0.528169014084507 and parameters: {'hidden_units': 368, 'dropout1': 0.1, 'dropout2': 0.15000000000000002, 'dropout3': 0.4, 'dropout4': 0.1, 'learning_rate': 0.00013656988234243866, 'weight_decay': 0.00010232803009052427}. Best is trial 4 with value: 0.528169014084507.


Accuracy: 61.82%
Precision: 0.4747
Recall: 0.5952
F1 Score: 0.5282
Matthews Correlation Coefficient: 0.2182
Specificity: 0.6311


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:49:59,092] Trial 5 finished with value: 0.5551601423487544 and parameters: {'hidden_units': 192, 'dropout1': 0.5, 'dropout2': 0.45000000000000007, 'dropout3': 0.5, 'dropout4': 0.5, 'learning_rate': 3.571265934827454e-05, 'weight_decay': 0.048733250852647604}. Best is trial 5 with value: 0.5551601423487544.


Accuracy: 64.39%
Precision: 0.5032
Recall: 0.6190
F1 Score: 0.5552
Matthews Correlation Coefficient: 0.2674
Specificity: 0.6578


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:50:46,226] Trial 6 finished with value: 0.0 and parameters: {'hidden_units': 332, 'dropout1': 0.45000000000000007, 'dropout2': 0.5, 'dropout3': 0.5, 'dropout4': 0.30000000000000004, 'learning_rate': 0.005746911996252804, 'weight_decay': 0.039273659759334834}. Best is trial 5 with value: 0.5551601423487544.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:51:34,457] Trial 7 finished with value: 0.0 and parameters: {'hidden_units': 384, 'dropout1': 0.35, 'dropout2': 0.15000000000000002, 'dropout3': 0.4, 'dropout4': 0.1, 'learning_rate': 0.00042399159182197676, 'weight_decay': 0.6503547460745087}. Best is trial 5 with value: 0.5551601423487544.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


[I 2024-11-28 15:52:21,006] Trial 8 finished with value: 0.43776824034334766 and parameters: {'hidden_units': 336, 'dropout1': 0.4, 'dropout2': 0.35, 'dropout3': 0.1, 'dropout4': 0.35, 'learning_rate': 0.0003637474667616724, 'weight_decay': 0.0032550126194261985}. Best is trial 5 with value: 0.5551601423487544.


Accuracy: 62.68%
Precision: 0.4766
Recall: 0.4048
F1 Score: 0.4378
Matthews Correlation Coefficient: 0.1624
Specificity: 0.7511


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:53:16,016] Trial 9 finished with value: 0.4827586206896552 and parameters: {'hidden_units': 428, 'dropout1': 0.4, 'dropout2': 0.30000000000000004, 'dropout3': 0.4, 'dropout4': 0.15000000000000002, 'learning_rate': 2.070858808902359e-05, 'weight_decay': 0.11652487227105361}. Best is trial 5 with value: 0.5551601423487544.


Accuracy: 61.54%
Precision: 0.4667
Recall: 0.5000
F1 Score: 0.4828
Matthews Correlation Coefficient: 0.1775
Specificity: 0.6800


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:54:03,765] Trial 10 finished with value: 0.23783783783783785 and parameters: {'hidden_units': 72, 'dropout1': 0.5, 'dropout2': 0.5, 'dropout3': 0.5, 'dropout4': 0.5, 'learning_rate': 1.13245293484068e-05, 'weight_decay': 0.7302448341772837}. Best is trial 5 with value: 0.5551601423487544.


Accuracy: 59.83%
Precision: 0.3729
Recall: 0.1746
F1 Score: 0.2378
Matthews Correlation Coefficient: 0.0130
Specificity: 0.8356


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:54:56,340] Trial 11 finished with value: 0.4672131147540984 and parameters: {'hidden_units': 184, 'dropout1': 0.2, 'dropout2': 0.1, 'dropout3': 0.35, 'dropout4': 0.2, 'learning_rate': 0.0028786786976553887, 'weight_decay': 0.00011220742921006105}. Best is trial 5 with value: 0.5551601423487544.


Accuracy: 62.96%
Precision: 0.4831
Recall: 0.4524
F1 Score: 0.4672
Matthews Correlation Coefficient: 0.1841
Specificity: 0.7289


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:55:45,765] Trial 12 finished with value: 0.5800604229607251 and parameters: {'hidden_units': 136, 'dropout1': 0.1, 'dropout2': 0.4, 'dropout3': 0.5, 'dropout4': 0.5, 'learning_rate': 5.486720354776601e-05, 'weight_decay': 0.0005967216485498529}. Best is trial 12 with value: 0.5800604229607251.


Accuracy: 60.40%
Precision: 0.4683
Recall: 0.7619
F1 Score: 0.5801
Matthews Correlation Coefficient: 0.2700
Specificity: 0.5156


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:56:35,760] Trial 13 finished with value: 0.6197916666666666 and parameters: {'hidden_units': 112, 'dropout1': 0.2, 'dropout2': 0.4, 'dropout3': 0.5, 'dropout4': 0.5, 'learning_rate': 3.804267172865449e-05, 'weight_decay': 0.000985386807750969}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 58.40%
Precision: 0.4612
Recall: 0.9444
F1 Score: 0.6198
Matthews Correlation Coefficient: 0.3551
Specificity: 0.3822


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:57:24,004] Trial 14 finished with value: 0.1 and parameters: {'hidden_units': 12, 'dropout1': 0.2, 'dropout2': 0.4, 'dropout3': 0.45000000000000007, 'dropout4': 0.45000000000000007, 'learning_rate': 5.519614931744419e-05, 'weight_decay': 0.0006846663084335174}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 64.10%
Precision: 0.5000
Recall: 0.0556
F1 Score: 0.1000
Matthews Correlation Coefficient: 0.0599
Specificity: 0.9689


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 15:58:10,193] Trial 15 finished with value: 0.5512820512820513 and parameters: {'hidden_units': 116, 'dropout1': 0.15000000000000002, 'dropout2': 0.4, 'dropout3': 0.30000000000000004, 'dropout4': 0.4, 'learning_rate': 1.146679535823098e-05, 'weight_decay': 0.0005243135861530116}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.11%
Precision: 0.4624
Recall: 0.6825
F1 Score: 0.5513
Matthews Correlation Coefficient: 0.2288
Specificity: 0.5556


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:59:01,245] Trial 16 finished with value: 0.0 and parameters: {'hidden_units': 124, 'dropout1': 0.25, 'dropout2': 0.35, 'dropout3': 0.45000000000000007, 'dropout4': 0.45000000000000007, 'learning_rate': 0.0020233712955168293, 'weight_decay': 0.007858671133832016}. Best is trial 13 with value: 0.6197916666666666.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 15:59:52,232] Trial 17 finished with value: 0.0 and parameters: {'hidden_units': 4, 'dropout1': 0.15000000000000002, 'dropout2': 0.45000000000000007, 'dropout3': 0.30000000000000004, 'dropout4': 0.25, 'learning_rate': 0.030291499602737852, 'weight_decay': 1.2056490881758823e-05}. Best is trial 13 with value: 0.6197916666666666.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


[I 2024-11-28 16:00:37,397] Trial 18 finished with value: 0.46494464944649444 and parameters: {'hidden_units': 240, 'dropout1': 0.15000000000000002, 'dropout2': 0.30000000000000004, 'dropout3': 0.45000000000000007, 'dropout4': 0.5, 'learning_rate': 0.00016739310022892217, 'weight_decay': 0.00011602248109245396}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 58.69%
Precision: 0.4345
Recall: 0.5000
F1 Score: 0.4649
Matthews Correlation Coefficient: 0.1321
Specificity: 0.6356


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:01:26,875] Trial 19 finished with value: 0.601123595505618 and parameters: {'hidden_units': 80, 'dropout1': 0.25, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.4, 'learning_rate': 4.015291693172958e-05, 'weight_decay': 0.001562374461222686}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 59.54%
Precision: 0.4652
Recall: 0.8492
F1 Score: 0.6011
Matthews Correlation Coefficient: 0.3054
Specificity: 0.4533


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:02:14,068] Trial 20 finished with value: 0.5294117647058824 and parameters: {'hidden_units': 56, 'dropout1': 0.30000000000000004, 'dropout2': 0.25, 'dropout3': 0.2, 'dropout4': 0.35, 'learning_rate': 0.0010813461577226593, 'weight_decay': 0.0017395492518652643}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 63.53%
Precision: 0.4932
Recall: 0.5714
F1 Score: 0.5294
Matthews Correlation Coefficient: 0.2361
Specificity: 0.6711


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:03:02,677] Trial 21 finished with value: 0.6128133704735376 and parameters: {'hidden_units': 160, 'dropout1': 0.25, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.45000000000000007, 'learning_rate': 3.083360707439208e-05, 'weight_decay': 0.0012453365998061087}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.40%
Precision: 0.4721
Recall: 0.8730
F1 Score: 0.6128
Matthews Correlation Coefficient: 0.3314
Specificity: 0.4533


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:03:51,403] Trial 22 finished with value: 0.6158038147138964 and parameters: {'hidden_units': 180, 'dropout1': 0.25, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.45000000000000007, 'learning_rate': 1.939035136571824e-05, 'weight_decay': 0.0015870273649294716}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 59.83%
Precision: 0.4689
Recall: 0.8968
F1 Score: 0.6158
Matthews Correlation Coefficient: 0.3391
Specificity: 0.4311


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:04:37,666] Trial 23 finished with value: 0.5808580858085809 and parameters: {'hidden_units': 168, 'dropout1': 0.30000000000000004, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.45000000000000007, 'learning_rate': 2.6024384654790194e-05, 'weight_decay': 0.0002747500233983606}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 63.82%
Precision: 0.4972
Recall: 0.6984
F1 Score: 0.5809
Matthews Correlation Coefficient: 0.2906
Specificity: 0.6044


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:05:26,948] Trial 24 finished with value: 0.6114285714285714 and parameters: {'hidden_units': 280, 'dropout1': 0.2, 'dropout2': 0.45000000000000007, 'dropout3': 0.25, 'dropout4': 0.45000000000000007, 'learning_rate': 1.053012043083658e-05, 'weight_decay': 0.010737650716522357}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 61.25%
Precision: 0.4777
Recall: 0.8492
F1 Score: 0.6114
Matthews Correlation Coefficient: 0.3286
Specificity: 0.4800


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:06:15,889] Trial 25 finished with value: 0.5409252669039146 and parameters: {'hidden_units': 216, 'dropout1': 0.25, 'dropout2': 0.30000000000000004, 'dropout3': 0.25, 'dropout4': 0.35, 'learning_rate': 0.00010050283538223841, 'weight_decay': 0.0012858278365205835}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 63.25%
Precision: 0.4903
Recall: 0.6032
F1 Score: 0.5409
Matthews Correlation Coefficient: 0.2435
Specificity: 0.6489


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:07:05,228] Trial 26 finished with value: 0.5478547854785478 and parameters: {'hidden_units': 276, 'dropout1': 0.2, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.4, 'learning_rate': 2.709750580391026e-05, 'weight_decay': 0.015210598719105966}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.97%
Precision: 0.4689
Recall: 0.6587
F1 Score: 0.5479
Matthews Correlation Coefficient: 0.2312
Specificity: 0.5822


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:07:52,762] Trial 27 finished with value: 0.4496124031007752 and parameters: {'hidden_units': 160, 'dropout1': 0.30000000000000004, 'dropout2': 0.2, 'dropout3': 0.15000000000000002, 'dropout4': 0.30000000000000004, 'learning_rate': 0.0002097230019312758, 'weight_decay': 5.309205149417036e-05}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 59.54%
Precision: 0.4394
Recall: 0.4603
F1 Score: 0.4496
Matthews Correlation Coefficient: 0.1302
Specificity: 0.6711


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:08:42,378] Trial 28 finished with value: 0.5905292479108635 and parameters: {'hidden_units': 96, 'dropout1': 0.25, 'dropout2': 0.45000000000000007, 'dropout3': 0.30000000000000004, 'dropout4': 0.45000000000000007, 'learning_rate': 1.7398937645714583e-05, 'weight_decay': 0.002194043424548526}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 58.12%
Precision: 0.4549
Recall: 0.8413
F1 Score: 0.5905
Matthews Correlation Coefficient: 0.2811
Specificity: 0.4356


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:09:31,003] Trial 29 finished with value: 0.6108108108108108 and parameters: {'hidden_units': 48, 'dropout1': 0.35, 'dropout2': 0.25, 'dropout3': 0.45000000000000007, 'dropout4': 0.5, 'learning_rate': 7.056874967209555e-05, 'weight_decay': 0.00026393147796000704}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 58.97%
Precision: 0.4631
Recall: 0.8968
F1 Score: 0.6108
Matthews Correlation Coefficient: 0.3278
Specificity: 0.4178


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-11-28 16:10:19,667] Trial 30 finished with value: 0.0 and parameters: {'hidden_units': 148, 'dropout1': 0.15000000000000002, 'dropout2': 0.30000000000000004, 'dropout3': 0.35, 'dropout4': 0.25, 'learning_rate': 0.009367086576680358, 'weight_decay': 0.0034663242576113275}. Best is trial 13 with value: 0.6197916666666666.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


Accuracy: 64.10%
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Matthews Correlation Coefficient: 0.0000
Specificity: 1.0000


[I 2024-11-28 16:11:08,678] Trial 31 finished with value: 0.603448275862069 and parameters: {'hidden_units': 216, 'dropout1': 0.2, 'dropout2': 0.45000000000000007, 'dropout3': 0.25, 'dropout4': 0.45000000000000007, 'learning_rate': 1.4059899425255595e-05, 'weight_decay': 0.010311021436412642}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.68%
Precision: 0.4730
Recall: 0.8333
F1 Score: 0.6034
Matthews Correlation Coefficient: 0.3118
Specificity: 0.4800


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:11:58,752] Trial 32 finished with value: 0.5818181818181818 and parameters: {'hidden_units': 288, 'dropout1': 0.2, 'dropout2': 0.4, 'dropout3': 0.25, 'dropout4': 0.45000000000000007, 'learning_rate': 2.278809506815795e-05, 'weight_decay': 0.000968069383300306}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.68%
Precision: 0.4706
Recall: 0.7619
F1 Score: 0.5818
Matthews Correlation Coefficient: 0.2741
Specificity: 0.5200


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:12:47,719] Trial 33 finished with value: 0.5185185185185185 and parameters: {'hidden_units': 300, 'dropout1': 0.25, 'dropout2': 0.45000000000000007, 'dropout3': 0.2, 'dropout4': 0.4, 'learning_rate': 7.660243126859054e-05, 'weight_decay': 0.0033556931067154506}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 62.96%
Precision: 0.4861
Recall: 0.5556
F1 Score: 0.5185
Matthews Correlation Coefficient: 0.2211
Specificity: 0.6711


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:13:36,169] Trial 34 finished with value: 0.6178010471204188 and parameters: {'hidden_units': 248, 'dropout1': 0.2, 'dropout2': 0.5, 'dropout3': 0.30000000000000004, 'dropout4': 0.5, 'learning_rate': 1.0602984547290335e-05, 'weight_decay': 0.018956665445899728}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 58.40%
Precision: 0.4609
Recall: 0.9365
F1 Score: 0.6178
Matthews Correlation Coefficient: 0.3489
Specificity: 0.3867


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:14:23,055] Trial 35 finished with value: 0.4669260700389105 and parameters: {'hidden_units': 236, 'dropout1': 0.30000000000000004, 'dropout2': 0.5, 'dropout3': 0.4, 'dropout4': 0.5, 'learning_rate': 4.139986762303801e-05, 'weight_decay': 0.01904809641685722}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 60.97%
Precision: 0.4580
Recall: 0.4762
F1 Score: 0.4669
Matthews Correlation Coefficient: 0.1593
Specificity: 0.6844


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:15:11,853] Trial 36 finished with value: 0.4684014869888476 and parameters: {'hidden_units': 204, 'dropout1': 0.25, 'dropout2': 0.35, 'dropout3': 0.35, 'dropout4': 0.5, 'learning_rate': 1.8015844710972207e-05, 'weight_decay': 0.1113826785971686}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 59.26%
Precision: 0.4406
Recall: 0.5000
F1 Score: 0.4684
Matthews Correlation Coefficient: 0.1410
Specificity: 0.6444


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:15:57,554] Trial 37 finished with value: 0.5182481751824818 and parameters: {'hidden_units': 180, 'dropout1': 0.1, 'dropout2': 0.5, 'dropout3': 0.30000000000000004, 'dropout4': 0.4, 'learning_rate': 0.00011957528683176207, 'weight_decay': 0.005527921388912327}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 62.39%
Precision: 0.4797
Recall: 0.5635
F1 Score: 0.5182
Matthews Correlation Coefficient: 0.2149
Specificity: 0.6578


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:16:44,314] Trial 38 finished with value: 0.4797047970479705 and parameters: {'hidden_units': 248, 'dropout1': 0.35, 'dropout2': 0.4, 'dropout3': 0.4, 'dropout4': 0.5, 'learning_rate': 0.0002466750849475302, 'weight_decay': 0.00033011706642646876}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 59.83%
Precision: 0.4483
Recall: 0.5159
F1 Score: 0.4797
Matthews Correlation Coefficient: 0.1562
Specificity: 0.6444


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)
[I 2024-11-28 16:17:29,701] Trial 39 finished with value: 0.5274725274725275 and parameters: {'hidden_units': 472, 'dropout1': 0.15000000000000002, 'dropout2': 0.2, 'dropout3': 0.2, 'dropout4': 0.45000000000000007, 'learning_rate': 3.730843972254069e-05, 'weight_decay': 2.69330728449221e-05}. Best is trial 13 with value: 0.6197916666666666.


Accuracy: 63.25%
Precision: 0.4898
Recall: 0.5714
F1 Score: 0.5275
Matthews Correlation Coefficient: 0.2315
Specificity: 0.6667


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)
  weight_decay=trial.suggest_loguniform("weight_decay",1e-5,1)


In [None]:
best_params=study.best_params
model = DNN_3(input_dim=X_train_tensor.shape[1],
              hidden_units=best_params["hidden_units"],
              dropout1=best_params["dropout1"],
              dropout2=best_params["dropout2"],
              dropout3=best_params["dropout3"],
              dropout4=best_params["dropout4"])
# 最適化と訓練を実行
optimizer = optim.Adam(model.parameters(), lr=best_params["learning_rate"],weight_decay=best_params["weight_decay"])
criterion = nn.BCELoss()

num_epochs=100
train_losses = []
val_losses = []

model.train()
for epoch in range(num_epochs):
    epoch_train_loss = 0
    epoch_val_loss = 0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()
    avg_train_loss = epoch_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_train_loss:.4f}')

    if val_loader is not None:
        model.eval()  
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                val_outputs = model(X_val).squeeze()
                val_loss = criterion(val_outputs, y_val)
                epoch_val_loss += val_loss.item()

        avg_val_loss = epoch_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        model.train()  

    if val_loader is not None:
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
    else:
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}")

plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
if val_losses:
    plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Learning Curve for DNN1')
plt.legend()
plt.grid()
plt.show()

In [None]:
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # 予測と確率
    test_outputs = model(X_test_tensor).squeeze()
    predictions = (test_outputs >= 0.5).float()
    y_true = y_test_tensor.cpu().numpy()
    y_pred = predictions.cpu().numpy()
    y_prob = test_outputs.cpu().numpy()

# 評価指標
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    specificity = recall_score(y_true, y_pred, pos_label=0)  
    print(f'Accuracy: {accuracy * 100:.2f}%')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Matthews Correlation Coefficient: {mcc:.4f}')
    print(f'Specificity: {specificity:.4f}')

    # 混同行列（割合表示）
    cm = confusion_matrix(y_true, y_pred, normalize='true')
    sns.heatmap(cm, annot=True, fmt=".2%", cmap="Blues", cbar=False)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix (Normalized)")
    plt.show()

    # ROC曲線とAUC
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--') 
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve - Model ')
    plt.legend(loc="lower right")
    plt.show()

    # Precision-Recall曲線
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_true, y_prob)
    pr_auc = auc(recall_curve, precision_curve)
    plt.plot(recall_curve, precision_curve, label=f'PR curve (AUC = {pr_auc:.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve ')
    plt.legend(loc="lower left")
    plt.show()    

In [None]:
# モデルと構造を保存
torch.save(model, '../../saved_model/DNN_3.pth')
