<a href="https://colab.research.google.com/github/TomoharuKurosu/Tomoharu_DS2/blob/main/optuna%E3%81%A7%E9%96%BE%E5%80%A4%E3%82%92%E6%9C%80%E9%81%A9%E5%8C%96.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install optuna
import optuna
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Generate a synthetic two-class dataset (fixed seed for repeatability)
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)
# Hold out 30% of the samples as the test split (fixed seed as well)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Model evaluation function
def evaluate_model(threshold):
    """Return test-set accuracy of logistic regression at a decision threshold.

    A ``LogisticRegression`` with default settings is fitted on the
    module-level ``X_train``/``y_train``, and its class-1 probabilities for
    ``X_test`` are compared against ``threshold``.

    Neither the fit nor the predicted probabilities depend on ``threshold``,
    so they are computed once and reused on later calls instead of being
    recomputed every time. The cache lives on the function object and is
    rebuilt whenever ``X_train``, ``y_train`` or ``X_test`` is rebound to a
    different object. In-place mutation of those arrays is NOT detected;
    redefine this function (e.g. re-run the cell) after mutating them.

    Args:
        threshold: Probability cut-off. Samples whose predicted probability
            of class 1 is strictly greater than this value are labelled 1,
            all others 0.

    Returns:
        The value of ``accuracy_score(y_test, y_pred)`` for the thresholded
        predictions.
    """
    # NOTE(review): the score is computed on X_test/y_test. If this score is
    # also used to *choose* the threshold, the resulting accuracy is an
    # optimistic estimate -- consider tuning on a separate validation split.
    data_key = (X_train, y_train, X_test)
    cached = getattr(evaluate_model, "_cache", None)
    if cached is None or any(old is not new for old, new in zip(cached[0], data_key)):
        # Fit once; column 1 of predict_proba is the probability of class 1.
        model = LogisticRegression()
        model.fit(X_train, y_train)
        cached = (data_key, model.predict_proba(X_test)[:, 1])
        evaluate_model._cache = cached
    y_prob = cached[1]

    # Apply the threshold to obtain hard 0/1 labels.
    y_pred = (y_prob > threshold).astype(int)

    return accuracy_score(y_test, y_pred)

# Objective function definition
def objective(trial):
    """Sample a candidate threshold from ``trial`` and return its score.

    Args:
        trial: Object exposing ``suggest_float``; a float named
            ``'threshold'`` is requested from it with bounds 0.0 and 1.0.

    Returns:
        The result of ``evaluate_model`` for the sampled threshold.
    """
    candidate = trial.suggest_float('threshold', 0.0, 1.0)
    return evaluate_model(candidate)

# Create an Optuna study and run the optimisation.
# The sampler is seeded so repeated runs suggest the same sequence of
# thresholds, in line with the fixed random_state used for the data and split.
# NOTE(review): TPESampler is believed to be create_study's default sampler,
# so only the seed should differ from the previous behaviour -- confirm
# against the installed Optuna version.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Retrieve the best threshold found by the study
best_threshold = study.best_params['threshold']
print(f"Best threshold: {best_threshold}")

# Show the objective value (accuracy) obtained at the best threshold.
# NOTE(review): this is the same test-set accuracy that was maximised during
# the search, so it is likely an optimistic estimate of real performance.
best_accuracy = study.best_value
print(f"Best accuracy with threshold {best_threshold}: {best_accuracy}")


Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

[I 2024-09-03 04:14:02,952] A new study created in memory with name: no-name-a3808108-06f3-4c4a-8086-f43550a5c968
[I 2024-09-03 04:14:02,988] Trial 0 finished with value: 0.8533333333333334 and parameters: {'threshold': 0.4864270517740865}. Best is trial 0 with value: 0.8533333333333334.
[I 2024-09-03 04:14:03,021] Trial 1 finished with value: 0.8 and parameters: {'threshold': 0.19232350866864267}. Best is trial 0 with value: 0.8533333333333334.
[I 2024-09-03 04:14:03,042] Trial 2 finished with value: 0.8533333333333334 and parameters: {'threshold': 0.35534830963641273}. Best is trial 0 with value: 0.8533333333333334.
[I 2024-09-03 04:14:03,068] Trial 3 finished with value: 0.7566666666666667 and parameters: {'threshold': 0.8726606996708326}. Best is trial 0 with value: 0.8533333333333334.
[I 2024-09-03 04:14:03,089] Trial 4 finished with value: 0.82 and parameters: {'threshold': 0.6199632160060083}. Best is trial 0 with value: 0.8533333333333334.
[I 2024-09-03 04:14:03,112] Trial 5 fi

Best threshold: 0.42618836960894224
Best accuracy with threshold 0.42618836960894224: 0.87
