<a href="https://colab.research.google.com/github/TomoharuKurosu/Tomoharu_DS2/blob/main/optuna%E3%81%A7%E9%96%BE%E5%80%A4%E3%82%92%E6%9C%80%E9%81%A9%E5%8C%96.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna
import optuna
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Generate a synthetic binary-classification dataset (1000 samples, 20 features)
# and hold out 30% of it as the test split. Both steps use a fixed random_state.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Model evaluation.
# Cache of the positive-class probabilities on the test split. It is reset to
# None whenever this cell is re-executed, so a fresh run always refits.
_test_probabilities = None


def _get_test_probabilities():
    """Return the positive-class probabilities for X_test, fitting the model once.

    Neither the fitted logistic regression nor its predicted probabilities
    depend on the decision threshold, so they are computed on the first call
    and reused afterwards instead of being recomputed for every trial.
    """
    global _test_probabilities
    if _test_probabilities is None:
        model = LogisticRegression()
        model.fit(X_train, y_train)
        # Column 1 of predict_proba holds the probability of class 1.
        _test_probabilities = model.predict_proba(X_test)[:, 1]
    return _test_probabilities


def evaluate_model(threshold):
    """Return the test-set accuracy obtained with the given decision threshold.

    Args:
        threshold: Probability cut-off; a sample is labelled 1 when its
            predicted positive-class probability is strictly greater than
            this value, otherwise 0.

    Returns:
        The accuracy score of the thresholded predictions against y_test.
    """
    # NOTE(review): the threshold is scored on the test split itself, so the
    # best accuracy found by the search is an optimistic estimate; consider
    # tuning on a separate validation split.
    y_prob = _get_test_probabilities()

    # Turn probabilities into hard 0/1 labels using the candidate threshold.
    y_pred = (y_prob > threshold).astype(int)

    return accuracy_score(y_test, y_pred)

# Objective function for the Optuna study
def objective(trial):
    """Sample a threshold in [0.0, 1.0] from the trial and return its accuracy."""
    return evaluate_model(trial.suggest_float('threshold', 0.0, 1.0))

# Create the Optuna study and run the optimisation.
# The sampler is seeded so that repeated runs propose the same sequence of
# thresholds, matching the fixed random_state used for the dataset and split.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Retrieve the best threshold found by the study
best_threshold = study.best_params['threshold']
print(f"Best threshold: {best_threshold}")

# Report the accuracy achieved with the best threshold
best_accuracy = study.best_value
print(f"Best accuracy with threshold {best_threshold}: {best_accuracy}")


Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Mak

[I 2024-09-03 04:39:15,085] A new study created in memory with name: no-name-6666a17a-c710-492d-b1fe-4802482d3138
[I 2024-09-03 04:39:15,131] Trial 0 finished with value: 0.7333333333333333 and parameters: {'threshold': 0.1024218658579642}. Best is trial 0 with value: 0.7333333333333333.
[I 2024-09-03 04:39:15,177] Trial 1 finished with value: 0.77 and parameters: {'threshold': 0.8428125505158924}. Best is trial 1 with value: 0.77.
[I 2024-09-03 04:39:15,250] Trial 2 finished with value: 0.7766666666666666 and parameters: {'threshold': 0.14689320496440983}. Best is trial 2 with value: 0.7766666666666666.
[I 2024-09-03 04:39:15,293] Trial 3 finished with value: 0.81 and parameters: {'threshold': 0.6821932369159133}. Best is trial 3 with value: 0.81.
[I 2024-09-03 04:39:15,336] Trial 4 finished with value: 0.73 and parameters: {'threshold': 0.09874834967000456}. Best is trial 3 with value: 0.81.
[I 2024-09-03 04:39:15,411] Trial 5 finished with value: 0.7966666666666666 and parameters: {

Best threshold: 0.4285939467042803
Best accuracy with threshold 0.4285939467042803: 0.87
