In [1]:
import pandas as pd
import numpy as np
import optuna
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder
from optuna.visualization import plot_optimization_history, plot_contour

In [5]:

# Load the dataset; header=1 makes pandas take the column names from the second row of the sheet
data = pd.read_excel(io='data.xlsx', header=1)

# Pick out the explanatory variables (the target column is selected later)
X = data.loc[:, ['Concentration', 'time', 'tea']]

# One-hot encode the `tea` column (BT, GT, GC); any other value such as BC becomes [0, 0, 0]
def custom_onehot_encoding(df, categories=('BT', 'GT', 'GC')):
    """Return the numeric features plus integer one-hot columns for ``df['tea']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Concentration'``, ``'time'`` and ``'tea'``.
    categories : sequence of str, optional
        Tea labels that get their own indicator column, in output order.
        Defaults to ``('BT', 'GT', 'GC')``.

    Returns
    -------
    pandas.DataFrame
        ``'Concentration'``, ``'time'`` followed by one integer 0/1 column per
        entry of ``categories``, indexed like ``df``. Rows whose ``tea`` value
        is not listed in ``categories`` are encoded as all zeros (the implicit
        baseline level).

    Raises
    ------
    KeyError
        If ``df`` lacks one of the required columns.
    """
    categories = list(categories)

    # dtype=int keeps every indicator column integer-typed; without it recent
    # pandas versions return bool dummies, which would be mixed with the
    # integer zeros used for categories absent from the data.
    onehot = pd.get_dummies(df['tea'], dtype=int)

    # reindex both drops the dummy columns of unlisted labels and adds an
    # all-zero column for any listed label that does not occur in the data.
    onehot = onehot.reindex(columns=categories, fill_value=0)

    return pd.concat([df[['Concentration', 'time']], onehot], axis=1)

# Target mineral column(s); add further mineral names to this list as needed
mineral = ['Mn']

# Build the model-ready feature frame with the tea column one-hot encoded
X_encoded = custom_onehot_encoding(X)

In [7]:


print(f"\n### Mineral: {mineral} ###")

# Select the target column(s); `mineral` is a list, so `y` is a 2-D frame
y = data[mineral]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Fit the multiple linear regression (MLR) model and predict on the held-out rows
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)
y_pred_mlr = mlr_model.predict(X_test)

# Regression parameters.
# Because `y` is 2-D, scikit-learn reports intercept_ with shape (n_targets,)
# and coef_ with shape (n_targets, n_features). Reduce them to a plain float
# and a 1-D vector for the first listed mineral so they can be formatted with
# ":.4f" and so that `intercept + np.dot(coefficients, features)` is a scalar
# rather than a 1-element array.
intercept = float(np.atleast_1d(mlr_model.intercept_)[0])
coefficients = np.atleast_2d(mlr_model.coef_)[0]

# Evaluation metrics on the test split (MLR)
r2_mlr = r2_score(y_test, y_pred_mlr)
rmse_mlr = np.sqrt(mean_squared_error(y_test, y_pred_mlr))
mae_mlr = mean_absolute_error(y_test, y_pred_mlr)

print("\nMLR Model Performance:")
print(f"R2: {r2_mlr:.4f}")
print(f"RMSE: {rmse_mlr:.4f}")
print(f"MAE: {mae_mlr:.4f}")



### Mineral: ['Mn'] ###

MLR Model Performance:
R2: 0.7353
RMSE: 1.4190
MAE: 1.1234


In [8]:

# 最適化対象関数の定義
def objective(trial, target=5.3):
    """Optuna objective: distance of the predicted mineral level from ``target``.

    Samples a candidate (Concentration, time, tea) from the trial, evaluates
    the fitted linear model through the module-level ``intercept`` and
    ``coefficients``, and returns the absolute deviation from ``target``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``'Concentration'`` in [0.0, 10.0], ``'time'`` in
        [2.0, 60.0] and ``'tea'`` from ``['BT', 'GT', 'GC']``.
    target : float, optional
        Desired predicted level; defaults to 5.3.

    Returns
    -------
    float
        ``abs(prediction - target)`` as a plain Python float.
    """
    concentration = trial.suggest_float('Concentration', 0.0, 10.0)  # concentration search range
    time_value = trial.suggest_float('time', 2.0, 60.0)  # time search range
    tea = trial.suggest_categorical('tea', ['BT', 'GT', 'GC'])

    # Feature vector in the same column order as the encoded training frame:
    # Concentration, time, BT, GT, GC.
    features = np.array(
        [concentration, time_value, tea == 'BT', tea == 'GT', tea == 'GC'],
        dtype=float,
    )

    # The regression parameters may be a scalar / 1-D vector or, when the model
    # was fitted on a 2-D target, arrays of shape (1,) / (1, n_features).
    # Normalise both so the prediction is a true scalar; returning a 1-element
    # array would rely on NumPy's deprecated array-to-float conversion.
    weights = np.atleast_2d(np.asarray(coefficients, dtype=float))[0]
    bias = float(np.ravel(intercept)[0])

    predicted = bias + float(np.dot(weights, features))

    # Minimising this deviation drives the predicted level towards the target
    return abs(predicted - target)


In [9]:
# Run the Optuna optimization.
# The TPE sampler is seeded so that Restart & Run All reproduces the same
# sequence of trials (and therefore the same plots below).
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)


[I 2024-08-09 21:40:48,401] A new study created in memory with name: no-name-04b300b8-598f-45af-ac0a-d747e313c343
[I 2024-08-09 21:40:48,403] Trial 0 finished with value: 21.205081089286537 and parameters: {'Concentration': 9.12610734073615, 'time': 33.62383495839843, 'tea': 'GT'}. Best is trial 0 with value: 21.205081089286537.
[I 2024-08-09 21:40:48,408] Trial 1 finished with value: 18.11428103730142 and parameters: {'Concentration': 9.069480649592917, 'time': 12.198949022835432, 'tea': 'GC'}. Best is trial 1 with value: 18.11428103730142.
[I 2024-08-09 21:40:48,409] Trial 2 finished with value: 1.5507758054780716 and parameters: {'Concentration': 0.27362009654947284, 'time': 43.653318284704326, 'tea': 'GT'}. Best is trial 2 with value: 1.5507758054780716.
[I 2024-08-09 21:40:48,411] Trial 3 finished with value: 0.3786165476218901 and parameters: {'Concentration': 1.625423102879967, 'time': 21.47637250632795, 'tea': 'BT'}. Best is trial 3 with value: 0.3786165476218901.
[I 2024-08-09

In [19]:
# Plot the optimization history of the study
optuna.visualization.plot_optimization_history(study)

In [15]:

# Optuna contour plot over the Concentration and time parameters
optuna.visualization.plot_contour(study, params=['Concentration', 'time'])

In [20]:

# Plot the parameter importances computed by Optuna for the finished study
optuna.visualization.plot_param_importances(study)

In [21]:

# Parallel-coordinate plot of the study's trials
optuna.visualization.plot_parallel_coordinate(study)


In [22]:
# Slice plot of the study's trials, one panel per sampled parameter
optuna.visualization.plot_slice(study)