## 2. 機械学習

#### 「1.データ抽出」で出力された data.csvから、予測したいデータを除いて、data1.csvを作り、そのデータを読み込んで学習させる。

In [1]:
import pandas as pd

# Load the training CSV and keep only rows whose TA_B_1 value is present.
df = (
    pd.read_csv("data1.csv", encoding='shift-jis')
    .dropna(subset=["TA_B_1"])
)
df.sample(3)

Unnamed: 0.1,Unnamed: 0,date_FCR_F,new_coil,pp_FCR_F,FCR_F_1,FCR_F_2,FCR_F_3,FCR_F_4,FCR_F_5,FCR_F_6,...,炉温7,炉温8,炉温9,炉温10,炉温11,炉温12,炉温13,炉温14,炉温15,炉温16
115,115,2024/9/2 0:00,53474-2,9F,16.245242,6.009391,0.772782,0.0,0.141927,0.68153,...,649.0,656.0,514.0,403.0,300.0,301.0,212.0,203.0,103.0,30.0
1,1,2023/4/15 0:00,49134-12,9F,15.557247,4.253771,0.108134,0.819745,1.503837,1.98719,...,644.0,654.0,508.0,401.0,292.0,302.0,213.0,201.0,103.0,20.0
152,155,2024/12/1 0:00,53909-1,9F,14.166507,3.085537,0.134725,0.461953,0.104593,1.170969,...,650.0,655.0,509.0,403.0,297.0,301.0,212.0,200.0,79.0,12.0


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


# Feature and target selection
fcr_cols = [f"FCR_F_{i}" for i in range(1, 21)]
furnace_temp_cols = [f"炉温{i}" for i in range(1, 17)]
target_cols = [f"TA_B_{i}" for i in range(1, 21)]

# Data from before the process: FCR_F_1..20, the 熱処理速度 column, and 炉温1..16.
X = df[fcr_cols + ["熱処理速度"] + furnace_temp_cols]
# Data from after the process: TA_B_1..20.
y = df[target_cols]


# Training helper
def learn(x, t, depth=3):
    """Fit a random forest on a hold-out split of (x, t) and score it.

    Parameters
    ----------
    x : features, in any form accepted by ``train_test_split``.
    t : targets aligned row-for-row with ``x``.
    depth : value passed to ``RandomForestRegressor(max_depth=...)``.

    Returns
    -------
    tuple
        ``(mse, model)`` where ``mse`` is the mean squared error on the
        20% test split, rounded to 3 decimals, and ``model`` is the
        fitted ``RandomForestRegressor``.
    """
    # Split the data that was passed in. The previous version ignored its
    # arguments and always used the module-level X / y.
    x_train, x_test, t_train, t_test = train_test_split(
        x, t, test_size=0.2, random_state=0
    )

    model = RandomForestRegressor(n_estimators=100, random_state=0, max_depth=depth)
    model.fit(x_train, t_train)

    # Model evaluation: MSE (mean squared error) on the held-out split
    t_pred = model.predict(x_test)
    mse = mean_squared_error(t_test, t_pred)

    return round(mse, 3), model


# Hyperparameter search: find the max_depth (1..14) with the lowest MSE.
# When several depths share the lowest score, the smallest depth is kept.
depth_scores = {}

for j in range(1, 15):
    mse, model = learn(X, y, depth=j)
    depth_scores[j] = mse
    print(f"depth:{j}, MSE:{mse}")

# min() returns the first key with the smallest score (insertion order).
best_depth = min(depth_scores, key=depth_scores.get)
min_mse = depth_scores[best_depth]

print(f"The depth with the lowest MSE is: {best_depth}")

depth:1, MSE:1.779
depth:2, MSE:1.713
depth:3, MSE:1.713
depth:4, MSE:1.689
depth:5, MSE:1.672
depth:6, MSE:1.695
depth:7, MSE:1.671
depth:8, MSE:1.67
depth:9, MSE:1.652
depth:10, MSE:1.669
depth:11, MSE:1.661
depth:12, MSE:1.657
depth:13, MSE:1.658
depth:14, MSE:1.658
The depth with the lowest MSE is: 9


In [3]:
# Split the data (80% train / 20% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the model, using the depth that gave the lowest MSE in the search above
rf_params = {"n_estimators": 100, "random_state": 0, "max_depth": best_depth}
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)


In [4]:
# モデルの保存
import pickle

# Serialize the fitted model to RF03.pkl (path is relative to the working directory).
with open('RF03.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)