In [1]:
import sqlite3
import pandas as pd

# --------------------------------------------------------------
# Extraction targets
# --------------------------------------------------------------
user_code = "D250"
thickness = 30.0
width = 495


# --------------------------------------------------------------
# Database connection & extraction
# --------------------------------------------------------------
# Alternative database location, kept for reference:
#dbname = "F:\\金属箔事業部\\秘L\\旧箔\\箔\\品質保証Gr\\QC150_品質データ\\010_オフライン形状測定装置データ_工場\\新形状測定装置\\★SQLite取り込みプログラム\\DB\\keijyo_new.db"
dbname = "./DB/keijyo_new.db"

# CASE WHEN THEN: maps the position variants 'f' / 'Ｆ' to 'F' and 'b' / 'Ｂ' to 'B'.
# MAX           : within each GROUP BY group, picks the largest m_time
#                 (SQLite then fills the bare `*` columns from that same row).
# The filter values are passed as bound parameters (`?`) instead of being
# formatted into the SQL text, and fixed string literals use single quotes,
# which is the standard SQL quoting for strings.
sql = '''
        SELECT
            * ,
            MAX(m_time),
            CASE
                WHEN position = 'f' THEN 'F'
                WHEN position = 'Ｆ' THEN 'F'
                WHEN position = 'b' THEN 'B'
                WHEN position = 'Ｂ' THEN 'B'
                ELSE position
            END AS new_position
        FROM tbl_01
        WHERE
            (customer = ?)
            AND (thickness = ?)
            AND (width = ?)
            AND (treat_code = '4' OR treat_code = '9')
        GROUP BY
            coil_num,
            coildiv_num,
            ope_code,
            treat_code,
            CASE
                WHEN position = 'f' THEN 'F'
                WHEN position = 'Ｆ' THEN 'F'
                WHEN position = 'b' THEN 'B'
                WHEN position = 'Ｂ' THEN 'B'
                ELSE position
            END
        ORDER BY m_date ASC
    '''

# Bound as text so the comparisons behave exactly like the quoted literals
# ("30.0", "495") that were previously embedded in the statement.
sql_params = (user_code, str(thickness), str(width))

# `with sqlite3.connect(...)` only manages the transaction and leaves the
# connection open, so the connection is closed explicitly here.
conn1 = sqlite3.connect(dbname)
try:
    df = pd.read_sql(sql, con=conn1, params=sql_params)
finally:
    conn1.close()


# --------------------------------------------------------------
# Build the DataFrame
# --------------------------------------------------------------

# Coil identifier: "<coil_num>-<coildiv_num>"
df["new_coil"] = df["coil_num"] + "-"+ df["coildiv_num"]

# New column combining treatment code and position: ["9F","9B","4F","4B"] = ["FCR_F","FCR_B","TA_F","TA_B"]
df["process_position"] = df["treat_code"] + df["new_position"]

# Columns to keep
new_col = ["m_date", "new_coil", "process_position"] + [f"i{i}" for i in range(1, 21)]
df = df[new_col]

# Optional CSV export
#df.to_csv("data.csv", encoding='shift-jis')

df.head(3)

Unnamed: 0,m_date,new_coil,process_position,i1,i2,i3,i4,i5,i6,i7,...,i11,i12,i13,i14,i15,i16,i17,i18,i19,i20
0,2023-04-15 00:00:00,49134-11,9B,18.136928,3.790268,0.0,0.558058,0.992577,2.033761,2.587749,...,0.333904,0.013008,0.307649,0.652011,0.720563,1.595945,2.631035,4.617211,10.478505,17.890834
1,2023-04-15 00:00:00,49134-11,9F,14.397729,3.015333,0.0,0.479469,0.775821,1.638432,2.051087,...,0.772083,0.248304,0.395933,0.899396,0.736544,0.840219,1.414581,3.619171,8.066631,13.722709
2,2023-04-15 00:00:00,49134-12,9B,12.868052,4.347836,0.0,0.476016,1.263866,1.913309,2.823925,...,0.959822,0.789794,1.568435,1.387556,0.618093,0.651241,1.425827,2.963362,6.9217,10.121797


In [2]:
# Split the long table into one frame per treatment/position combination and
# rename the columns with a per-combination tag so the frames can sit side by
# side after merging.
_tagged_parts = {}
for _code, _tag in (("9F", "FCR_F"), ("9B", "FCR_B"), ("4F", "TA_F"), ("4B", "TA_B")):
    _part = df[df["process_position"] == _code]
    _part.columns = (
        [f"date_{_tag}", "new_coil", f"pp_{_tag}"]
        + [f"{_tag}_{i}" for i in range(1, 21)]
    )
    _tagged_parts[_tag] = _part

df_MF = _tagged_parts["FCR_F"]    # FCR F
df_MB = _tagged_parts["FCR_B"]    # FCR B
df_TF = _tagged_parts["TA_F"]     # TA F
df_TB = _tagged_parts["TA_B"]     # TA B

# Left-join the other three frames onto the FCR F rows, keyed on "new_coil"
df2 = df_MF
for _right in (df_MB, df_TF, df_TB):
    df2 = pd.merge(df2, _right, on="new_coil", how='left')

df2.to_csv("data2.csv", encoding='shift-jis')

df2.head(3)

Unnamed: 0,date_FCR_F,new_coil,pp_FCR_F,FCR_F_1,FCR_F_2,FCR_F_3,FCR_F_4,FCR_F_5,FCR_F_6,FCR_F_7,...,TA_B_11,TA_B_12,TA_B_13,TA_B_14,TA_B_15,TA_B_16,TA_B_17,TA_B_18,TA_B_19,TA_B_20
0,2023-04-15 00:00:00,49134-11,9F,14.397729,3.015333,0.0,0.479469,0.775821,1.638432,2.051087,...,2.006524,0.570906,0.501261,0.575592,0.10884,0.0,1.169751,2.753668,7.081118,13.974039
1,2023-04-15 00:00:00,49134-12,9F,15.557247,4.253771,0.108134,0.819745,1.503837,1.98719,2.42042,...,1.252476,0.168589,0.007054,0.076176,0.0,0.433399,1.830434,2.864033,6.868363,11.273258
2,2023-05-07 00:00:00,49135-1,9F,14.571781,5.226943,1.243542,1.725209,0.962206,0.626832,0.069162,...,2.305909,1.347314,0.940886,0.421182,0.235367,0.169316,1.139534,2.968099,7.583181,15.721907


In [3]:
# Keep only the rows whose first TA B value is present; the left joins above
# leave TA_B_* empty for coils that have no TA B row.
_has_ta_b = df2["TA_B_1"].notna()
df3 = df2[_has_ta_b]
df3.head(3)

Unnamed: 0,date_FCR_F,new_coil,pp_FCR_F,FCR_F_1,FCR_F_2,FCR_F_3,FCR_F_4,FCR_F_5,FCR_F_6,FCR_F_7,...,TA_B_11,TA_B_12,TA_B_13,TA_B_14,TA_B_15,TA_B_16,TA_B_17,TA_B_18,TA_B_19,TA_B_20
0,2023-04-15 00:00:00,49134-11,9F,14.397729,3.015333,0.0,0.479469,0.775821,1.638432,2.051087,...,2.006524,0.570906,0.501261,0.575592,0.10884,0.0,1.169751,2.753668,7.081118,13.974039
1,2023-04-15 00:00:00,49134-12,9F,15.557247,4.253771,0.108134,0.819745,1.503837,1.98719,2.42042,...,1.252476,0.168589,0.007054,0.076176,0.0,0.433399,1.830434,2.864033,6.868363,11.273258
2,2023-05-07 00:00:00,49135-1,9F,14.571781,5.226943,1.243542,1.725209,0.962206,0.626832,0.069162,...,2.305909,1.347314,0.940886,0.421182,0.235367,0.169316,1.139534,2.968099,7.583181,15.721907


In [4]:

# Rows belonging to the coil that is held out for validation
_is_holdout = df3["new_coil"] == "53165-2"

# Data for building and testing the model (validation rows removed)
df4 = df3[~_is_holdout]

# Validation data
df4v = df3[_is_holdout]


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


# Features and targets
X = df4[[f"FCR_F_{i}" for i in range(1, 21)]]   # data before the process
y = df4[[f"TA_B_{i}" for i in range(1, 21)]]    # data after the process

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model selection and training.
# The forest is seeded (same seed as the split) so that the reported error and
# the predictions made later are reproducible across kernel restarts.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Model evaluation on the held-back test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 1.2778746128041631


In [6]:
# Build the validation feature frame.
# Rows are taken from df4v as a whole instead of through a hard-coded row
# label, because the label depends on the current contents of the database.
# Columns are listed exactly like the training features X.
if df4v.empty:
    raise ValueError("df4v is empty: no validation rows were found for the held-out coil")

df4vd_F1 = df4v[[f"FCR_F_{i}" for i in range(1, 21)]]
df4v_F1 = df4vd_F1.iloc[0]    # first validation row as a Series (name kept from the earlier version)

# Prediction result: one output row per validation row
predicted = model.predict(df4vd_F1)
#print(f'Predicted: {predicted}')
df_predicted = pd.DataFrame(predicted)
df_predicted.to_csv("data3.csv")
df_predicted

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,14.396008,3.132767,0.183274,0.282204,0.405217,0.997327,1.428457,2.249161,1.934908,2.253233,1.689226,0.886606,0.653594,0.267791,0.291038,0.57288,1.917848,3.874045,7.861929,13.510428
