# In [4]:
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from bayes_opt import BayesianOptimization

def _uploaded_path(upload, label):
    """Return the filesystem path for one uploaded file.

    Args:
        upload: Value passed in for the upload. Either an object exposing
            the path as ``.name`` or a plain path string is accepted.
        label: Human-readable name of the upload, used in the error message.

    Raises:
        ValueError: If nothing was uploaded (``upload`` is ``None``).
    """
    if upload is None:
        raise ValueError(f"{label}がアップロードされていません。")
    # NOTE(review): which of the two forms Gradio passes appears to depend on
    # its version/config -- confirm against the installed version.
    return getattr(upload, "name", upload)


def process_and_predict(file2, file3, sheet_name):
    """Fit a gradient-boosting model on one workbook and score it on another.

    Both workbooks are read from the same sheet, reshaped by ``clean_data``,
    and log-transformed with ``log1p``. Hyperparameters are chosen by Bayesian
    optimisation of the 5-fold cross-validated R² on the training workbook.
    Predictions and actual values are mapped back to the original scale with
    ``expm1`` (the exact inverse of ``log1p``) before being reported.

    Args:
        file2: Uploaded training workbook (object with ``.name`` or a path).
        file3: Uploaded evaluation workbook (object with ``.name`` or a path).
        sheet_name: Name of the sheet to read from both workbooks.

    Returns:
        A ``(summary, result_df)`` tuple: a one-line R² summary string and a
        DataFrame with one row per evaluated entry holding the row label,
        actual count, predicted count and their difference.

    Raises:
        ValueError: If either workbook has not been uploaded.
    """
    def clean_data(data, drop_cols, drop_rows):
        """Transpose the raw sheet and keep only usable numeric rows.

        ``drop_cols`` and ``drop_rows`` are positional indices applied after
        the transpose; they are tied to the expected workbook layout.
        """
        data = data.T
        # After the transpose, row 1 holds the column labels.
        data.columns = data.iloc[1]
        data = data.drop(data.index[[0, 1]])
        data = data.drop(data.columns[drop_cols], axis=1)
        data = data.drop(data.index[drop_rows])
        # The transposed frame is object-dtype; convert to numeric dtypes
        # explicitly instead of relying on fillna's deprecated downcasting.
        data = data.infer_objects().fillna(0)
        # Rows with a zero count carry no signal for the model.
        data = data[data["件数"] != 0]
        return data

    # Resolve both paths up front so a missing upload fails before any I/O.
    path2 = _uploaded_path(file2, "2月のExcelファイル")
    path3 = _uploaded_path(file3, "3月のExcelファイル")

    df2 = pd.read_excel(path2, sheet_name=sheet_name)
    df3 = pd.read_excel(path3, sheet_name=sheet_name)

    df2 = clean_data(df2, drop_cols=[0, 1, 9, 10], drop_rows=[28, 29, 30, 31])
    df3 = clean_data(df3, drop_cols=[0, 1, 2, 10, 11], drop_rows=[31])

    # Log transform: log(1 + x).
    df2 = df2.apply(np.log1p)
    df3 = df3.apply(np.log1p)

    # Align the training columns with the evaluation columns.
    df2 = df2[df3.columns]

    # Training / evaluation split.
    X_train = df2.drop(columns='件数')
    y_train = df2['件数']
    X_test = df3.drop(columns='件数')
    y_test = df3['件数']

    # Objective for the Bayesian optimiser: mean cross-validated R².
    def optimize_model(n_estimators, learning_rate, max_depth, max_features, min_samples_split, subsample):
        # The optimiser proposes floats; integer hyperparameters are truncated.
        model = GradientBoostingRegressor(
            n_estimators=int(n_estimators),
            learning_rate=learning_rate,
            max_depth=int(max_depth),
            max_features=max_features,
            min_samples_split=int(min_samples_split),
            subsample=subsample,
            random_state=42
        )
        return cross_val_score(model, X_train, y_train, cv=5, scoring='r2').mean()

    pbounds = {
        'n_estimators': (100, 1000),
        'learning_rate': (0.005, 0.3),
        'max_depth': (3, 15),
        'min_samples_split': (2, 20),
        'subsample': (0.5, 1.0),
        'max_features': (0.5, 1.0)
    }

    optimizer = BayesianOptimization(f=optimize_model, pbounds=pbounds, random_state=42, verbose=0)
    optimizer.maximize(init_points=5, n_iter=20)
    best_params = optimizer.max['params']
    # Apply the same integer truncation used inside the objective.
    for int_param in ('n_estimators', 'max_depth', 'min_samples_split'):
        best_params[int_param] = int(best_params[int_param])

    model = GradientBoostingRegressor(**best_params, random_state=42)
    model.fit(X_train, y_train)

    # Predict, then invert the log1p transform (exp(y) - 1) so the reported
    # counts are on the original scale rather than shifted by +1.
    y_pred = model.predict(X_test)
    y_pred_original = np.expm1(y_pred)
    y_test_original = np.expm1(y_test)

    result_df = pd.DataFrame({
        "日付": df3.index,
        "実測件数": y_test_original.values,
        "予測件数": y_pred_original,
        "差分": y_pred_original - y_test_original.values
    })

    r2 = r2_score(y_test_original, y_pred_original)
    summary = f"R²スコア: {r2:.3f}（1に近いほど良好）"

    return summary, result_df

# Gradio UI: two workbook uploads plus a sheet name in; a summary string and
# a per-row comparison table out. The server is launched immediately.
input_widgets = [
    gr.File(label="2月のExcelファイル（件数あり）"),
    gr.File(label="3月のExcelファイル（予測対象）"),
    gr.Textbox(label="シート名", value="実績昼"),
]
output_widgets = [
    gr.Text(label="予測モデルの性能"),
    gr.Dataframe(label="各日の予測 vs 実測（件数）"),
]
app = gr.Interface(
    fn=process_and_predict,
    inputs=input_widgets,
    outputs=output_widgets,
    title="工場作業件数の予測ツール（Gradio版）",
    description="2月の実績をもとに、3月の作業件数を予測します。Excelファイルをアップしてください。",
)
app.launch()


# Cell output:
# * Running on local URL:  http://127.0.0.1:7863
# * To create a public link, set `share=True` in `launch()`.




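# Cell output (presumably the source-line echo of a warning raised at the
# fillna call, once per clean_data invocation -- the warning text itself was
# not captured):
#   data = data.fillna(0)
#   data = data.fillna(0)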
