In [None]:
import os
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import gradio as gr

# Excel file holding the features plus the "Build_Quality" and "Porosity" target columns.
DATA_PATH = "/content/cleaned_dataset.xlsx"   # Colab-style default location; change if needed
# Files used to cache the fitted classifiers and the scaler between runs
# (relative paths, so they resolve against the current working directory).
MODEL_QUALITY_PATH = "model_quality.pkl"
MODEL_POROSITY_PATH = "model_porosity.pkl"
SCALER_PATH = "scaler.pkl"

# Seed shared by the train/test split and both random forests.
RANDOM_STATE = 42

def load_data(path):
    """Read the Excel dataset at *path* and separate features from the two targets.

    Args:
        path: Filesystem location of the Excel workbook.

    Returns:
        A 4-tuple ``(X, y_quality, y_porosity, df)``: the feature columns
        (everything except the two targets), the ``Build_Quality`` column,
        the ``Porosity`` column, and the full unmodified frame.

    Raises:
        FileNotFoundError: If nothing exists at *path*.
        ValueError: If either target column is missing from the sheet.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Dataset not found at: {path}")

    frame = pd.read_excel(path)

    # Both target columns must be present before the frame can be split.
    required = {"Build_Quality", "Porosity"}
    if required - set(frame.columns):
        raise ValueError(f"Dataset must contain columns: {required}. Found: {list(frame.columns)}")

    target_columns = ["Build_Quality", "Porosity"]
    features = frame.drop(columns=target_columns)
    quality, porosity = frame["Build_Quality"], frame["Porosity"]
    return features, quality, porosity, frame

def train_or_load_models(X, yq, yp):
    """Return fitted classifiers and scaler, reusing cached artifacts when they fit the data.

    If all three artifact files exist they are loaded. The cache is only
    trusted when every loaded object that reports ``n_features_in_`` agrees
    with the number of columns in ``X``; otherwise fresh models are trained
    and the files are overwritten.

    Args:
        X: Feature table (rows are samples, columns are features).
        yq: Build-quality labels aligned with ``X``.
        yp: Porosity labels aligned with ``X``.

    Returns:
        ``(model_q, model_p, scaler, performance)``. ``performance`` is ``None``
        when the artifacts came from disk; after training it is a dict with the
        keys ``acc_quality``, ``acc_porosity``, ``report_quality`` and
        ``report_porosity`` computed on the held-out 20% split.
    """
    cache_paths = (MODEL_QUALITY_PATH, MODEL_POROSITY_PATH, SCALER_PATH)
    if all(os.path.exists(p) for p in cache_paths):
        print("Loading saved models and scaler...")
        # NOTE: joblib.load unpickles arbitrary objects; only keep .pkl files
        # from a trusted source next to this script.
        model_q = joblib.load(MODEL_QUALITY_PATH)
        model_p = joblib.load(MODEL_POROSITY_PATH)
        scaler = joblib.load(SCALER_PATH)

        # A cache fitted on a different feature set would only fail later, at
        # prediction time, so detect the mismatch here and retrain instead.
        n_features = X.shape[1]
        cache_matches = all(
            getattr(obj, "n_features_in_", n_features) == n_features
            for obj in (model_q, model_p, scaler)
        )
        if cache_matches:
            # Performance is not recomputed for cached models.
            return model_q, model_p, scaler, None
        print("Saved models were fitted on a different number of features; retraining...")
    else:
        print("Training models (no saved models found)...")

    # Stratification needs at least two classes and at least two samples in
    # every class; train_test_split raises otherwise.
    _, class_counts = np.unique(yq, return_counts=True)
    can_stratify = len(class_counts) > 1 and class_counts.min() >= 2
    if len(class_counts) > 1 and not can_stratify:
        print("Some Build_Quality classes have a single sample; using a non-stratified split.")

    X_train, X_test, yq_train, yq_test, yp_train, yp_test = train_test_split(
        X, yq, yp,
        test_size=0.2,
        random_state=RANDOM_STATE,
        stratify=yq if can_stratify else None,
    )

    # Fit the scaler on the training rows only so the test rows stay unseen.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    model_q = RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1)
    model_q.fit(X_train_s, yq_train)

    model_p = RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1)
    model_p.fit(X_train_s, yp_train)

    # Evaluate both classifiers on the held-out split.
    yq_pred = model_q.predict(X_test_s)
    yp_pred = model_p.predict(X_test_s)

    performance = {
        "acc_quality": accuracy_score(yq_test, yq_pred),
        "acc_porosity": accuracy_score(yp_test, yp_pred),
        "report_quality": classification_report(yq_test, yq_pred, zero_division=0),
        "report_porosity": classification_report(yp_test, yp_pred, zero_division=0),
    }

    # Save for reuse on the next run.
    joblib.dump(model_q, MODEL_QUALITY_PATH)
    joblib.dump(model_p, MODEL_POROSITY_PATH)
    joblib.dump(scaler, SCALER_PATH)
    print("Models and scaler saved.")

    return model_q, model_p, scaler, performance

# ---------- Prepare data & models ----------
X, y_quality, y_porosity, df_full = load_data(DATA_PATH)

# Observed (min, max) of every feature column; the UI uses these as input bounds.
input_ranges = {
    col: (float(X[col].min()), float(X[col].max()))
    for col in X.columns
}

model_quality, model_porosity, scaler, performance = train_or_load_models(X, y_quality, y_porosity)

# Markdown summary shown in the UI: full metrics after a fresh training run,
# otherwise a short note that cached models are in use.
if not performance:
    perf_text = "_Loaded pre-saved models. If you want fresh training, delete the model .pkl files and rerun._"
else:
    perf_text = "".join([
        "**Model Performance (test set)**\n\n",
        f"- Build Quality Accuracy: {performance['acc_quality']:.4f}\n",
        f"- Porosity Accuracy: {performance['acc_porosity']:.4f}\n\n",
        "---\n\n",
        f"**Build Quality classification report**\n\n```\n{performance['report_quality']}\n```\n",
        f"**Porosity classification report**\n\n```\n{performance['report_porosity']}\n```\n",
        "---\n\n",
        f"_Models saved to {MODEL_QUALITY_PATH}, {MODEL_POROSITY_PATH}, scaler saved to {SCALER_PATH}_",
    ])

# ---------- Prediction function ----------
def _class_probabilities(model, arr_s):
    """Map each class label of *model* to its probability for one scaled row."""
    # Best effort by design: a failure here must not hide the class prediction,
    # so the probability panel gets a placeholder entry instead.
    try:
        probs = model.predict_proba(arr_s)[0]
        return {str(cl): float(np.round(p, 4)) for cl, p in zip(model.classes_, probs)}
    except Exception:
        return {"error": 0.0}


def predict_properties(*features):
    """Predict build quality and porosity for one set of printing parameters.

    Args:
        *features: One numeric value per feature, in the same order as
            ``X.columns``.

    Returns:
        A 5-tuple ``(quality_text, porosity_text, quality_probs,
        porosity_probs, perf_text)``. On invalid input the first element is an
        error message, the second carries the detail, and both probability
        dicts are empty.
    """
    # Convert inside a guarded section so a non-numeric value is reported to
    # the user instead of escaping as an unhandled exception.
    try:
        arr = np.array([features], dtype=float)
    except (TypeError, ValueError) as e:
        return ("Error: inputs must be numeric. Check inputs.", str(e), {}, {}, perf_text)

    # An empty number field arrives as None and converts to NaN; the scaler
    # passes NaN through, so it has to be rejected before reaching the models.
    if not np.isfinite(arr).all():
        return (
            "Error: every input must be a finite number. Check inputs.",
            "One or more inputs are empty, NaN or infinite.",
            {},
            {},
            perf_text,
        )

    try:
        arr_s = scaler.transform(arr)
    except Exception as e:
        return ("Error: scaler.transform failed. Check inputs.", str(e), {}, {}, perf_text)

    # Class predictions.
    pred_q = model_quality.predict(arr_s)[0]
    pred_p = model_porosity.predict(arr_s)[0]

    # Per-class probabilities (as dicts for gr.Label).
    probs_q_dict = _class_probabilities(model_quality, arr_s)
    probs_p_dict = _class_probabilities(model_porosity, arr_s)

    text_q = f"✅ Predicted Build Quality: {pred_q}"
    text_p = f"✅ Predicted Porosity: {pred_p}"

    return text_q, text_p, probs_q_dict, probs_p_dict, perf_text

# ---------- Build Gradio UI ----------
def _build_input_component(col, min_val, max_val):
    """Create the input widget for one feature column.

    A slider spanning ``[min_val, max_val]`` is used when the range is
    non-degenerate; a column with a single observed value gets a number field
    pre-filled with that value.
    """
    if min_val == max_val:
        # Degenerate range: a slider cannot represent it.
        return gr.Number(value=min_val, label=f"{col} (constant value: {min_val})")

    step = (max_val - min_val) / 100.0
    # Guard against a step that underflows to zero for a vanishingly small range.
    if step == 0:
        step = 1.0
    return gr.Slider(
        minimum=min_val,
        maximum=max_val,
        step=step,
        label=f"{col} (range: {min_val:.4f} → {max_val:.4f})",
    )


title = "3D Printing — Build Quality & Porosity Prediction"
description = (
    "Enter printing parameters (use sliders) within dataset ranges to predict Build Quality and Porosity.\n\n"
    "- Predictions come with class probabilities.\n"
    "- Models are Random Forests; scaler applied automatically.\n"
    "- To force retraining, delete the model files and re-run this script."
)

# Use Blocks to lay the page out in two columns.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Input Parameters")
            # Components only join the layout (and are only valid as event
            # inputs) when they are instantiated or .render()-ed inside the
            # Blocks context, so the inputs are created here, one per feature
            # in X.columns order.
            inputs = [
                _build_input_component(col, *input_ranges[col])
                for col in X.columns
            ]
            submit_btn = gr.Button("Predict")

        with gr.Column(scale=3):
            gr.Markdown("### Predictions")
            out_q_comp = gr.Textbox(label="Build Quality Prediction", interactive=False)
            out_p_comp = gr.Textbox(label="Porosity Prediction", interactive=False)
            probs_q_comp = gr.Label(label="Build Quality Probabilities")
            probs_p_comp = gr.Label(label="Porosity Probabilities")
            perf_comp = gr.Markdown(perf_text)

    # Output order matches the 5-tuple returned by predict_properties.
    outputs = [out_q_comp, out_p_comp, probs_q_comp, probs_p_comp, perf_comp]

    # Wire up button -> function.
    submit_btn.click(
        fn=predict_properties,
        inputs=inputs,
        outputs=outputs,
    )

# Launch: share=True gives public link (useful in Colab). Set server_port or server_name if running remotely.
demo.launch(share=True)
