In [1]:
# Cell 1 - Imports
import pandas as pd
import numpy as np
import pickle
import joblib
from pathlib import Path
from IPython.display import display, clear_output
import ipywidgets as widgets
import warnings
warnings.filterwarnings("ignore")

print("Libraries imported")


Libraries imported


In [2]:
# Cell 2 - Paths to models and master file (adjust if needed)
FAULT_MODEL_PATH = "best_model.pkl"       # your fault classifier pipeline
ETR_MODEL_PATH   = "ETR_MODEL.pkl"        # ETR numeric model (pickle)
ETR_ENCODERS     = "ETR_ENCODERS.pkl"     # fitted label encoders for ETR
MASTER_DATA_PATH = "TRAINING_DATA.csv"    # master file with candidate rows / dropdown values

# Load fault model (pipeline with preprocessor + classifier)
with open(FAULT_MODEL_PATH, "rb") as f:
    bundle = pickle.load(f)
fault_pipeline = bundle.get("pipeline", bundle)     # some saved objects use {pipeline, label_encoder}
fault_label_encoder = bundle.get("label_encoder", None)

print("Loaded fault pipeline:", FAULT_MODEL_PATH)

# Load ETR model and encoders
etr_model = joblib.load(ETR_MODEL_PATH)
etr_encoders = joblib.load(ETR_ENCODERS)
print("Loaded ETR model and encoders.")


Loaded fault pipeline: best_model.pkl
Loaded ETR model and encoders.


In [3]:
# Cell 3 - Load master data and preview
df_master = pd.read_csv(MASTER_DATA_PATH)
print("Master data loaded shape:", df_master.shape)
display(df_master.head())

# Print candidate columns for dropdown (month, region_name, circle_name, division_name, complaint_time)
for col in ["month", "complaint_time", "region_name", "circle_name", "division_name"]:
    if col in df_master.columns:
        vals = sorted(df_master[col].dropna().unique().astype(str).tolist())
        print(f"{col} - unique values: {len(vals)} (showing up to 10): {vals[:10]}")
    else:
        print(f"Warning: column {col} not in master file.")


Master data loaded shape: (165863, 8)


Unnamed: 0,complaints_id,month,complaint_time,region_name,circle_name,division_name,Estimated_Time_of_Restoration,season
0,11652595,5,13:21:00,Jabalpur,Katni,Katni (CITY),25,Summer
1,11525961,5,18:07:00,Jabalpur,Jabalpur (City),Jabalpur North (CITY),25,Summer
2,11655367,5,16:48:00,Rewa,Satna,SATNA (CITY),25,Summer
3,11754029,5,22:49:00,Rewa,Satna,SATNA (CITY),25,Summer
4,11776898,5,19:41:00,Rewa,Rewa,Rewa (CITY),25,Summer


month - unique values: 7 (showing up to 10): ['10', '4', '5', '6', '7', '8', '9']
complaint_time - unique values: 1440 (showing up to 10): ['00:00:00', '00:01:00', '00:02:00', '00:03:00', '00:04:00', '00:05:00', '00:06:00', '00:07:00', '00:08:00', '00:09:00']
region_name - unique values: 3 (showing up to 10): ['Jabalpur', 'Rewa', 'Sagar']
circle_name - unique values: 7 (showing up to 10): ['Chhindwara', 'Jabalpur (City)', 'Katni', 'Rewa', 'Sagar', 'Satna', 'Singrauli']
division_name - unique values: 11 (showing up to 10): ['Chhindwara (CITY)', 'Jabalpur East (CITY)', 'Jabalpur North (CITY)', 'Jabalpur South (CITY)', 'Jabalpur West (CITY)', 'Katni (CITY)', 'Rewa (CITY)', 'SATNA (CITY)', 'Sagar (CITY)', 'Vijaynagar (CITY)']


In [4]:
# Cell 4 - List of features the fault model uses (must match training)
# Replace or confirm with your actual feature list if different.
FEATURES = [
 'Feeder_ProcessStatus','DTR_ProcessStatus','Consumer_ProcessStatus','Consumer_Phase_Id',
 'f_vr','f_vy','f_vb','f_ir','f_iy','f_ib',
 'd_vr','d_vy','d_vb','d_ir','d_iy','d_ib',
 'C_tp_vr','C_tp_vy','C_tp_vb','C_tp_ir','C_tp_iy','C_tp_ib',
 'C_sp_i','C_sp_v'
]

print("Fault model features count:", len(FEATURES))
print(FEATURES)


Fault model features count: 24
['Feeder_ProcessStatus', 'DTR_ProcessStatus', 'Consumer_ProcessStatus', 'Consumer_Phase_Id', 'f_vr', 'f_vy', 'f_vb', 'f_ir', 'f_iy', 'f_ib', 'd_vr', 'd_vy', 'd_vb', 'd_ir', 'd_iy', 'd_ib', 'C_tp_vr', 'C_tp_vy', 'C_tp_vb', 'C_tp_ir', 'C_tp_iy', 'C_tp_ib', 'C_sp_i', 'C_sp_v']


In [8]:
# Cell 5 - Widgets to choose a row from master data (by Request_Id if exists) or index
# If your master file doesn't have Request_Id, it will use index

# Identify id column
id_col = "Request_Id" if "Request_Id" in df_master.columns else None

if id_col:
    id_options = ["--select--"] + df_master[id_col].astype(str).tolist()
    id_dropdown = widgets.Dropdown(options=id_options, description="Request_Id:")
else:
    id_options = ["--select index--"] + df_master.index.astype(str).tolist()
    id_dropdown = widgets.Dropdown(options=id_options, description="Row Index:")

# Dropdowns for ETR inputs (month, complaint_time, region, circle, division)
def make_dropdown_for(col):
    if col in df_master.columns:
        vals = sorted(df_master[col].dropna().astype(str).unique().tolist())
        return widgets.Dropdown(options=["--select--"]+vals, description=col+":" , layout=widgets.Layout(width='50%'))
    else:
        return widgets.Text(description=col+":")

month_dd = make_dropdown_for("month")
complaint_time_dd = make_dropdown_for("complaint_time")
region_dd = make_dropdown_for("region_name")
circle_dd = make_dropdown_for("circle_name")
division_dd = make_dropdown_for("division_name")

# Button to run prediction
run_button = widgets.Button(description="Run Predictions", button_style='success')
output = widgets.Output()

# Arrange widgets
row1 = widgets.HBox([id_dropdown])
row2 = widgets.HBox([month_dd, complaint_time_dd])
row3 = widgets.HBox([region_dd, circle_dd, division_dd])
display(row1, row2, row3, run_button, output)


HBox(children=(Dropdown(description='Row Index:', options=('--select index--', '0', '1', '2', '3', '4', '5', '…

HBox(children=(Dropdown(description='month:', layout=Layout(width='50%'), options=('--select--', '10', '4', '5…

HBox(children=(Dropdown(description='region_name:', layout=Layout(width='50%'), options=('--select--', 'Jabalp…

Button(button_style='success', description='Run Predictions', style=ButtonStyle())

Output()

In [6]:
# ---------------------------------------------------------
# Cell 6 (FAST VERSION)
# ---------------------------------------------------------

# Helper: prepare fault features for ONE row
def prepare_fault_input_from_row(row):
    """
    Extract only required FEATURES.
    Missing ones → NaN
    """
    data = {f: row[f] if f in row else np.nan for f in FEATURES}
    return pd.DataFrame([data])


# Fault prediction wrapper
def predict_fault(df_one_row):
    """
    Returns:
    - predicted fault label (string)
    - probability dataframe (if available)
    """

    pred_raw = fault_pipeline.predict(df_one_row)[0]

    # Reverse transform label
    if fault_label_encoder is not None:
        try:
            pred_label = fault_label_encoder.inverse_transform([pred_raw])[0]
        except:
            pred_label = pred_raw
    else:
        pred_label = pred_raw

    # Probabilities
    if hasattr(fault_pipeline, "predict_proba"):
        probs = fault_pipeline.predict_proba(df_one_row)
        classes = getattr(fault_pipeline, "classes_", list(range(probs.shape[1])))
        proba_df = pd.DataFrame(probs, columns=classes)
    else:
        proba_df = None

    return pred_label, proba_df


# -------------------------------
# ETR Prediction (FAST)
# -------------------------------
def fast_get_month_num(month):
    try:
        return int(str(month).split("-")[1])
    except:
        try:
            return int(month)
        except:
            return 1  # fallback


def fast_predict_ETR(month_val, complaint_time, region_name, circle_name, division_name):
    """Very fast ETR predictor."""
    
    # Extract month number
    month_num = fast_get_month_num(month_val)

    # Season logic
    if month_num in [4,5,6]:
        season = "Summer"
    elif month_num in [7,8,9]:
        season = "Rainy"
    else:
        season = "Winter"

    # Build small df
    etr_input = pd.DataFrame({
        "month": [month_num],
        "region_name": [region_name],
        "circle_name": [circle_name],
        "division_name": [division_name],
        "season": [season]
    })

    # Encode categorical using saved encoders
    for col in ["region_name","circle_name","division_name","season"]:
        enc = etr_encoders.get(col)
        if enc is None:
            continue

        val = etr_input.at[0, col]

        # Extend encoder classes only once, NOT repeatedly
        if val not in enc.classes_:
            enc.classes_ = np.append(enc.classes_, val)

        etr_input[col] = enc.transform(etr_input[[col]])

    # Predict
    numeric_etr = etr_model.predict(etr_input)[0]

    # Convert to "X hr Y min"
    total_minutes = int(round(numeric_etr))
    hours = total_minutes // 60
    mins = total_minutes % 60

    if hours > 0:
        etr_human = f"{hours} hr {mins} min"
    else:
        etr_human = f"{mins} min"

    return numeric_etr, etr_human


In [7]:
# Cell 7 - Hook up button
def on_run_clicked(b):
    with output:
        clear_output()
        try:
            # pick row based on selection
            sel = id_dropdown.value
            if sel is None or (isinstance(sel, str) and sel.startswith("--")):
                print("Please select a valid row from the master file.")
                return
            # locate row either by Request_Id or index
            if id_col:
                selected_row = df_master[df_master[id_col].astype(str) == str(sel)]
                if selected_row.empty:
                    print("Selected Request_Id not found in master data.")
                    return
                row = selected_row.iloc[0]
            else:
                idx = int(sel)
                row = df_master.loc[idx]
            
            print("Selected row preview:")
            display(row.to_frame().T)

            # Prepare fault input and predict
            fault_input_df = prepare_fault_input_from_row(row)
            print("\nFault model input (preview):")
            display(fault_input_df)
            pred_label, probs = predict_fault(fault_input_df)
            print(f"\nPredicted Fault Label: >>> {pred_label}")
            if probs is not None:
                # show top class probabilities nicely
                prob_df = pd.DataFrame(probs, columns=(fault_pipeline.classes_ if hasattr(fault_pipeline, 'classes_') else ["class_"+str(i) for i in range(probs.shape[1])]))
                print("\nPrediction probabilities (per class):")
                display(prob_df.T)

            # ETR part: use dropdown values (if user selected)
            month_val = month_dd.value if (isinstance(month_dd, widgets.Dropdown) and month_dd.value != "--select--") else row.get("month", "")
            complaint_time_val = complaint_time_dd.value if (isinstance(complaint_time_dd, widgets.Dropdown) and complaint_time_dd.value != "--select--") else row.get("complaint_time", "")
            region_val = region_dd.value if (isinstance(region_dd, widgets.Dropdown) and region_dd.value != "--select--") else row.get("region_name","")
            circle_val = circle_dd.value if (isinstance(circle_dd, widgets.Dropdown) and circle_dd.value != "--select--") else row.get("circle_name","")
            division_val = division_dd.value if (isinstance(division_dd, widgets.Dropdown) and division_dd.value != "--select--") else row.get("division_name","")

            print("\nETR inputs being used:")
            print(f"month: {month_val}, complaint_time: {complaint_time_val}, region: {region_val}, circle: {circle_val}, division: {division_val}")

            pred_num, pred_human = predict_ETR_from_inputs(month_val, complaint_time_val, region_val, circle_val, division_val)
            print(f"\nPredicted ETR (minutes): {pred_num}")
            print(f"Predicted ETR (human): {pred_human}")

        except Exception as e:
            print("Error during prediction:", str(e))

run_button.on_click(on_run_clicked)
print("Ready — select a row and click Run Predictions")


Ready — select a row and click Run Predictions
