# Scope 3 Emissions Prediction UI
This Voila app allows users to input ERP-style values and receive Scope 3 emissions predictions using a trained XGBoost model.

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import joblib
import shap
import xgboost as xgb
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the trained model and the per-column encoders from the working directory.
# NOTE(review): joblib files are pickle-based, so loading them can execute
# arbitrary code — only use artifacts that come from a trusted source.
# `encoders` is used below as a mapping from column name to an object exposing
# `classes_` and `transform` (presumably scikit-learn LabelEncoders — confirm).
model = joblib.load("xgb_model.joblib")
encoders = joblib.load("label_encoders.pkl")

In [None]:
# Form inputs: dropdowns for the fixed-choice fields, a float box for the
# production amount, and free text for the two entity names.
commodity = widgets.Dropdown(
    description="Commodity:",
    options=["Steel", "Cement", "Crude Oil", "Aluminium", "Gas"],
)
production_value = widgets.FloatText(description="Production:")
production_unit = widgets.Dropdown(
    description="Unit:",
    options=["tonnes", "barrels", "m³"],
)
parent_entity = widgets.Text(description="Parent:")
reporting_entity = widgets.Text(description="Site:")
year = widgets.Dropdown(
    description="Year:",
    options=[*range(2015, 2026)],  # 2015 through 2025 inclusive
)

# Area that receives the printed results, and the button that triggers them.
output = widgets.Output()
submit_button = widgets.Button(
    button_style="success",
    description="Predict Emissions",
)

In [None]:
# Prediction function
def on_submit(b):
    """Predict Scope 3 emissions from the current form values and show the result.

    Reads the input widgets, label-encodes the categorical fields, derives the
    engineered features, aligns the columns to ``model.feature_names_in_``,
    and prints the prediction followed by the five largest absolute SHAP
    values. Everything is rendered inside the ``output`` widget, which is
    cleared on every click.

    Args:
        b: The clicked button, as passed by ``Button.on_click``; unused.
    """
    categorical_cols = ["commodity", "production_unit", "parent_entity", "reporting_entity"]

    with output:
        clear_output()

        # log1p is undefined below -1 and a negative production amount is not
        # meaningful; reject such input instead of feeding NaN/-inf to the
        # model, which would still return a (meaningless) prediction.
        production = production_value.value
        if production is None or not np.isfinite(production) or production < 0:
            print("Production must be a non-negative number.")
            return

        # Assemble a single-row frame from the widget values.
        user_input = pd.DataFrame([{
            "commodity": commodity.value,
            "production_value": production,
            "production_unit": production_unit.value,
            "parent_entity": parent_entity.value,
            "reporting_entity": reporting_entity.value,
            "year": year.value
        }])

        # Label-encode the categorical fields. Values the encoder has never
        # seen are mapped to -1 (transform would raise on them) and recorded
        # so the user can be told about it.
        unseen = []
        for col in categorical_cols:
            if col not in encoders:
                continue
            encoder = encoders[col]
            raw = str(user_input.at[0, col])
            if raw in encoder.classes_:
                user_input[col] = encoder.transform([raw])[0]
            else:
                user_input[col] = -1
                unseen.append(col)

        # Feature engineering
        user_input["log_production_value"] = np.log1p(user_input["production_value"])
        user_input["total_operational_emissions_MtCO2e"] = 0  # placeholder
        user_input["emissions_intensity"] = 0  # placeholder
        user_input["parent_type"] = 0  # assumed required by model

        # Match the training feature set and order: features the form does not
        # provide are filled with 0, columns the model does not use are dropped.
        user_input = user_input.reindex(columns=model.feature_names_in_, fill_value=0)

        prediction = model.predict(user_input)[0]
        print(f"\nPredicted Scope 3 Emissions: {prediction:.4f} MtCO2")

        if unseen:
            print(
                f"Note: unrecognised value for {', '.join(unseen)}; "
                "encoded as -1, so the prediction may be unreliable."
            )

        # SHAP explanation: rank features by the magnitude of their
        # contribution to this prediction (the sign is discarded).
        explainer = shap.Explainer(model)
        shap_values = explainer(user_input)
        contributions = pd.Series(shap_values[0].values, index=user_input.columns)
        top_features = contributions.abs().sort_values(ascending=False).head(5)
        print("\nTop 5 Contributors to Prediction:")
        for feature, value in top_features.items():
            print(f"{feature}: {value:.4f}")

In [None]:
# Hook the prediction handler up to the button.
submit_button.on_click(on_submit)

# Stack the heading, the inputs, the button and the result area vertically,
# then render the whole form.
title = widgets.HTML("<h2>Scope 3 Emissions Prediction</h2>")
form_items = widgets.VBox(
    children=[
        title,
        commodity, production_value, production_unit,
        parent_entity, reporting_entity, year,
        submit_button,
        output,
    ]
)

display(form_items)