# Scope 3 Emissions Prediction UI
This Voila app allows users to input ERP-style values and receive Scope 3 emissions predictions using a trained XGBoost model.

In [5]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import joblib
import shap
import xgboost as xgb
import numpy as np
import matplotlib.pyplot as plt

In [6]:
# Restore the trained model and the fitted per-column encoders from disk.
# NOTE: joblib artifacts are pickle-based; only load files from a trusted source.
model = joblib.load(filename="xgb_model.joblib")
encoders = joblib.load(filename="label_encoders.pkl")

In [7]:
# Input controls for the prediction form.

# Free-text fields (these are the columns that get label-encoded on submit).
commodity = widgets.Text(description="Commodity:")
production_unit = widgets.Text(description="Unit:")
parent_entity = widgets.Text(description="Parent:")
reporting_entity = widgets.Text(description="Site:")

# Numeric fields.
production_value = widgets.FloatText(description="Production:")
year = widgets.IntText(description="Year:")

# Trigger button and the area that receives everything printed by the handler.
submit_button = widgets.Button(
    description="Predict Emissions",
    button_style="success",
)
output = widgets.Output()

In [1]:
# Prediction function
def on_submit(b):
    """Handle a click on the predict button: encode inputs, predict, explain.

    Reads the current values of the module-level input widgets, label-encodes
    the free-text fields with the fitted ``encoders``, assembles the feature
    frame in the column order passed to ``model.predict``, and prints the
    prediction plus the five largest SHAP contributions into ``output``.

    Invalid input (negative production, or a label an encoder rejects) is
    reported as a short message in ``output`` instead of a traceback.

    Parameters
    ----------
    b
        The object passed by ``Button.on_click``; unused.
    """
    categorical_cols = ["commodity", "production_unit", "parent_entity", "reporting_entity"]
    features = [
        'commodity', 'parent_entity', 'parent_type', 'reporting_entity',
        'production_unit', 'year', 'log_production_value',
        'total_operational_emissions_MtCO2e', 'emissions_intensity'
    ]

    with output:
        clear_output()

        # log1p is NaN/-inf for values <= -1, and a negative production
        # quantity is not meaningful, so reject it up front.
        if production_value.value < 0:
            print("Production must be zero or greater.")
            return

        # Assemble input data as a single-row frame
        user_input = pd.DataFrame([{
            "commodity": commodity.value,
            "production_value": production_value.value,
            "production_unit": production_unit.value,
            "parent_entity": parent_entity.value,
            "reporting_entity": reporting_entity.value,
            "year": year.value
        }])

        # Apply label encoding. The encoders are presumably scikit-learn
        # LabelEncoders, whose transform() raises ValueError for labels not
        # seen during fitting; free-text input makes that the common failure.
        for col in categorical_cols:
            if col not in encoders:
                continue
            raw_value = user_input.at[0, col]
            try:
                user_input[col] = encoders[col].transform(user_input[col].astype(str))
            except ValueError:
                print(f"Unrecognized value {raw_value!r} for '{col}'. "
                      "Please enter a value the model was trained on.")
                return

        # Feature engineering
        user_input["log_production_value"] = np.log1p(user_input["production_value"])
        user_input["total_operational_emissions_MtCO2e"] = 0  # placeholder
        user_input["emissions_intensity"] = 0  # placeholder

        # Select and order the model's columns; any feature not collected by
        # the form (currently 'parent_type') is filled with 0.
        user_input = user_input.reindex(columns=features, fill_value=0)

        prediction = model.predict(user_input)[0]
        print(f"\nPredicted Scope 3 Emissions: {prediction:.4f} MtCO2")

        # Explain prediction with SHAP (text-based fallback)
        explainer = shap.Explainer(model)
        shap_values = explainer(user_input)

        # Rank by magnitude but keep the signed value so the direction of
        # each contribution remains visible.
        contributions = pd.Series(shap_values[0].values, index=user_input.columns)
        top_index = contributions.abs().sort_values(ascending=False).head(5).index

        print("\nTop 5 Contributors to Prediction:")
        for feature, value in contributions.loc[top_index].items():
            print(f"{feature}: {value:+.4f}")

In [3]:
# Wire the click handler to the button.
submit_button.on_click(on_submit)

# Heading shown above the form.
title = widgets.HTML(value="<h2>Scope 3 Emissions Prediction</h2>")

# Stack the heading, the inputs, the button and the result area vertically.
form_items = widgets.VBox(
    children=[
        title,
        commodity,
        production_value,
        production_unit,
        parent_entity,
        reporting_entity,
        year,
        submit_button,
        output,
    ]
)

display(form_items)

NameError: name 'widgets' is not defined