In [57]:
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import ast

# Load model & encoders
with open('models/ensemble_model.pkl', 'rb') as ensemble:
    model = pickle.load(ensemble)

with open('models/dummy_info.pkl', 'rb') as dummy:
    dummy_info = pickle.load(dummy)

with open('models/preprocessor.pkl', 'rb') as process:
    preprocessor = pickle.load(process)
    
with open('models/feature_order.pkl', 'rb') as order:
    feature_order = pickle.load(order)


In [58]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [59]:
df = pd.DataFrame({
    "body_type": ["overweight"],
    "gender": ["female"],
    "diet": ["pescatarian"],
    "shower_frequency": ["daily"],
    "heating_energy_source": ["coal"],
    "transport": ["public"],
    "social_activity": ["often"],
    "monthly_grocery_bill": [230],
    "air_travel_frequency": ["frequently"],
    "vehicle_monthly_distance_km": [210],
    "waste_bag_size": ["large"],
    "waste_bag_weekly_count": [4],
    "tv_pc_daily_hours": [7],
    "new_clothes_monthly": [2],
    "internet_daily_hours": [8],
    "energy_efficiency": ["No"],
    "recycling": [["Metal", "Paper"]],
    "cooking_with": [["Stove", "Oven"]]
})


In [60]:
df.head()

Unnamed: 0,body_type,gender,diet,shower_frequency,heating_energy_source,transport,social_activity,monthly_grocery_bill,air_travel_frequency,vehicle_monthly_distance_km,waste_bag_size,waste_bag_weekly_count,tv_pc_daily_hours,new_clothes_monthly,internet_daily_hours,energy_efficiency,recycling,cooking_with
0,overweight,female,pescatarian,daily,coal,public,often,230,frequently,210,large,4,7,2,8,No,"[Metal, Paper]","[Stove, Oven]"


In [61]:
def transform_multilabel(df, column_name, categories):
    df = df.copy()

    def parse(x):
        if isinstance(x, list) or isinstance(x, np.ndarray):
            return x  # already list
        if isinstance(x, str):
            return ast.literal_eval(x)  # convert string to list
        if pd.isnull(x):
            return []  # missing value
        return []  # default empty

    for value in categories:
        df[value] = df[column_name].apply(lambda x: int(value in parse(x)))

    df.drop(columns=column_name, inplace=True)
    return df

# 3. Transform only 'recycling' and 'cooking_with'
df = transform_multilabel(df, 'recycling', dummy_info['recycling'])
df = transform_multilabel(df, 'cooking_with', dummy_info['cooking_with'])


In [62]:
df.head()

Unnamed: 0,body_type,gender,diet,shower_frequency,heating_energy_source,transport,social_activity,monthly_grocery_bill,air_travel_frequency,vehicle_monthly_distance_km,waste_bag_size,waste_bag_weekly_count,tv_pc_daily_hours,new_clothes_monthly,internet_daily_hours,energy_efficiency,Plastic,Glass,Paper,Metal,Microwave,Oven,Stove,Airfryer,Grill
0,overweight,female,pescatarian,daily,coal,public,often,230,frequently,210,large,4,7,2,8,No,0,0,1,1,0,1,1,0,0


In [63]:
ordinal_cols = ['body_type', 'shower_frequency', 'social_activity', 'air_travel_frequency', 'waste_bag_size', 'energy_efficiency']
onehot_cols = ['gender', 'diet', 'heating_energy_source', 'transport']

def transform_input(df_raw):
    """
    This function takes the raw user input (as a DataFrame)
    and transforms it exactly like the model expects.
    """
    df = df_raw.copy()

    # Apply the saved preprocessor
    X_transformed = preprocessor.transform(df)

    # Rebuild feature names
    ohe_feature_names = preprocessor.named_transformers_['onehot'].get_feature_names_out(onehot_cols)
    all_feature_names = ordinal_cols + list(ohe_feature_names) + [col for col in df.columns if col not in ordinal_cols + onehot_cols]

    # Create a clean DataFrame
    X_df = pd.DataFrame(X_transformed, columns=all_feature_names)
    X_df.reset_index(drop=True, inplace=True)

    return X_df

# Example usage (inside your API / web app route)
# input_df = pd.DataFrame([user_input_data])
transformed_input = transform_input(df)


In [64]:
transformed_input.head()

Unnamed: 0,body_type,shower_frequency,social_activity,air_travel_frequency,waste_bag_size,energy_efficiency,gender_male,diet_omnivore,diet_pescatarian,diet_vegan,diet_vegetarian,heating_energy_source_coal,heating_energy_source_electricity,heating_energy_source_natural gas,heating_energy_source_wood,transport_private,transport_public,transport_walk/bicycle,monthly_grocery_bill,vehicle_monthly_distance_km,waste_bag_weekly_count,tv_pc_daily_hours,new_clothes_monthly,internet_daily_hours,Plastic,Glass,Paper,Metal,Microwave,Oven,Stove,Airfryer,Grill
0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,230.0,210.0,4.0,7.0,2.0,8.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0


In [65]:
# # Select only available columns
# df_final = df_final[[col for col in final_columns if col in df_final.columns]]

# Reindex to correct order
# Ensure df_final has the same columns as final_columns
# df_final = transformed_input.reindex(columns=final_columns, fill_value=0)

In [66]:
transformed_input.shape

(1, 33)

In [67]:
# Ensure transformed_input matches the model's expected feature names and order
transformed_input = transformed_input.reindex(columns=feature_order, fill_value=0)

In [68]:
transformed_input.columns

Index(['body_type', 'shower_frequency', 'social_activity',
       'air_travel_frequency', 'waste_bag_size', 'energy_efficiency',
       'gender_male', 'diet_omnivore', 'diet_pescatarian', 'diet_vegan',
       'diet_vegetarian', 'heating_energy_source_coal',
       'heating_energy_source_electricity',
       'heating_energy_source_natural gas', 'heating_energy_source_wood',
       'transport_private', 'transport_public', 'transport_walk/bicycle',
       'monthly_grocery_bill', 'vehicle_monthly_distance_km',
       'waste_bag_weekly_count', 'tv_pc_daily_hours', 'new_clothes_monthly',
       'internet_daily_hours', 'Metal', 'Plastic', 'Glass', 'Paper', 'Stove',
       'Grill', 'Airfryer', 'Microwave', 'Oven'],
      dtype='object')

In [69]:
# Make prediction
prediction = model.predict(transformed_input)

# Print the result
print(f"🌱 Your estimated carbon footprint is: {prediction[0]:.2f} units")

🌱 Your estimated carbon footprint is: 1252.50 units
