In [5]:
# === STEP 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

# === STEP 2: Load the combined dataset (Kenya only)
df = pd.read_csv('Combined3_carbondata_updated.csv')

# === STEP 3: Define features + target
features = ['electric_consumption_kwh_per_capita', 'gdp_per_capita_ppp', 'population']
target = 'co2_emissions_value'

# === STEP 4: Target y (NOT scaled, so predictions stay in the target's original units)
y = df[target]

# === STEP 5: Train/test split on the RAW features
# Splitting happens BEFORE scaling so that the held-out rows never influence
# the scaler's mean/std (fitting the scaler on all rows leaks test-set
# statistics into preprocessing and into the saved scaler).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[features], y, test_size=0.2, random_state=42
)

# === STEP 6: Standardize ONLY X (features)
# Fit on the training rows only, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw), columns=features, index=X_train_raw.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw), columns=features, index=X_test_raw.index
)

# Full scaled feature matrix, kept so any later cell that refers to `X` still works.
X = pd.DataFrame(scaler.transform(df[features]), columns=features, index=df.index)

# Save feature scaler for Streamlit use later (the app must apply this exact transform)
joblib.dump(scaler, 'feature_scaler.pkl')

# === STEP 7: Train Random Forest model (improved params)
# Hyperparameters are gathered in one dict so they are easy to read and tweak.
rf_params = {
    'n_estimators': 200,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,  # fixed seed so repeated runs build the same forest
}
best_rf = RandomForestRegressor(**rf_params)
best_rf.fit(X_train, y_train)

# === STEP 8: Save trained model
joblib.dump(best_rf, 'kenya3_co2_model.pkl')

# === Optional: Check predictions on test set
# Only summarizes the distribution of the predictions; it does not compare
# them against y_test.
y_pred = best_rf.predict(X_test)
prediction_summary = pd.Series(y_pred).describe()
print(prediction_summary)


count        7.000000
mean     14718.676057
std       3906.278549
min       8586.399998
25%      12460.049806
50%      15018.549614
75%      17649.528809
max      19206.625556
dtype: float64


In [6]:
# app.py

import streamlit as st
import pandas as pd
import numpy as np
import joblib

# === Load model + feature scaler ===
# Both artifacts are produced by the training cell. joblib.load unpickles the
# files, so only load artifacts from a trusted source.
model = joblib.load('kenya3_co2_model.pkl')
scaler = joblib.load('feature_scaler.pkl')

# Column names, in the order used when the scaler and model were fitted.
FEATURE_COLUMNS = ['electric_consumption_kwh_per_capita', 'gdp_per_capita_ppp', 'population']

# --- Pretty Title + Intro ---
st.markdown("<h1 style='text-align: center; color: darkblue;'>Kenya CO₂ Emissions Predictor</h1>", unsafe_allow_html=True)
st.markdown("<h4 style='text-align: center;'>Predict CO₂ emissions based on GDP, electricity consumption, and population</h4>", unsafe_allow_html=True)

# --- Sidebar Inputs ---
st.sidebar.header('Enter Input Values')
electricity = st.sidebar.number_input('Electricity Consumption (kWh per capita)', min_value=0.0, value=100.0)
# NOTE(review): the label says USD but the model feature is 'gdp_per_capita_ppp';
# confirm which unit the training data uses and align the label if needed.
gdp = st.sidebar.number_input('GDP per Capita (USD)', min_value=0.0, value=1000.0)
population_millions = st.sidebar.number_input('Population (millions)', min_value=0.0, value=50.0)

# Convert population from millions to actual count
# (assumes the training 'population' column is a raw head count; TODO confirm)
population = population_millions * 1_000_000

# === Standardize user inputs using SAME scaler ===
user_input_df = pd.DataFrame(
    [[electricity, gdp, population]],
    columns=FEATURE_COLUMNS,
)

# scaler.transform returns a bare ndarray. The model was fitted on a DataFrame
# with column names, so re-attach them; otherwise scikit-learn warns about
# missing feature names on every prediction.
user_input_scaled = pd.DataFrame(
    scaler.transform(user_input_df),
    columns=FEATURE_COLUMNS,
)

# Sidebar Footer
st.sidebar.markdown("---")
st.sidebar.info("Developed by Victoria Ndegi | KPSK-DeKUT-2-13")

# --- Predict Button ---
if st.sidebar.button('Predict CO2 Emissions'):
    # The target was not scaled during training, so the prediction is already
    # in the target column's original units.
    prediction_real = model.predict(user_input_scaled)[0]  # Direct real CO2 value

    # NOTE(review): the message says "metric tons"; verify this matches the unit
    # of 'co2_emissions_value' in the training data.
    st.success(f'Predicted CO₂ Emissions: {prediction_real:.2f} metric tons (approx)')

# Footer caption
st.markdown("<hr style='border:1px solid lightgray'>", unsafe_allow_html=True)
st.caption('This is a machine learning-powered tool to forecast Kenya’s carbon emissions.')




DeltaGenerator()