# 📈 Emissions Predictor – Regression Model

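This notebook trains a random-forest regression model to estimate CO₂ emissions (kg) per shipment. Note that the emissions target and the congestion feature are simulated for demonstration purposes, so the reported metrics describe the synthetic data rather than real-world emissions.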

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

In [7]:
# 📥 Load shipment and edge data
shipments = pd.read_csv("shipments.csv")
edges = pd.read_csv("edges.csv")

# One seeded generator drives every simulated column below, so the data set
# (and therefore the trained model and its metrics) is identical on every
# Restart & Run All instead of changing with the global NumPy random state.
rng = np.random.default_rng(42)

# Merge on source/target as route identifier (simplified join).
# how="left" keeps every shipment row; a shipment without a matching edge gets
# NaN in the columns contributed by edges.csv.
merged = shipments.merge(edges, left_on=["source", "target"], right_on=["from_node", "to_node"], how="left")

# Add congestion as a random factor (simulated for demo)
merged["congestion"] = rng.choice(["Low", "Medium", "High"], size=len(merged))

# Simulate CO2 emissions (target variable): distance multiplied by a random
# per-row factor drawn uniformly from [0.11, 0.15).
merged["co2_kg"] = merged["distance_km"] * rng.uniform(0.11, 0.15, size=len(merged))

# co2_kg is NaN wherever distance_km is NaN (presumably shipments with no
# matching edge — confirm which file provides distance_km); such rows have no
# target and cannot be used for training.
merged = merged.dropna(subset=["co2_kg"])

In [8]:
# 🎯 Feature Engineering
# Drop rows without a target BEFORE building X and y, so the filter actually
# applies to the training data. The explicit copy makes the column assignments
# below operate on an independent frame.
merged = merged.dropna(subset=["co2_kg"]).copy()

le_vehicle = LabelEncoder()
le_congestion = LabelEncoder()

# LabelEncoder maps each distinct value to an integer code (codes follow the
# sorted order of the values); the fitted encoders are kept so the codes can
# be mapped back to the original labels.
merged["vehicle_encoded"] = le_vehicle.fit_transform(merged["vehicle_type"])
merged["congestion_encoded"] = le_congestion.fit_transform(merged["congestion"])

# Feature matrix and regression target.
X = merged[["distance_km", "vehicle_encoded", "congestion_encoded"]]
y = merged["co2_kg"]



In [9]:
# 🔍 Train/Test Split and Model Training
# Reserve 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows.
predictions = model.predict(X_test)

In [11]:
# 📊 Evaluation
# Score the held-out predictions; RMSE is the square root of the mean squared error.
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5

# Report RMSE first, then MAE.
for metric_label, metric_value in (("RMSE:", rmse), ("MAE:", mae)):
    print(metric_label, metric_value)

RMSE: 9.219545918521694
MAE: 6.6686295979252685
