# Emissions Regression Model

This notebook trains a simple regression model to learn the relationship between transport activity and CO₂ emissions using synthetic data grounded in the formulas `Emissions = Activity × Emission Factor` and `CO₂ = Energy Consumption × Emission Factor`.


In [None]:
import pathlib
import sys

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Resolve the parent of the current working directory and make sure it is on
# sys.path (added once only), ahead of the `transport_system` import below.
PROJECT_ROOT = pathlib.Path('..').resolve()
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))

from transport_system.utils.math_models import emissions


In [None]:
# Seeded generator so the synthetic dataset is identical on every run.
rng = np.random.default_rng(42)

n_samples = 200
true_emission_factor = 0.18  # kg CO₂ per vehicle-km

# Activity levels drawn uniformly between 1,000 and 100,000 vehicle-km.
activity_vehicle_km = rng.uniform(low=1_000, high=100_000, size=n_samples)

# Per-sample noise standard deviation: 5% of (emission factor × activity),
# so the scatter widens as activity grows.
noise_std = 0.05 * true_emission_factor * activity_vehicle_km
noise = rng.normal(loc=0, scale=noise_std)
co2_kg = emissions(activity_vehicle_km, true_emission_factor) + noise

emissions_df = pd.DataFrame(
    {'activity_vehicle_km': activity_vehicle_km, 'co2_kg': co2_kg}
)

emissions_df.head()


In [None]:
# Fit an ordinary least-squares line: CO₂ (kg) as a function of activity (vehicle-km).
# scikit-learn expects a 2-D feature matrix, hence the double brackets.
X = emissions_df[['activity_vehicle_km']].to_numpy()
y = emissions_df['co2_kg'].to_numpy()

model = LinearRegression()
model.fit(X, y)

# Predictions are made on the same rows the model was fitted on, so the
# metrics below are in-sample goodness-of-fit figures, not held-out error.
y_pred = model.predict(X)

# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` keyword of
# `mean_squared_error` was deprecated in scikit-learn 1.4 and later removed,
# so relying on it raises a TypeError on current releases.
rmse = np.sqrt(mean_squared_error(y, y_pred))

print('Estimated emission factor (kg CO₂ / vehicle-km):', model.coef_[0])
print('Intercept (kg CO₂):', model.intercept_)
print('R²:', r2_score(y, y_pred))
print('RMSE (kg CO₂):', rmse)


In [None]:
# Observed points plus the fitted line, drawn through an explicit Figure/Axes pair.
fig, ax = plt.subplots(figsize=(6, 4))

activity = emissions_df['activity_vehicle_km']
ax.scatter(activity, emissions_df['co2_kg'], alpha=0.4, label='Observed')

# Sort by activity so the fitted line is drawn from left to right.
order = np.argsort(activity.values)
ax.plot(activity.values[order], y_pred[order], color='red', label='Fitted model')

ax.set_xlabel('Activity (vehicle-km)')
ax.set_ylabel('CO₂ (kg)')
ax.set_title('Emissions Regression: CO₂ vs Activity')
ax.grid(True)
ax.legend()
fig.tight_layout()
