In [19]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import pandas as pd
import numpy as np
import pickle

In [10]:
# Read the preprocessed traffic table into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
TRAFFIC_CSV_PATH = 'preprocessed_traffic_data.csv'
traffic_data = pd.read_csv(TRAFFIC_CSV_PATH)

In [11]:
# Predictor columns, listed in the order they will occupy in the design matrix.
features = [
    'day_of_month',
    'hour',
    'dayofweek',
    'latitude',
    'longitude',
    'maximum_speed',
    'minimum_speed',
    'average_speed',
]

# Extract the underlying arrays: X holds the predictor columns above,
# y holds the regression target taken from the 'number_of_vehicles' column.
X = traffic_data.loc[:, features].values
y = traffic_data.loc[:, 'number_of_vehicles'].values

In [12]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the resulting partition, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Base regressors. Both tree-based models are seeded so that refitting on the
# same data reproduces the same estimator.
model_rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model_gb = GradientBoostingRegressor(
    n_estimators=100,
    random_state=42,
)
model_lr = LinearRegression()

# (label, estimator) pairs handed to the voting wrapper; the labels identify
# each member inside the fitted ensemble.
base_estimators = [
    ('rf', model_rf),
    ('gb', model_gb),
    ('lr', model_lr),
]

# Combine the three base regressors into a single VotingRegressor.
ensemble_model = VotingRegressor(estimators=base_estimators)

In [14]:
# Fit the voting ensemble on the training partition only; the held-out test
# rows are not seen here.
ensemble_model.fit(X=X_train, y=y_train)

In [15]:
# Score the held-out rows with the fitted ensemble; the result is compared
# against y_test in the evaluation step.
predictions_ensemble = ensemble_model.predict(X=X_test)

In [16]:
# Error metrics for the ensemble on the held-out test set.
mse_ensemble = mean_squared_error(y_true=y_test, y_pred=predictions_ensemble)
# RMSE is derived from the MSE so it is expressed in the target's own units.
rmse_ensemble = np.sqrt(mse_ensemble)
r2_ensemble = r2_score(y_true=y_test, y_pred=predictions_ensemble)
mae_ensemble = mean_absolute_error(y_true=y_test, y_pred=predictions_ensemble)

# Assemble the report first, then emit it in one call; each entry ends up on
# its own line, formatted to four decimal places.
report_lines = [
    f'Mean Squared Error (MSE) on Test Set (Ensemble): {mse_ensemble:.4f}',
    f'Root Mean Squared Error (RMSE) on Test Set (Ensemble): {rmse_ensemble:.4f}',
    f'R-squared (R^2) on Test Set (Ensemble): {r2_ensemble:.4f}',
    f'Mean Absolute Error (MAE) on Test Set (Ensemble): {mae_ensemble:.4f}',
]
print('\n'.join(report_lines))

Mean Squared Error (MSE) on Test Set (Ensemble): 2755.1892
Root Mean Squared Error (RMSE) on Test Set (Ensemble): 52.4899
R-squared (R^2) on Test Set (Ensemble): 0.7475
Mean Absolute Error (MAE) on Test Set (Ensemble): 32.1357


In [1]:
# Persist the ensemble to disk with pickle. The file is written to the
# working directory and is overwritten if it already exists ('wb' mode).
with open('ensemble_traffic_estimation_model.pkl', mode='wb') as model_out:
    pickle.dump(ensemble_model, model_out)

In [None]:
# Restore the pickled ensemble from the working directory.
# Caution: unpickling can execute arbitrary code, so only load a file that
# comes from a trusted source (e.g. one written by this notebook).
with open('ensemble_traffic_estimation_model.pkl', mode='rb') as model_in:
    loaded_ensemble_model = pickle.load(model_in)