In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [17]:
# Load the raw emissions table and normalise its header in one pass:
# every space in a column name becomes an underscore, which is the form
# the later cells use when they refer to columns.
df = pd.read_csv('vehicle_emission_dataset.csv').rename(
    columns=lambda column_name: column_name.replace(' ', '_')
)

In [18]:
# Targets are the five pollutant columns; every remaining column of `df`
# is kept as a candidate feature.
y = df[['CO2_Emissions', 'NOx_Emissions', 'PM2.5_Emissions', 'VOC_Emissions', 'SO2_Emissions']]
X = df.drop(columns=y.columns)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Columns routed to the numeric branch of the preprocessor.
numeric_features = [
    'Engine_Size',
    'Age_of_Vehicle',
    'Mileage',
    'Speed',
    'Acceleration',
    'Temperature',
    'Humidity',
    'Wind_Speed',
    'Air_Pressure',
]

# Columns routed to the categorical branch of the preprocessor.
categorical_features = [
    'Vehicle_Type',
    'Fuel_Type',
    'Road_Type',
    'Traffic_Conditions',
]

# Numeric branch: fill gaps with the column mean, then apply StandardScaler.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the literal 'missing', then one-hot
# encode; categories not seen during fit are ignored rather than raising.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own list of columns.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

In [20]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV

# End-to-end estimator: the shared preprocessor followed by a random forest
# wrapped in MultiOutputRegressor.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', MultiOutputRegressor(RandomForestRegressor(random_state=42))),
])

# Parameter names walk the nesting: pipeline step `regressor` ->
# MultiOutputRegressor's `estimator` -> the forest's own hyper-parameter.
# 2 x 3 x 2 = 12 candidate settings.
param_grid = dict(
    regressor__estimator__n_estimators=[100, 200],
    regressor__estimator__max_depth=[None, 10, 20],
    regressor__estimator__min_samples_split=[2, 5],
)

# NOTE(review): the score is a single MSE over all targets; if the targets
# differ a lot in magnitude, the largest one presumably dominates model
# selection -- confirm whether that is intended.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the winning pipeline for the evaluation and prediction cells.
best_model = grid_search.best_estimator_

In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the tuned pipeline on the held-out split.
y_pred = best_model.predict(X_test)

# Aggregate metrics: each is the uniform average over all target columns,
# so a single number mixes every pollutant together.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Per-target breakdown: `multioutput='raw_values'` returns one score per
# target column instead of the average, which makes it visible which
# pollutants the model actually predicts well (or badly).
per_target_metrics = pd.DataFrame(
    {
        'MAE': mean_absolute_error(y_test, y_pred, multioutput='raw_values'),
        'MSE': mean_squared_error(y_test, y_pred, multioutput='raw_values'),
        'R2': r2_score(y_test, y_pred, multioutput='raw_values'),
    },
    index=y_test.columns,
)
print(per_target_metrics.to_string())

Mean Absolute Error: 22.661784359569854
Mean Squared Error: 3397.06412221948
R-squared: -0.006066636818068983


In [23]:
def predict_emissions(input_data, model=None):
    """
    Predicts the emissions for a given vehicle input.

    Parameters:
    - input_data (dict): A dictionary containing the vehicle specifications,
      keyed by feature column name. It is turned into a one-row DataFrame
      and passed to the model unchanged.
    - model (optional): Any object with a ``predict(DataFrame)`` method that
      returns one row of five values per input row. When omitted, the
      notebook-level ``best_model`` is used.

    Returns:
    - dict: Predicted emissions for CO₂, NOₓ, PM₂.₅, VOC, and SO₂, as plain
      Python floats keyed by target column name.

    Raises:
    - ValueError: If the model does not return exactly one value per target,
      since the positional mapping to target names would then be wrong.
    """
    # Order must match the column order of the targets the model was fit on.
    target_names = (
        'CO2_Emissions',
        'NOx_Emissions',
        'PM2.5_Emissions',
        'VOC_Emissions',
        'SO2_Emissions',
    )

    # Resolve the global lazily so callers can inject their own estimator.
    estimator = best_model if model is None else model

    input_df = pd.DataFrame([input_data])
    predicted_row = estimator.predict(input_df)[0]

    if len(predicted_row) != len(target_names):
        raise ValueError(
            f'Expected {len(target_names)} predicted values, got {len(predicted_row)}'
        )

    # float() unwraps NumPy scalars so the result prints and serialises cleanly.
    return {
        name: float(value)
        for name, value in zip(target_names, predicted_row)
    }

# Example request: one vehicle described by every feature the pipeline
# selects (four categorical fields and nine numeric ones).
vehicle_input = dict(
    Vehicle_Type='Sedan',
    Fuel_Type='Petrol',
    Engine_Size=2.0,
    Age_of_Vehicle=5,
    Mileage=15000,
    Speed=60,
    Acceleration=8,
    Road_Type='Urban',
    Traffic_Conditions='Moderate',
    Temperature=30,
    Humidity=60,
    Wind_Speed=15,
    Air_Pressure=1015,
)

# Run the example through the tuned model and show the predicted emissions.
predicted_emissions = predict_emissions(vehicle_input)
print(predicted_emissions)


{'CO2_Emissions': 262.60675178401624, 'NOx_Emissions': 1.0313269704042904, 'PM2.5_Emissions': 0.10793081378500878, 'VOC_Emissions': 0.05787838191881496, 'SO2_Emissions': 0.056535220603857794}
