<a href="https://colab.research.google.com/github/Suriya-Panneerselvam/vehicle-carbon-emission-prediction-using-machine-learning-and-alert-system-/blob/main/carbonemission.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
import pickle

# --- Load dataset -----------------------------------------------------------
# Column names shared by the loading and feature-selection steps below.
TARGET_COLUMN = "CO2 Emissions (g/km)"
RATING_COLUMNS = ["CO2 Rating", "Smog Rating"]

# Read the ratings CSV (decoded as ISO-8859-1) and discard rows that have no
# recorded emissions value, since those cannot be used as training targets.
df = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/Fuel Consumption Ratings 2023.csv",
    encoding='ISO-8859-1',
).dropna(subset=[TARGET_COLUMN])

# 'Year' is not used as a feature; remove it when the file contains it.
df = df.drop(columns=['Year'], errors='ignore')

# --- Features & target ------------------------------------------------------
# The two rating columns are excluded from the features; they are only looked
# up from `df` after a prediction has been made.
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN, *RATING_COLUMNS])

# --- Preprocess -------------------------------------------------------------
# Object-typed columns are one-hot encoded; every other column is passed
# through unchanged. Categories unseen during fitting are ignored at
# transform time instead of raising.
cat_cols = X.select_dtypes(include=["object"]).columns.tolist()
num_cols = [column for column in X.columns if column not in cat_cols]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols),
    ]
)
X_processed = preprocessor.fit_transform(X)

# --- Split & train ----------------------------------------------------------
# 80/20 split with a fixed seed so the fitted forest is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42
)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Save model -------------------------------------------------------------
# Model and preprocessor are stored together so a prediction can rebuild the
# exact same feature matrix layout.
bundle = {"model": model, "preprocessor": preprocessor}
with open("co2_emission_model.pkl", "wb") as model_file:
    pickle.dump(bundle, model_file)

# Prediction function
def predict_with_alert(input_data, threshold=250):
    """Predict CO₂ emissions for one vehicle and print a report with an alert.

    Loads the model and preprocessor stored in ``co2_emission_model.pkl``,
    predicts the emissions for ``input_data``, takes the CO₂ and smog ratings
    from the row of the module-level ``df`` whose recorded emissions are
    closest to the prediction, and prints the result.

    Args:
        input_data: Mapping of feature column name to value for a single
            vehicle. Values for columns that are numeric in ``df`` may be
            given as numeric strings (for example straight from ``input()``);
            they are converted to numbers before the model sees them.
        threshold: Emission level (same unit as the prediction, g/km) above
            which the high-emission alert is printed. Defaults to 250.

    Raises:
        ValueError: If a value for a numeric column cannot be parsed as a
            number.
    """
    # NOTE: unpickling can execute arbitrary code. Only load a file that this
    # notebook wrote itself; never point this at an untrusted pickle.
    with open("co2_emission_model.pkl", "rb") as f:
        saved = pickle.load(f)
    model_loaded = saved["model"]
    preprocessor_loaded = saved["preprocessor"]

    # Build a one-row frame and make numeric features really numeric. Without
    # this, a string such as "4" in a passthrough column only works because
    # scikit-learn happens to coerce object data to float later on.
    features = pd.DataFrame([input_data])
    for column in features.columns:
        if column in df.columns and pd.api.types.is_numeric_dtype(df[column]):
            features[column] = pd.to_numeric(features[column])

    # Predict emissions
    X_new = preprocessor_loaded.transform(features)
    prediction = model_loaded.predict(X_new)[0]

    # Find closest match in dataset for ratings
    closest_row = df.iloc[(df["CO2 Emissions (g/km)"] - prediction).abs().argsort()[:1]]
    co2_rating = closest_row["CO2 Rating"].values[0]
    smog_rating = closest_row["Smog Rating"].values[0]

    # Output
    print(f"\nCO₂ Emissions (g/km): {prediction:.2f}")
    print(f"Estimated CO₂ Rating: {co2_rating}")
    print(f"Estimated Smog Rating: {smog_rating}")

    if prediction > threshold:
        print("ALERT: High CO₂ Emissions!")
    else:
        print("CO₂ emissions are within safe limits.")

# User input
# Each entry pairs a feature name (also used as the prompt label) with the
# converter applied to the typed text. Prompts are shown in list order.
# 'Cylinders' is deliberately kept as the raw string the user typed, just like
# the other text fields; only the fields marked `float` are parsed here.
prompt_fields = [
    ('Make', str),
    ('Model', str),
    ('Vehicle Class', str),
    ('Engine Size (L)', float),
    ('Cylinders', str),
    ('Transmission', str),
    ('Fuel Type', str),
    ('Fuel Consumption (L/100Km)', float),
    ('Hwy (L/100 km)', float),
    ('Comb (L/100 km)', float),
    ('Comb (mpg)', float),
]
new_data_dict = {
    label: convert(input(f'{label}: '))
    for label, convert in prompt_fields
}

predict_with_alert(new_data_dict)

Make: Audi
Model: A3 40 TFSI quattro
Vehicle Class: Subcompact
Engine Size (L): 2
Cylinders: 4
Transmission: AM7
Fuel Type: X
Fuel Consumption (L/100Km): 8.7
Hwy (L/100 km): 6.8
Comb (L/100 km): 7.9
Comb (mpg): 36

CO₂ Emissions (g/km): 183.78
Estimated CO₂ Rating: 6.0
Estimated Smog Rating: 7.0
CO₂ emissions are within safe limits.
