In [13]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the emissions dataset - replace the path with your actual data file
df = pd.read_csv('vehicle_emissions.csv')

# Display the first few rows
print(df.head())

# Encode the categorical fuel type as integer codes. `le` is kept around so
# the same mapping can be applied to new samples at prediction time.
le = LabelEncoder()
df['Fuel_Type_Encoded'] = le.fit_transform(df['fuel_type'])

# Select features and target. The column list is named once so the training
# frame and the prediction sample below cannot drift apart.
feature_columns = ['engine_size', 'co2_emissions', 'Fuel_Type_Encoded']
X = df[feature_columns]
y = df['emission_category']

# Encode the target, keeping the fitted encoder so predictions can be mapped
# back to the original labels without refitting a second encoder.
le_target = LabelEncoder()
y_encoded = le_target.fit_transform(y)

# Split the dataset (80% train, 20% test; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Feature scaling: statistics are learned from the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the classifier and predict on the held-out split
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


# Example input: [Engine Size, CO2 Emissions, Encoded Fuel Type]
# Built as a DataFrame carrying the training column names: the scaler was
# fitted on a DataFrame, so passing a bare array would emit scikit-learn's
# "X does not have valid feature names" warning and skip column-order checks.
sample_vehicle = pd.DataFrame(
    [[2.0, 215, le.transform(['petrol'])[0]]],
    columns=feature_columns,
)
sample_vehicle_scaled = scaler.transform(sample_vehicle)

# Predict, then map the integer class back to its label using the encoder
# that produced the training targets.
predicted_class = model.predict(sample_vehicle_scaled)
predicted_emission_standard = le_target.inverse_transform(predicted_class)

print("Predicted Emission Category:", predicted_emission_standard[0])



   engine_size fuel_type  co2_emissions emission_category
0     1.887889    petrol     215.413794                 B
1     1.924299  electric     234.463942                 C
2     3.687571    diesel     180.042027                 A
3     1.078842    petrol     156.704264                 A
4     1.416434    diesel     269.166344                 A
Accuracy: 0.3
Classification Report:
               precision    recall  f1-score   support

           0       0.20      0.20      0.20         5
           1       0.14      0.17      0.15         6
           2       0.50      0.44      0.47         9

    accuracy                           0.30        20
   macro avg       0.28      0.27      0.27        20
weighted avg       0.32      0.30      0.31        20

Predicted Emission Category: B




In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Read the emissions data - swap 'vehicle_emissions.csv' for your own file
df = pd.read_csv('vehicle_emissions.csv')

# Quick look at the first rows to confirm the column layout
print(df.head())

# Turn the textual 'fuel_type' column into integer codes; the fitted encoder
# is reused further down to encode the sample vehicle's fuel type.
le_fuel = LabelEncoder()
df['Fuel_Type_Encoded'] = le_fuel.fit_transform(df['fuel_type'])

# Model inputs (engine size, CO2 emissions, encoded fuel type) and target
feature_columns = ['engine_size', 'co2_emissions', 'Fuel_Type_Encoded']
X = df[feature_columns]
y = df['emission_category']

# Integer-encode the target; le_target is kept to decode predictions later
le_target = LabelEncoder()
y_encoded = le_target.fit_transform(y)

# Hold out 20% of the rows for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Standardize features: fit on the training split, apply to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build and fit the Random Forest classifier in one step
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Sample vehicle laid out as [engine size, CO2 emissions, encoded fuel type],
# using the training column names
petrol_code = le_fuel.transform(['petrol'])[0]
sample_rows = [[2.0, 215.4137938, petrol_code]]
sample_vehicle = pd.DataFrame(sample_rows, columns=feature_columns)

# Apply the scaler fitted on the training split to the sample
sample_vehicle_scaled = scaler.transform(sample_vehicle)

# Predict the encoded class, then decode it to the original category label
predicted_class = model.predict(sample_vehicle_scaled)
predicted_emission_standard = le_target.inverse_transform(predicted_class)

print("Predicted Emission Category for sample vehicle:", predicted_emission_standard[0])


   engine_size fuel_type  co2_emissions emission_category
0     1.887889    petrol     215.413794                 B
1     1.924299  electric     234.463942                 C
2     3.687571    diesel     180.042027                 A
3     1.078842    petrol     156.704264                 A
4     1.416434    diesel     269.166344                 A
Accuracy: 0.3
Classification Report:
               precision    recall  f1-score   support

           0       0.20      0.20      0.20         5
           1       0.14      0.17      0.15         6
           2       0.50      0.44      0.47         9

    accuracy                           0.30        20
   macro avg       0.28      0.27      0.27        20
weighted avg       0.32      0.30      0.31        20

Predicted Emission Category for sample vehicle: B
