In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

# Ignore harmless warnings 

import warnings 
warnings.filterwarnings("ignore")

# Set to display all the columns in dataset

pd.set_option("display.max_columns", None)

# Import psql to run queries 

import pandasql as psql

In [2]:

CO2Emission = pd.read_csv(r"C:\Users\badda\Downloads\MY2021_Fuel_Consumption_Ratings (1).csv", header=0)

# Copy the file to back-up file

CO2Emission_bk = CO2Emission.copy()

# Display first 5 records

CO2Emission.head()

Unnamed: 0,Year,Make,Model,Vehicle_Class,Engine_Size,Cylinders,Transmission,Fuel_Type,Fuel_Consumption_city,Fuel_Consumption_Hwy,Fuel_Consumption_Comb,Fuel_Consumption_Comb_MPG,CO2_Emissions,CO2_Rating,Smog_Rating
0,2021,Acura,ILX,Compact,2.4,4,AM8,Z,9.9,7.0,8.6,33,199,6,3
1,2021,Acura,NSX,Two-seater,3.5,6,AM9,Z,11.1,10.8,11.0,26,256,4,3
2,2021,Acura,RDX SH-AWD,SUV: Small,2.0,4,AS10,Z,11.0,8.6,9.9,29,232,5,6
3,2021,Acura,RDX SH-AWD A-SPEC,SUV: Small,2.0,4,AS10,Z,11.3,9.1,10.3,27,242,5,6
4,2021,Acura,TLX SH-AWD,Compact,2.0,4,AS10,Z,11.2,8.0,9.8,29,230,5,7


In [3]:
# Display dataset information

CO2Emission.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883 entries, 0 to 882
Data columns (total 15 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Year                       883 non-null    int64  
 1   Make                       883 non-null    object 
 2   Model                      883 non-null    object 
 3   Vehicle_Class              883 non-null    object 
 4   Engine_Size                883 non-null    float64
 5   Cylinders                  883 non-null    int64  
 6   Transmission               883 non-null    object 
 7   Fuel_Type                  883 non-null    object 
 8   Fuel_Consumption_city      883 non-null    float64
 9   Fuel_Consumption_Hwy       883 non-null    float64
 10  Fuel_Consumption_Comb      883 non-null    float64
 11  Fuel_Consumption_Comb_MPG  883 non-null    int64  
 12  CO2_Emissions              883 non-null    int64  
 13  CO2_Rating                 883 non-null    int64  

In [4]:
# Delecting the 8 columns     #not interupting the target variable

CO2Emission = CO2Emission.drop(['Year', 'Make', 'Model', 'Vehicle_Class', 'Transmission',
                                 'Fuel_Type', 'CO2_Rating', 'Smog_Rating'], axis = 1)    #axis = 1 is column/variable   axis = 0 is row/record
CO2Emission.head()

Unnamed: 0,Engine_Size,Cylinders,Fuel_Consumption_city,Fuel_Consumption_Hwy,Fuel_Consumption_Comb,Fuel_Consumption_Comb_MPG,CO2_Emissions
0,2.4,4,9.9,7.0,8.6,33,199
1,3.5,6,11.1,10.8,11.0,26,256
2,2.0,4,11.0,8.6,9.9,29,232
3,2.0,4,11.3,9.1,10.3,27,242
4,2.0,4,11.2,8.0,9.8,29,230


In [5]:
# Identify the Independent and Target variables

IndepVar = []
for col in CO2Emission.columns:
    if col != 'CO2_Emissions':
        IndepVar.append(col)

TargetVar = 'CO2_Emissions'

x = CO2Emission[IndepVar]
y = CO2Emission[TargetVar]

In [6]:
# Split the data into train and test

from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=143)

# Display the shape of the train_data and test_data

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((618, 6), (265, 6), (618,), (265,))

In [7]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train = mmscaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train)

x_test = mmscaler.fit_transform(x_test)
x_test = pd.DataFrame(x_test)

# Multi Regression Algorithm

In [8]:
# Build the model with Gradient Boosting Regressor

from sklearn.linear_model import LinearRegression  

# Create object for the model

ModelMLR = LinearRegression()

# Train the model with training data

ModelMLR.fit(x_train, y_train)

# Predict the model with test dataset

y_pred = ModelMLR.predict(x_test)

# Evaluation metrics for Regression analysis

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test, y_pred),3))  
print('Mean Squared Error (MSE):', round(metrics.mean_squared_error(y_test, y_pred),3))  
print('Root Mean Squared Error (RMSE):', round(np.sqrt(metrics.mean_squared_error(y_test, y_pred)),3))
print('R2_score:', round(metrics.r2_score(y_test, y_pred),6))
print('Root Mean Squared Log Error (RMSLE):', round(np.log(np.sqrt(metrics.mean_squared_error(y_test, y_pred))),3))

# Define the function to calculate the MAPE - Mean Absolute Percentage Error

def MAPE (y_test, y_pred): 
    y_test, y_pred = np.array(y_test), np.array(y_pred)
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100

# Evaluation of MAPE 

result = MAPE(y_test, y_pred)
print('Mean Absolute Percentage Error (MAPE):', round(result, 3), '%')    #mape<10 == excellent    10<mape<20 == good

# Calculate Adjusted R squared values 

r_squared = round(metrics.r2_score(y_test, y_pred),6)
adjusted_r_squared = round(1 - (1-r_squared)*(len(y)-1)/(len(y)-x.shape[1]-1),6)
print('Adj R Square: ', adjusted_r_squared)

Mean Absolute Error (MAE): 22.98
Mean Squared Error (MSE): 881.506
Root Mean Squared Error (RMSE): 29.69
R2_score: 0.760523
Root Mean Squared Log Error (RMSLE): 3.391
Mean Absolute Percentage Error (MAPE): 8.392 %
Adj R Square:  0.758883


In [10]:
Results = pd.DataFrame({'CO2_Emissions_A':y_test, 'CO2_Emissions_P':y_pred})

# Merge two Dataframes on index of both the dataframes


ResultsFinal = CO2Emission_bk.merge(Results, left_index=True, right_index=True)
ResultsFinal.sample(5)

Unnamed: 0,Year,Make,Model,Vehicle_Class,Engine_Size,Cylinders,Transmission,Fuel_Type,Fuel_Consumption_city,Fuel_Consumption_Hwy,Fuel_Consumption_Comb,Fuel_Consumption_Comb_MPG,CO2_Emissions,CO2_Rating,Smog_Rating,CO2_Emissions_A,CO2_Emissions_P
647,2021,Mercedes-Benz,Metris Cargo Van,Special purpose vehicle,2.0,4,A9,Z,12.6,10.0,11.5,25,268,4,5,268,279.372982
45,2021,Audi,RS Q8 quattro,SUV: Standard,4.0,8,AS8,Z,18.0,12.3,15.4,18,360,2,3,360,395.935691
723,2021,Porsche,911 Carrera S,Minicompact,3.0,6,AM8,Z,12.9,10.2,11.7,24,274,4,5,274,298.96786
584,2021,Mercedes-Benz,A 220 4MATIC Sedan,Subcompact,2.0,4,AM7,Z,9.6,6.9,8.4,34,197,6,5,197,208.025685
866,2021,Volkswagen,Jetta GLI,Compact,2.0,4,M6,X,9.7,7.0,8.5,33,198,6,7,198,211.530034
