### 1. Import Libraries & Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the CO2 emission CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file location exists.
csv_path = r"C:/Users/monstr/Desktop/Machine_Learning_Algorithms/C_Datasets/Co2_Emission.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


### 2. Standardization

In [3]:
from sklearn.preprocessing import StandardScaler

# Columns used by the regression: three numeric features plus the target.
feature_columns = ["ENGINESIZE", "FUELCONSUMPTION_HWY", "FUELCONSUMPTION_COMB"]

# Take an explicit copy so the assignment below modifies our own frame
# rather than a selection of `df` (avoids SettingWithCopyWarning).
df_regression = df[feature_columns + ["CO2EMISSIONS"]].copy()

# Fit the scaler exactly once, on the raw feature values. Refitting it after
# some columns were already standardized would leave `scaler` holding the
# statistics of scaled data, making it unusable for transforming new samples.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later in this notebook, so test rows influence the scaling
# statistics. Consider fitting on the training split only.
scaler = StandardScaler()
df_regression[feature_columns] = scaler.fit_transform(df_regression[feature_columns])

df_regression.sample(7)

Unnamed: 0,ENGINESIZE,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,CO2EMISSIONS
578,-0.951292,-0.599528,-0.712086,209
837,0.108606,-0.742733,-0.65468,214
729,0.108606,-0.313118,-0.310245,242
29,-0.244694,-1.100745,-0.94171,224
964,-0.951292,-0.814335,-1.113928,177
535,0.249925,-0.205714,-0.16673,253
107,0.744544,0.044894,0.177705,281


### 3. Split the Data Frame into Train & Test Sets

In [4]:
from sklearn.model_selection import train_test_split

# Target vector: the CO2 emission column; feature matrix: every other column.
y = df_regression["CO2EMISSIONS"].to_numpy()
X = df_regression.drop("CO2EMISSIONS", axis=1).to_numpy()

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

### 4. Create Model

In [5]:
from sklearn.svm import SVR

# Baseline model: an SVR with the library's default hyper-parameters,
# fitted on the training split.
SVR_model = SVR()
SVR_model.fit(X_train, y_train)

# Predictions for the held-out rows; scored in the evaluation section.
y_pred = SVR_model.predict(X_test)

### 5. Evaluation

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the test-set predictions against the true target values.
r2 = r2_score(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
MAE = mean_absolute_error(y_test, y_pred)

# RMSE is the square root of the MSE.
RMSE = np.sqrt(MSE)

In [7]:
# One-row summary of the baseline metrics. The label names the model that was
# actually fitted above (an SVR), not a decision tree.
pd.DataFrame({"Model": "Support Vector Regression",
              "MAE": MAE,
              "MSE": MSE,
              "RMSE": RMSE,
              "R2": r2},
             index=[0])

Unnamed: 0,Model,MAE,MSE,RMSE,R2
0,Decision Tree Regression,16.687476,860.975857,29.34239,0.792435


### 6. Optimization

In [22]:
from sklearn.model_selection import GridSearchCV

# Search space: every kernel is combined with every C value.
# Each C value is listed exactly once; a repeated entry would only make the
# search cross-validate identical candidates a second time.
parameters = {"kernel": ["linear", "poly", "rbf", "sigmoid"],
              "C": [0.001, 0.01, 0.1, 1, 10, 100]}

# Exhaustive search over `parameters` using GridSearchCV's default
# cross-validation and scoring settings.
SVR_model_grid = GridSearchCV(SVR(), parameters)

In [23]:
# Run the grid search using the training split only; the test split is not
# passed here.
SVR_model_grid.fit(X_train, y_train)

In [25]:
# Display the hyper-parameter combination the grid search selected.
SVR_model_grid.best_params_

{'C': 100, 'kernel': 'rbf'}

In [26]:
# Refit an SVR with the hyper-parameters selected by the grid search. Reading
# them from `best_params_` (instead of copying the values by hand) keeps this
# cell correct if the search is rerun or the parameter grid changes.
SVR_model = SVR(**SVR_model_grid.best_params_).fit(X_train, y_train)
y_pred = SVR_model.predict(X_test)

# Same test-set metrics as for the baseline model.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
# Root Mean Squared Error (RMSE)
RMSE = np.sqrt(MSE)
r2 = r2_score(y_test, y_pred)

# One-row summary; the label names the model actually evaluated (a tuned SVR).
pd.DataFrame({"Model": "Support Vector Regression (tuned)",
              "MAE": MAE,
              "MSE": MSE,
              "RMSE": RMSE,
              "R2": r2},
             index=[0])

Unnamed: 0,Model,MAE,MSE,RMSE,R2
0,Decision Tree Regression,6.147998,343.506333,18.533924,0.917187
