In [50]:
import pandas as pd

# Read the fuel dataset from the path relative to the notebook.
fuel_csv_path = "datasets/fuel.csv"
data = pd.read_csv(fuel_csv_path)

# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

Unnamed: 0,id,make,model,year,VClass,drive,trans,fuelType,cylinders,displ,pv2,pv4,city,UCity,highway,UHighway,comb,co2,feScore,ghgScore
0,32204,Nissan,GT-R,2013,Subcompact Cars,All-Wheel Drive,Automatic (AM6),Premium Gasoline,6,3.8,79,0,16.4596,20.2988,22.5568,30.1798,18.7389,471,4,4
1,32205,Volkswagen,CC,2013,Compact Cars,Front-Wheel Drive,Automatic (AM-S6),Premium Gasoline,4,2.0,94,0,21.8706,26.977,31.0367,42.4936,25.2227,349,6,6
2,32206,Volkswagen,CC,2013,Compact Cars,Front-Wheel Drive,Automatic (S6),Premium Gasoline,6,3.6,94,0,17.4935,21.2,26.5716,35.1,20.6716,429,5,5
3,32207,Volkswagen,CC 4motion,2013,Compact Cars,All-Wheel Drive,Automatic (S6),Premium Gasoline,6,3.6,94,0,16.9415,20.5,25.219,33.5,19.8774,446,5,5
4,32208,Chevrolet,Malibu eAssist,2013,Midsize Cars,Front-Wheel Drive,Automatic (S6),Regular Gasoline,4,2.4,0,95,24.7726,31.9796,35.534,51.8816,28.6813,310,8,8


#### Create a binary label for "Front-Wheel Drive" cars

In [51]:
# Binary target: 1 when the car is front-wheel drive, 0 for every other drive type.
# A vectorised comparison replaces the per-row Python lambda; the explicit int64
# cast keeps the same integer dtype the lambda-based version produced.
data["class_label"] = (data["drive"] == "Front-Wheel Drive").astype("int64")

# Spot-check a few random rows to confirm the label matches the drive column.
data[["drive", "class_label"]].sample(5)

Unnamed: 0,drive,class_label
516,Front-Wheel Drive,1
802,Front-Wheel Drive,1
1375,Front-Wheel Drive,1
3881,All-Wheel Drive,0
3785,Front-Wheel Drive,1


In [52]:
# Keep only the descriptive columns, the two numeric features and the target.
kept_columns = ["make", "model", "VClass", "drive", "displ", "comb", "class_label"]
data = data.loc[:, kept_columns]

# Show a random handful of rows from the reduced frame.
data.sample(n=5)

Unnamed: 0,make,model,VClass,drive,displ,comb,class_label
1875,Infiniti,Q50a AWD,Midsize Cars,All-Wheel Drive,3.7,21.6982,0
3674,Alfa Romeo,Giulia,Midsize Cars,Rear-Wheel Drive,2.9,19.7059,0
1187,Dodge,Dart,Midsize Cars,Front-Wheel Drive,2.0,27.0857,1
1635,Kia,Rio,Compact Cars,Front-Wheel Drive,1.6,30.2833,1
1919,Lexus,LS 460 AWD,Midsize Cars,All-Wheel Drive,4.6,18.2396,0


#### Based on displacement and fuel consumption

In [53]:
# Feature matrix: the two numeric columns used as predictors.
feature_columns = ["displ", "comb"]
x = data[feature_columns]

# Target vector: the binary front-wheel-drive label.
y = data.loc[:, "class_label"]

In [54]:
from sklearn.model_selection import train_test_split

# Split the raw arrays into train and test parts; the fixed seed makes the
# shuffle reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x.values,
    y.values,
    random_state=0,
)

In [55]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier trained on the unscaled features, with default settings.
model = LogisticRegression()

# fit() returns the estimator, so the cell displays the fitted model.
model.fit(X=x_train, y=y_train)

In [56]:
from sklearn.preprocessing import StandardScaler

# Standardise the two features for the second (scaled) model.
scaler = StandardScaler()

# Learn the mean / standard deviation from the training rows only, so that no
# statistics from the held-out test rows leak into the preprocessing step.
scaler.fit(x_train)

# Transform every row with those training-set statistics. The full matrix is
# kept so it can later be split with the same random_state as the raw data.
scaled_x = scaler.transform(x.values)

In [57]:
# Split the scaled features with the same seed as the raw split. The label
# halves are kept (instead of being thrown away into `_`, which would also
# clobber IPython's last-output variable) so the alignment can be verified.
scaled_x_train, scaled_x_test, scaled_y_train, scaled_y_test = train_test_split(
    scaled_x, y.values, random_state=0
)

# The scaled model is trained and evaluated against y_train / y_test from the
# raw split, so both splits must have selected exactly the same rows.
assert (scaled_y_train == y_train).all(), "scaled and raw train splits differ"
assert (scaled_y_test == y_test).all(), "scaled and raw test splits differ"

In [58]:
# Second classifier, identical settings, trained on the standardised features.
scaled_model = LogisticRegression()

# Labels come from the raw split; the scaled split uses the same random_state.
scaled_model.fit(X=scaled_x_train, y=y_train)

#### Evaluate Non-scaled Model

In [59]:
# Predicted labels of the unscaled model for the held-out test rows.
y_predict = model.predict(X=x_test)

In [60]:
from sklearn import metrics

# Confusion matrix of the unscaled model's predictions against the test labels.
metrics.confusion_matrix(y_true=y_test, y_pred=y_predict)

array([[510,  90],
       [ 87, 296]], dtype=int64)

In [61]:
# Share of test rows the unscaled model labelled correctly.
metrics.accuracy_score(y_true=y_test, y_pred=y_predict)

0.8199389623601221

In [62]:
# Precision of the unscaled model on the test rows.
metrics.precision_score(y_true=y_test, y_pred=y_predict)

0.7668393782383419

In [63]:
# Recall of the unscaled model on the test rows.
metrics.recall_score(y_true=y_test, y_pred=y_predict)

0.7728459530026109

#### Evaluate Scaled Model

In [69]:
# Predicted labels of the scaled model for the held-out test rows.
# NOTE: this reuses the name y_predict, replacing the unscaled model's predictions.
y_predict = scaled_model.predict(X=scaled_x_test)

In [70]:
# Confusion matrix for the scaled model (y_predict is expected to hold its
# predictions here, as reassigned in the preceding cell).
metrics.confusion_matrix(y_true=y_test, y_pred=y_predict)

array([[508,  92],
       [ 87, 296]], dtype=int64)

In [71]:
# Share of test rows the scaled model labelled correctly.
metrics.accuracy_score(y_true=y_test, y_pred=y_predict)

0.8179043743641913

In [72]:
# Precision of the scaled model on the test rows.
metrics.precision_score(y_true=y_test, y_pred=y_predict)

0.7628865979381443

In [73]:
# Recall of the scaled model on the test rows.
metrics.recall_score(y_true=y_test, y_pred=y_predict)

0.7728459530026109

#### Almost no difference: the feature values already lie in a small range, so scaling has little effect

In [79]:
# One hand-written sample in the same column order as the training features:
# [displ, comb].
sample_data = [[4.4, 16.1]]

# predict() returns one label per input row; there is a single row here.
class_label = model.predict(sample_data)
is_front_wheel = class_label[0] == 1

"Car is a Front Wheel Drive" if is_front_wheel else "Car is either Back Wheel or All Wheel Drive"

'Car is either Back Wheel or All Wheel Drive'

In [80]:
# A second hand-written sample, again ordered as [displ, comb].
sample_data = [[2.526, 30.7]]

# Single-row prediction from the unscaled model.
class_label = model.predict(sample_data)
is_front_wheel = class_label[0] == 1

"Car is a Front Wheel Drive" if is_front_wheel else "Car is either Back Wheel or All Wheel Drive"

'Car is a Front Wheel Drive'