# Car Evaluation

In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [2]:
# Load the raw comma-separated file. It has no header row, so the column
# names are supplied explicitly.
car_table = pd.read_csv(
    "data/car.data",
    header=None,
    names=["price", "maint", "doors", "persons", "lug_boot", "safety", "class"],
)
car_table

Unnamed: 0,price,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [3]:
# Separate the "price" column (target) from the remaining columns (features).
Y_car = car_table["price"]
X_car = car_table.drop("price", axis="columns")

# Plain-text dump of both pieces.
print(X_car)
print(Y_car)



      maint  doors persons lug_boot safety  class
0     vhigh      2       2    small    low  unacc
1     vhigh      2       2    small    med  unacc
2     vhigh      2       2    small   high  unacc
3     vhigh      2       2      med    low  unacc
4     vhigh      2       2      med    med  unacc
...     ...    ...     ...      ...    ...    ...
1723    low  5more    more      med    med   good
1724    low  5more    more      med   high  vgood
1725    low  5more    more      big    low  unacc
1726    low  5more    more      big    med   good
1727    low  5more    more      big   high  vgood

[1728 rows x 6 columns]
0       vhigh
1       vhigh
2       vhigh
3       vhigh
4       vhigh
        ...  
1723      low
1724      low
1725      low
1726      low
1727      low
Name: price, Length: 1728, dtype: object


In [20]:
# Ordered level lists for each categorical scale. OrdinalEncoder maps each
# value to its position in the list (e.g. 'low' -> 0, 'vhigh' -> 3).
maint_levels = ["low", "med", "high", "vhigh"]
doors_levels = ["2", "3", "4", "5more"]
persons_levels = ["2", "4", "more"]
boot_levels = ["small", "med", "big"]
safety_levels = ["low", "med", "high"]
class_levels = ["unacc", "acc", "good", "vgood"]

# Column -> level list, in the order the columns are handed to the encoder.
# "price" is encoded with the same level list as "maint".
levels_by_column = {
    "price": maint_levels,
    "maint": maint_levels,
    "doors": doors_levels,
    "persons": persons_levels,
    "lug_boot": boot_levels,
    "safety": safety_levels,
    "class": class_levels,
}
ordinal_feats = list(levels_by_column)

# Encoder for every column of the table (target column included).
ct = make_column_transformer(
    (
        OrdinalEncoder(categories=list(levels_by_column.values()), dtype=int),
        ordinal_feats,
    )
)

# Encoder for the "price" column on its own.
y_ct = make_column_transformer(
    (OrdinalEncoder(categories=[maint_levels], dtype=int), ["price"])
)

In [5]:
# Display the column transformer's configuration.
# NOTE(review): the saved output under this cell lists six category lists, while
# the current `ct` definition passes seven -- the output looks stale; re-run the
# notebook top to bottom to refresh it.
ct

ColumnTransformer(transformers=[('ordinalencoder',
                                 OrdinalEncoder(categories=[['low', 'med',
                                                             'high', 'vhigh'],
                                                            ['2', '3', '4',
                                                             '5more'],
                                                            ['2', '4', 'more'],
                                                            ['small', 'med',
                                                             'big'],
                                                            ['low', 'med',
                                                             'high'],
                                                            ['unacc', 'acc',
                                                             'good', 'vgood']],
                                                dtype=<class 'int'>),
                                 ['maint', 'doors'

In [21]:
# Column labels for the encoded array: the same list (and order) given to the transformer.
column_names = ordinal_feats

# Fit the ordinal encoder on the full table and get the integer-coded array back.
X_transformed = ct.fit_transform(car_table)
X_transformed

array([[3, 3, 0, ..., 0, 0, 0],
       [3, 3, 0, ..., 0, 1, 0],
       [3, 3, 0, ..., 0, 2, 0],
       ...,
       [0, 0, 3, ..., 2, 0, 0],
       [0, 0, 3, ..., 2, 1, 2],
       [0, 0, 3, ..., 2, 2, 3]])

In [27]:
# Wrap the encoded array in a DataFrame so columns can be addressed by name again.
new_table = pd.DataFrame(data=X_transformed, columns=column_names)

# Encoded target ("price") and the remaining encoded columns as features.
Y_car = new_table["price"]
X_car = new_table.drop("price", axis="columns")

X_car

Unnamed: 0,maint,doors,persons,lug_boot,safety,class
0,3,0,0,0,0,0
1,3,0,0,0,1,0
2,3,0,0,0,2,0
3,3,0,0,1,0,0
4,3,0,0,1,1,0
...,...,...,...,...,...,...
1723,0,3,2,1,1,2
1724,0,3,2,1,2,3
1725,0,3,2,2,0,0
1726,0,3,2,2,1,2


In [30]:
# Baseline model: Ridge regression with default settings on the integer-coded
# features. (`Ridge` is imported in the notebook's top import cell.)
pipe = make_pipeline(Ridge())

# Hold out 10% of the rows; the fixed seed keeps the split reproducible.
# `X_Test` keeps its original capitalisation so any later reference still resolves.
X_train, X_Test, y_train, y_test = train_test_split(
    X_car, Y_car, test_size=0.1, random_state=123
)

# Cross-validate on the training portion only, also recording train scores
# so they can be compared with the validation scores.
scores = cross_validate(pipe, X_train, y_train, return_train_score=True)
pd.DataFrame(scores)



Unnamed: 0,fit_time,score_time,test_score,train_score
0,0.002,0.001001,0.12925,0.126411
1,0.002,0.001,0.095533,0.135324
2,0.001,0.0,0.121123,0.128422
3,0.001,0.0,0.138223,0.122452
4,0.000999,0.001,0.113972,0.130483


In [33]:
# Fit a Ridge model (alpha=1.0) on the training split, then pull the learned
# coefficients out of the pipeline's "ridge" step.
pipe_ridge = make_pipeline(Ridge(alpha=1.0)).fit(X_train, y_train)
coeffs = pipe_ridge.named_steps["ridge"].coef_

coeffs

array([-0.10089122,  0.03568669,  0.20685384,  0.09017944,  0.27520731,
       -0.69466325])

In [37]:
# Label each coefficient with the column it was fitted on. The names are read
# from X_train itself rather than typed by hand, so they cannot drift out of
# step with the model's feature order if the feature set changes.
feats = X_train.columns.tolist()
pd.DataFrame(data=coeffs, index=feats, columns=["Coefficients"])

Unnamed: 0,Coefficients
maint,-0.100891
doors,0.035687
persons,0.206854
lug_boot,0.090179
safety,0.275207
class,-0.694663


### Summary

### Introduction

When making a large purchase, it is important to consider all of the factors that go into the price and into the decision to buy. With car prices on the rise$^{1}$, this is an essential consideration. Several factors influence the price of a car, for example brand, size, and safety. Safety raises the cost of a car because of individual features and equipment, including backup cameras, video displays, and automatic emergency braking$^{2}$. When purchasing a vehicle, you are paying not only for the car itself but also for insurance, whose cost is likewise influenced by a number of features. On average, insurance for a 4-door car is cheaper than for a 2-door car because of differences in target buyers, sticker price, repair costs, etc.$^{3}$ As 2-door cars are typically sports and luxury cars, the materials and technology used to manufacture them are likely to be luxury as well$^{3}$. Following the same line of reasoning, the maintenance of a luxury car is expected to be more expensive$^{4}$. Maintenance cost is likely to be a major influence on the price because it can account for more than 30% of the total cost$^{5}$.


We are interested in determining the relationship between the buying price of a car and its maintenance cost, number of doors, passenger capacity, safety, and the size of its luggage boot (trunk). Therefore, the question we explore in this analysis is: do these factors influence the cost of buying a car, and if so, how? To study these relationships, we use the Car Evaluation dataset obtained from the UC Irvine Machine Learning Repository$^{6}$. This dataset records the buying price and all of the explanatory variables for our analysis as categorical values. Based on our initial research, we expect maintenance and safety to be highly correlated with the buying price.

### Methods & Results

### References 

1 Capparella, J. (2021, November 29). New car prices are skyrocketing this spring. Car and 
Driver. Retrieved February 19, 2022, from https://www.caranddriver.com/news/a36342329/new-cars-expensive-prices/ 

2 Henry, J. (2021, November 11). Average new car price tops \$45,000, used car price over 
\$25,000. Forbes. Retrieved February 19, 2022, from https://www.forbes.com/wheels/news/new-car-price-tops-45000/ 

3 Vallet, M. (2021, February 25). Insurance for 2-door car versus 4-door car. Carinsurance.com. 
Retrieved February 19, 2022, from https://www.carinsurance.com/2-door-vs-4-door.aspx 

4 Ltd., T. I. (2020, June 27). How much does car maintenance cost? Car Maintenance Cost | 
What Is The Average Car Maintenance Cost? Retrieved February 19, 2022, from https://www.thinkinsure.ca/insurance-help-centre/car-maintenance-cost.html 

5 Vehicle maintenance: Cost relationship and estimating ... (n.d.). Retrieved February 19, 2022, 
from https://onlinepubs.trb.org/Onlinepubs/trr/1987/1140/1140-001.pdf 

6 Car Evaluation. (1997). UCI Machine Learning Repository.