In [60]:
### import libraries required in our project
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [61]:
# Load the raw data file into a DataFrame. header=None tells pandas not to
# treat the first row as column names, so the columns are labelled by position.
car_eval_df = pd.read_csv(filepath_or_buffer='car.data', header=None)
car_eval_df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [62]:
# Names for the seven columns, in the order they appear in the file.
header = [
    'buying',
    'maint',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'class',
]

# Replace the positional column labels with the names above and preview.
car_eval_df.columns = header
car_eval_df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [63]:
# Description of the dataset: row count, column names, non-null counts and dtypes.
car_eval_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
buying      1728 non-null object
maint       1728 non-null object
doors       1728 non-null object
persons     1728 non-null object
lug_boot    1728 non-null object
safety      1728 non-null object
class       1728 non-null object
dtypes: object(7)
memory usage: 94.6+ KB


In [64]:
# info() reports every column as object dtype (categorical) with no missing
# values, so for each column print its name followed by how many times each
# distinct value occurs.
for column_name in car_eval_df.columns:
    print(column_name)
    print(car_eval_df[column_name].value_counts())

buying
med      432
low      432
high     432
vhigh    432
Name: buying, dtype: int64
maint
med      432
low      432
high     432
vhigh    432
Name: maint, dtype: int64
doors
2        432
3        432
4        432
5more    432
Name: doors, dtype: int64
persons
2       576
4       576
more    576
Name: persons, dtype: int64
lug_boot
small    576
med      576
big      576
Name: lug_boot, dtype: int64
safety
med     576
low     576
high    576
Name: safety, dtype: int64
class
unacc    1210
acc       384
good       69
vgood      65
Name: class, dtype: int64


In [65]:
# The features have three or four distinct values each; "buying" has four
# (low, med, high, vhigh). Build one indicator column per level.
#
# Every level -- including "vhigh" -- is given a "buying_" prefix, because the
# same level names also occur in "maint" and "safety" and the indicator frames
# are later placed side by side in one DataFrame.
buying_dummies = pd.get_dummies(car_eval_df['buying']).rename(
    columns={
        'low': 'buying_low',
        'med': 'buying_med',
        'high': 'buying_high',
        'vhigh': 'buying_vhigh',
    }
)

In [66]:
# Indicator columns for the "maint" feature, one per level.
# The levels share their names with other features, so each column is
# renamed with a "maint_" prefix to keep them distinguishable.
maint_dummies = pd.get_dummies(car_eval_df['maint']).rename(
    columns={
        'low': 'maint_low',
        'med': 'maint_med',
        'high': 'maint_high',
        'vhigh': 'maint_vhigh',
    }
)


In [67]:
# Indicator columns for the "doors" feature, one per level.
#
# The mapping must be passed as columns=...: a bare positional mapping is
# applied to the row index by DataFrame.rename, which would leave the columns
# named '2', '3', '4' and '5more' without raising any error.
doors_dummies = pd.get_dummies(car_eval_df['doors']).rename(
    columns={
        '2': 'two_doors',
        '3': 'three_doors',
        '4': 'four_doors',
        '5more': 'more_five_doors',
    }
)

In [68]:
# Indicator columns for the "persons" feature, one per level.
# The raw labels ('2', '4', 'more') are renamed to descriptive column names.
persons_dummies = pd.get_dummies(car_eval_df['persons']).rename(
    columns={
        '2': 'two_persons',
        '4': 'four_persons',
        'more': 'more_persons',
    }
)

In [69]:
# Indicator columns for the "lug_boot" feature, one per level.
# Each level name gets a "_lug_boot" suffix so it cannot be mistaken for the
# same level name in another feature.
lug_boot_dummies = pd.get_dummies(car_eval_df['lug_boot']).rename(
    columns={
        'small': 'small_lug_boot',
        'med': 'med_lug_boot',
        'big': 'big_lug_boot',
    }
)

In [70]:
# Indicator columns for the "safety" feature, one per level.
# Columns are renamed with a "safety_" prefix because the same level names
# are used by other features.
safety_dummies = pd.get_dummies(car_eval_df['safety']).rename(
    columns={
        'low': 'safety_low',
        'med': 'safety_med',
        'high': 'safety_high',
    }
)

In [71]:
# Indicator columns for the "class" column, one per level.
# Each level name is given a "_class" suffix.
class_dummies = pd.get_dummies(car_eval_df['class']).rename(
    columns={
        'unacc': 'unacc_class',
        'acc': 'acc_class',
        'good': 'good_class',
        'vgood': 'vgood_class',
    }
)

In [72]:
# Append all the indicator (dummy) frames to "car_eval_df".
# List of all dummy frames, in the order their columns should appear.
dummies_obj_list = [buying_dummies, maint_dummies, doors_dummies,
                    persons_dummies, lug_boot_dummies, safety_dummies,
                    class_dummies]

# Join everything column-wise in a single concat call instead of growing the
# frame once per loop iteration. Starting from car_eval_df[header] keeps only
# the original columns, so re-running this cell does not stack a second copy
# of the indicator columns onto the frame.
car_eval_df = pd.concat([car_eval_df[header], *dummies_obj_list], axis=1)

# Check the combined frame.
car_eval_df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class,buying_high,buying_low,buying_med,...,big_lug_boot,med_lug_boot,small_lug_boot,safety_high,safety_low,safety_med,acc_class,good_class,unacc_class,vgood_class
0,vhigh,vhigh,2,2,small,low,unacc,0,0,0,...,0,0,1,0,1,0,0,0,1,0
1,vhigh,vhigh,2,2,small,med,unacc,0,0,0,...,0,0,1,0,0,1,0,0,1,0
2,vhigh,vhigh,2,2,small,high,unacc,0,0,0,...,0,0,1,1,0,0,0,0,1,0
3,vhigh,vhigh,2,2,med,low,unacc,0,0,0,...,0,1,0,0,1,0,0,0,1,0
4,vhigh,vhigh,2,2,med,med,unacc,0,0,0,...,0,1,0,0,0,1,0,0,1,0


In [73]:
# Save the resulting dataset in CSV format.
# index=False leaves the DataFrame's row index out of the written file.
car_eval_df.to_csv('car_eval.csv',index=False)