In [52]:
import pandas as pd

In [53]:
df = pd.read_csv("car.data")

In [54]:
df.columns = ["buying","maintain","doors","persons","lug_boot","safety" , 'class']

In [55]:
df.head()

Unnamed: 0,buying,maintain,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [56]:
df['class'].unique()

array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [57]:
df['buying'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [58]:
df['maintain'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [59]:
df['lug_boot'].unique()

array(['small', 'med', 'big'], dtype=object)

In [60]:
df['safety'].unique()

array(['med', 'high', 'low'], dtype=object)

In [61]:

from sklearn.preprocessing import OrdinalEncoder

buying_order = ['low', 'med', 'high', 'vhigh']
maintain_order = ['low', 'med', 'high', 'vhigh']

enc = OrdinalEncoder(categories=[buying_order, maintain_order],
                    handle_unknown='use_encoded_value', unknown_value=-1)
df[['buying', 'maintain']] = enc.fit_transform(df[['buying', 'maintain']])


In [62]:

class_order = ["unacc","acc","good","vgood"]
enc_class = OrdinalEncoder(categories=[class_order],
                    handle_unknown='use_encoded_value', unknown_value=-1)
df[['class']] = enc_class.fit_transform(df[['class']])


In [63]:
df.head(1)

Unnamed: 0,buying,maintain,doors,persons,lug_boot,safety,class
0,3.0,3.0,2,2,small,med,0.0


In [64]:

lug_boot_order = ['small', 'med', 'big']
enc_lug_boot = OrdinalEncoder(categories=[lug_boot_order],
                    handle_unknown='use_encoded_value', unknown_value=-1)
df[['lug_boot']] = enc_lug_boot.fit_transform(df[['lug_boot']])

df.head(1)


Unnamed: 0,buying,maintain,doors,persons,lug_boot,safety,class
0,3.0,3.0,2,2,0.0,med,0.0


In [65]:

safety_order = ['low', 'med', 'high']
enc_safety = OrdinalEncoder(categories=[safety_order],
                    handle_unknown='use_encoded_value', unknown_value=-1)
df[['safety']] = enc_safety.fit_transform(df[['safety']])

df.head(1)


Unnamed: 0,buying,maintain,doors,persons,lug_boot,safety,class
0,3.0,3.0,2,2,0.0,1.0,0.0


In [66]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1727 entries, 0 to 1726
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   buying    1727 non-null   float64
 1   maintain  1727 non-null   float64
 2   doors     1727 non-null   object 
 3   persons   1727 non-null   object 
 4   lug_boot  1727 non-null   float64
 5   safety    1727 non-null   float64
 6   class     1727 non-null   float64
dtypes: float64(5), object(2)
memory usage: 94.6+ KB


In [67]:

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
df['doors'] = df['doors'].replace('5more', 5)
df['doors'] = pd.to_numeric(df['doors'])
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1727 entries, 0 to 1726
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   buying    1727 non-null   float64
 1   maintain  1727 non-null   float64
 2   doors     1727 non-null   int64  
 3   persons   1727 non-null   object 
 4   lug_boot  1727 non-null   float64
 5   safety    1727 non-null   float64
 6   class     1727 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 94.6+ KB


In [68]:
df['persons'].unique()

array(['2', '4', 'more'], dtype=object)

In [69]:

persons_order = ['2', '4', 'more']
enc_persons = OrdinalEncoder(categories=[persons_order],
                             handle_unknown='use_encoded_value', unknown_value=-1)
df[['persons']] = enc_persons.fit_transform(df[['persons']])

df.head(1)


Unnamed: 0,buying,maintain,doors,persons,lug_boot,safety,class
0,3.0,3.0,2,0.0,0.0,1.0,0.0


In [70]:
x = df.drop('class',axis=1)
y = df['class']
print(x.shape)
print(y.shape)

In [71]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2 , random_state=42)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(1381, 6)
(346, 6)
(1381,)
(346,)


In [72]:

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score



models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB()
}

results = {}
for model_name, model in models.items():
  model.fit(x_train, y_train)
  y_pred = model.predict(x_test)
  accuracy = accuracy_score(y_test, y_pred)
  results[model_name] = accuracy

for model_name, accuracy in results.items():
  print(f"{model_name}: Accuracy = {accuracy}")


Logistic Regression: Accuracy = 0.8323699421965318
Decision Tree: Accuracy = 0.9653179190751445
Random Forest: Accuracy = 0.9710982658959537
Support Vector Machine: Accuracy = 0.9421965317919075
K-Nearest Neighbors: Accuracy = 0.9277456647398844
Naive Bayes: Accuracy = 0.6878612716763006


In [73]:

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score



rf1 = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=42)
rf2 = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=43)
rf3 = RandomForestClassifier(n_estimators=150, max_depth=20, random_state=44)
rf4 = RandomForestClassifier(n_estimators=200, max_depth=25, random_state=45)
rf5 = RandomForestClassifier(n_estimators=250, max_depth=30, random_state=46)

voting_clf = VotingClassifier(estimators=[
    ('rf1', rf1), ('rf2', rf2), ('rf3', rf3), ('rf4', rf4), ('rf5', rf5)],
    voting='hard')


voting_clf.fit(x_train, y_train)
y_pred = voting_clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Voting Classifier Accuracy: {accuracy}")



Voting Classifier Accuracy: 0.9739884393063584
