In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load Data


In [3]:
# Read the car data set from the UCI repository URL; column names are
# supplied explicitly through `names=`.
column_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'accep']
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data',
    names=column_names,
)

# One-Hot Encode Features and Create Binary Target

In [4]:
# Binary target: True for every rating other than 'unacc', False for 'unacc'.
# The column is overwritten in place, so guard on its dtype: running the
# unguarded comparison a second time on the already-boolean column would
# compare booleans to 'unacc' and mark every row True.
if not pd.api.types.is_bool_dtype(df['accep']):
    df['accep'] = df['accep'] != 'unacc'
y = df['accep']

# One-hot encode the first six columns (the features); drop_first=True drops
# the first level of each feature so the indicator columns are not redundant.
X = pd.get_dummies(df.iloc[:, 0:6], drop_first=True)

# Split Data

In [5]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Build Model

In [6]:
# 1. Bagging classifier with 10 Decision Tree base estimators
# random_state is fixed so the bootstrap samples, and therefore the reported
# accuracy, are the same on every run.
# NOTE(review): any output recorded before this seed was added came from an
# unseeded run and may differ from what a re-run prints.
bag_dt = BaggingClassifier(
    DecisionTreeClassifier(max_depth=5),
    n_estimators=10,
    random_state=0,
)
bag_dt.fit(x_train, y_train)

# score() predicts on x_test itself and returns the mean accuracy, so no
# separate predict() call is needed.
bag_accuracy = bag_dt.score(x_test, y_test)
print('Accuracy score of Bagged Classifier, 10 estimators:')
print(bag_accuracy)

Accuracy score of Bagged Classifier, 10 estimators:
0.9189814814814815


# Tune Model with Max 10 Features

In [7]:
# 2. Set `max_features` to 10.
# With max_features=10 each base estimator is trained on 10 feature columns
# drawn from X. Both that draw and the bootstrap row sampling are random, so
# random_state is fixed to make the reported accuracy repeatable.
# NOTE(review): any output recorded before this seed was added came from an
# unseeded run and may differ from what a re-run prints.
bag_dt_10 = BaggingClassifier(
    DecisionTreeClassifier(max_depth=5),
    n_estimators=10,
    max_features=10,
    random_state=0,
)
bag_dt_10.fit(x_train, y_train)

bag_accuracy_10 = bag_dt_10.score(x_test, y_test)
print('Accuracy score of Bagged Classifier, 10 estimators, 10 max features:')
print(bag_accuracy_10)

Accuracy score of Bagged Classifier, 10 estimators, 10 max features:
0.8726851851851852


# Change the Base Estimator to Logistic Regression

In [8]:
# 3. Change base estimator to Logistic Regression
# LogisticRegression is imported with the other imports at the top of the
# notebook. Same ensemble settings as the 10-feature tree ensemble
# (10 estimators, 10 features each); random_state is fixed so the sampled
# rows/features, and therefore the reported accuracy, are repeatable.
# NOTE(review): any output recorded before this seed was added came from an
# unseeded run and may differ from what a re-run prints.
bag_lr = BaggingClassifier(
    LogisticRegression(),
    n_estimators=10,
    max_features=10,
    random_state=0,
)
bag_lr.fit(x_train, y_train)

print('Accuracy score of Logistic Regression, 10 estimators, 10 max features:')
bag_accuracy_lr = bag_lr.score(x_test, y_test)
print(bag_accuracy_lr)

Accuracy score of Logistic Regression, 10 estimators, 10 max features:
0.9143518518518519
