# Chapter 7: Ensemble Learning and Random Forests

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

# Display the installed scikit-learn version as this cell's output.
sklearn.__version__

'1.5.1'

In [2]:
import matplotlib.pyplot as plt

# Global matplotlib defaults for every figure in this notebook:
# 14 pt for general text, axis labels/titles and legends; 10 pt for tick labels.
plt.rcParams.update({
    "font.size": 14,
    "axes.labelsize": 14,
    "axes.titlesize": 14,
    "legend.fontsize": 14,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
})

## Voting Classifiers

In [3]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


# Noisy two-moons toy dataset (500 points), split into training and test sets.
# Both steps are seeded so the results are reproducible.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Ensemble of three different model families; each entry is a (name, estimator)
# pair, and the name is how the estimator is looked up later.
voting_clf = VotingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(random_state=42)),
    ]
)

# Fitting the ensemble fits a clone of every base estimator on the training set.
voting_clf.fit(X_train, y_train)

In [4]:
# Test-set accuracy of each fitted base estimator, one per line.
for name, estimator in voting_clf.named_estimators_.items():
    test_accuracy = estimator.score(X_test, y_test)
    print(name, test_accuracy)

lr 0.864
rf 0.896
svc 0.896


In [5]:
# Ensemble prediction for the first test instance; the slice X_test[:1]
# (rather than X_test[0]) keeps the input two-dimensional.
voting_clf.predict(X_test[:1])

array([1], dtype=int64)

In [6]:
# Show how each fitted base estimator votes on the first test instance.
first_instance = X_test[:1]
for name, estimator in voting_clf.named_estimators_.items():
    vote = estimator.predict(first_instance)
    print(name, vote)

lr [1]
rf [1]
svc [0]


In [11]:
# Side-by-side report: accuracy of each base estimator vs. the whole ensemble.
separator = "-" * 50

print("Individual classifiers:")
print(separator)
for name, estimator in voting_clf.named_estimators_.items():
    print("\t", name, estimator.score(X_test, y_test))

print("\nVoting classifier:")
print(separator)
print("\t", voting_clf.score(X_test, y_test))

Individual classifiers:
--------------------------------------------------
	 lr 0.864
	 rf 0.896
	 svc 0.896

Voting classifier:
--------------------------------------------------
	 0.912


In [13]:
# Switch the ensemble to soft voting and re-evaluate it.
#
# Soft voting needs class probabilities from every base estimator, so the SVC
# must be configured with probability=True. Both changes go through the public
# set_params API ("svc__probability" addresses the estimator registered under
# the name "svc"), which validates parameter names: a typo raises an error
# instead of silently creating an unused attribute.
#
# NOTE: this reconfigures and refits `voting_clf` in place, so any cell run
# after this one sees the soft-voting ensemble.
voting_clf.set_params(voting="soft", svc__probability=True)
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

0.92

## Bagging and Pasting

In [23]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging: 500 decision trees, each trained on 100 training instances sampled
# with replacement (bootstrap=True). oob_score=True additionally evaluates the
# ensemble on the instances each tree did not see during training.
bagg_clf_boots = BaggingClassifier(
    DecisionTreeClassifier(random_state=47),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,  # use all available CPU cores
    random_state=47,
)
bagg_clf_boots.fit(X_train, y_train)

# (out-of-bag accuracy estimate, accuracy on the held-out test set)
(bagg_clf_boots.oob_score_, bagg_clf_boots.score(X_test, y_test))

(0.9253333333333333, 0.912)

In [24]:
# Pasting: same ensemble as the bagging one, but each tree's 100 training
# instances are sampled without replacement (bootstrap=False).
bagg_clf_paste = BaggingClassifier(
    DecisionTreeClassifier(random_state=47),
    n_estimators=500,
    max_samples=100,
    bootstrap=False,
    n_jobs=-1,  # use all available CPU cores
    random_state=47,
)
bagg_clf_paste.fit(X_train, y_train)

# Accuracy on the held-out test set.
bagg_clf_paste.score(X_test, y_test)

0.912

In [26]:
# First three rows of the out-of-bag decision function of the bootstrap
# ensemble: one row per training instance, one column per class.
bagg_clf_boots.oob_decision_function_[:3]

array([[0.31472081, 0.68527919],
       [0.41237113, 0.58762887],
       [1.        , 0.        ]])