# Voting Classifiers


In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [2]:
X , y = make_moons(n_samples= 1000 , noise = 0.15)

In [3]:
X_train , X_test , y_train , y_test  = train_test_split(X , y , random_state = 42 , test_size = 0.2)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [5]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC() #probabilty = True for soft 

In [6]:
voting_clf = VotingClassifier(
    estimators=[('lr' , log_clf) , ('rf' , rnd_clf) , ('svc' , svm_clf)],
    voting = 'hard'
)
voting_clf.fit(X_train , y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

In [7]:
from sklearn.metrics import accuracy_score

In [8]:
for clf in(log_clf , rnd_clf , voting_clf):
    clf.fit(X_train , y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__ , accuracy_score(y_test , y_pred))

LogisticRegression 0.885
RandomForestClassifier 0.98
VotingClassifier 0.985


# Bagging and Pasting 

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [10]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier() , n_estimators=500 , 
    max_samples = 100 , bootstrap= True ,  n_jobs=-1
)
bag_clf.fit(X_train ,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1)

In [11]:
y_pred = bag_clf.predict(X_test)

# Out of Bag Evaluation (OOB)

In [12]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier()  , n_estimators= 500,
    bootstrap= True , n_jobs= -1 , oob_score= True
)

bag_clf.fit(X_train , y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=500,
                  n_jobs=-1, oob_score=True)

In [13]:
bag_clf.oob_score_

0.97875

In [14]:
bag_clf.oob_decision_function_

array([[0.98930481, 0.01069519],
       [0.99435028, 0.00564972],
       [0.06547619, 0.93452381],
       ...,
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.96571429, 0.03428571]])

# Radom Forest 


In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
rnd_clf = RandomForestClassifier(n_estimators= 500 , max_leaf_nodes= 16 , n_jobs= -1)

In [17]:
rnd_clf.fit(X_train , y_train)

RandomForestClassifier(max_leaf_nodes=16, n_estimators=500, n_jobs=-1)

# Calculating Importance

In [19]:
from sklearn.datasets import load_iris
iris = load_iris()

In [20]:
rnd_clf  = RandomForestClassifier(n_estimators=500 , n_jobs= -1)
rnd_clf.fit(iris['data'] , iris['target'])

RandomForestClassifier(n_estimators=500, n_jobs=-1)

In [22]:
for name, score in zip(iris['feature_names'] , rnd_clf.feature_importances_):
    print(name , score)

sepal length (cm) 0.10342515760222575
sepal width (cm) 0.02358219089721813
petal length (cm) 0.4369692885207142
petal width (cm) 0.43602336297984184


# Adaboost

In [23]:
from sklearn.ensemble import AdaBoostClassifier

In [24]:
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth= 1) , n_estimators=200,
    algorithm='SAMME.R' , learning_rate= 0.5)
ada_clf.fit(X_train , y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)