In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise =0.3, random_state=46)
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=46)

In [3]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

log_clf= LogisticRegression(random_state=46)
rnd_clf= RandomForestClassifier(random_state=46)
svm_clf= SVC(random_state=46, probability=True)

voting_clf = VotingClassifier(
    estimators = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting = 'hard'
)

In [4]:
voting_clf.fit(X_train, y_train)

In [5]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, svm_clf, rnd_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.808
SVC 0.864
RandomForestClassifier 0.896
VotingClassifier 0.88


In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

#if the bootstrap is false, it is bagging. If it is true, it is pasting. Bootstrap is whether samples are drawn with replacement. If False, sampling without replacement is performed.
bag_clf = BaggingClassifier(DecisionTreeClassifier, n_estimators=500, max_samples=100, n_jobs=-1, random_state=42)

bag_clf.fit(X_train, y_train)

y_pred = bag_clf.predict(X_test)

TypeError: Cannot clone object. You should provide an instance of scikit-learn estimator instead of a class.

In [8]:
print(accuracy_score(y_test, y_pred))

0.88


In [9]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)
rnd_clf.fit(iris['data'], iris['target'])

for name, score in zip(iris['feature_names'], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.11369993712070751
sepal width (cm) 0.02427655674840012
petal length (cm) 0.442472844942238
petal width (cm) 0.4195506611886544


In [10]:
from sklearn.ensemble import AdaBoostClassifier

#we use learning_rate in the AdaBoost Classifier to say how much quicker you want to learn. If learning rate is really high, each model will be really different. if it is too low, it won't be different enough. 
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200, 
    learning_rate=0.5, random_state=42, algorithm='SAMME.R'
)

ada_clf.fit(X_train, y_train)

y_pred_adaboost = ada_clf.predict(X_test)



In [None]:
print(accuracy_score(y_test, y_pred_adaboost))

In [20]:
import numpy as np
X = np.random.rand(1000, 1) - 0.5
y = 3* X[:, 0]**2 + 0.05 * np.random.randn(1000)

In [21]:
from sklearn.ensemble import GradientBoostingRegressor

#I have very little number of estimators. If my learning rate is low, I am not going to learn a lot. It is important to take this into account when deciding those attributes of the regressor
gbrt = GradientBoostingRegressor(max_depth=5, n_estimators=10,learning_rate= 0.1, random_state=42)
gbrt.fit(X, y)

In [22]:
X_new = np.array([[0.08]])

gbrt.predict(X_new)

array([0.11206429])