<a href="https://colab.research.google.com/github/SelimOzn/ml/blob/main/EnsembleLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [None]:
# Toy binary-classification data: 500 noisy "two moons" samples, then a
# train/test split using train_test_split's default proportions.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)


In [None]:
# Ensemble of three different classifiers combined by a VotingClassifier.
# No `voting` argument is given, so the library's default voting mode applies.
voting_clf = VotingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(random_state=42)),
    ]
)
# fit() returns the estimator itself, so the test-set accuracy can be chained.
voting_clf.fit(X_train, y_train).score(X_test, y_test)

0.912

In [None]:
# Test accuracy of each individual fitted estimator inside the ensemble.
# `named_estimators_` (trailing underscore) holds the fitted models; the
# `named_estimators` variant without the underscore holds the unfitted ones.
for name, clf in voting_clf.named_estimators_.items():
    print(name, clf.score(X_test, y_test))

lr 0.864
rf 0.896
svc 0.896


In [None]:
# Ensemble prediction for the first test instance (the [:1] slice keeps the input 2-D).
voting_clf.predict(X_test[:1])

array([1])

In [None]:
# Prediction of every fitted sub-estimator for the first test instance.
# `estimators_` returns the estimators directly, without their names;
# `named_estimators_` returns them together with their names.
[est.predict(X_test[:1]) for est in voting_clf.estimators_]

[array([1]), array([1]), array([0])]

In [None]:
# Switch the ensemble to soft voting and refit. Soft voting combines predicted
# class probabilities, so the SVC is told to produce them (probability=True).
voting_clf.voting = "soft"
voting_clf.named_estimators["svc"].probability = True
# Refit with the new settings and report the test-set accuracy.
voting_clf.fit(X_train, y_train).score(X_test, y_test)

0.92

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Bagging ensemble of decision trees:
#   n_estimators=500 -> number of trees
#   max_samples=100  -> number of training instances in each tree's subset
#   n_jobs=-1        -> number of CPU cores to use (-1 means all of them)
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
)


In [None]:
# Train the bagging ensemble on the moons training split.
bag_clf.fit(X_train, y_train)

In [None]:
# Bagging with out-of-bag (OOB) evaluation enabled. According to the OOB
# estimate, an accuracy of about 0.896 is expected on the test set.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)
# fit() returns the estimator itself, so the OOB score can be read right away.
bag_clf.fit(X_train, y_train).oob_score_

0.896

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy actually measured on the held-out test set, for comparison with the OOB estimate.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)


0.92

In [None]:
# OOB decision function: class probabilities for each training instance
# (only the first three rows are displayed).
bag_clf.oob_decision_function_[:3]

array([[0.32352941, 0.67647059],
       [0.3375    , 0.6625    ],
       [1.        , 0.        ]])

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest: 500 trees, each limited to 16 leaf nodes, trained on all CPU cores.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Train the random forest on the moons training split.
rnd_clf.fit(X_train,y_train)

In [None]:
# Random-forest predictions for the test set (overwrites the earlier bagging y_pred).
y_pred = rnd_clf.predict(X_test)

In [None]:
# Bagging over decision trees configured like the random forest above.
# Setting max_features makes each tree draw a random subset of features; when
# splitting a node, it looks for the best split among those features only.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16),
    n_estimators=500,
    random_state=42,
)

In [None]:
from sklearn.datasets import load_iris
# Load iris with as_frame=True so that `iris.data` carries column labels,
# then fit a 500-tree random forest on the full dataset.
iris = load_iris(as_frame=True)
rnd_clf = RandomForestClassifier(random_state=42, n_estimators=500)
rnd_clf.fit(iris.data, iris.target)

In [None]:
# Print each feature's importance (rounded to 2 decimals) next to its column name.
for name, score in zip(iris.data.columns, rnd_clf.feature_importances_):
    print(round(score, 2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


In [None]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Noisy quadratic regression data: y = 3x^2 + Gaussian noise (std 0.05).
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5  # 100 inputs drawn uniformly from [-0.5, 0.5)
# randn (standard normal) provides the zero-mean Gaussian noise intended here;
# rand would add uniform noise in [0, 0.05) and bias every target upwards.
# NOTE: outputs saved further down in this notebook were produced with the old
# uniform noise -- re-run the following cells to refresh them.
y = 3 * X[:, 0] ** 2 + 0.05 * np.random.randn(100)

In [None]:
# First stage: a shallow (depth-2) regression tree fitted on the raw targets.
tree_reg = DecisionTreeRegressor(random_state=42, max_depth=2)
tree_reg.fit(X, y)

In [None]:
# Second stage: fit another depth-2 tree on the residual errors of the first tree.
y2 = y - tree_reg.predict(X)
tree_reg2 = DecisionTreeRegressor(random_state=42, max_depth=2)
tree_reg2.fit(X, y2)

In [None]:
# Residual errors still left after the second tree's predictions.
y3 = y2 - tree_reg2.predict(X)

In [None]:
# Third stage: one more depth-2 tree fitted on the second-stage residuals.
tree_reg3 = DecisionTreeRegressor(random_state=42, max_depth=2)
tree_reg3.fit(X, y3)

In [None]:
# Ensemble prediction for three new inputs: add up the predictions of all three trees.
X_new = np.array([[-0.4], [0.0], [0.5]])
sum(reg.predict(X_new) for reg in (tree_reg, tree_reg2, tree_reg3))


array([0.50199918, 0.08055604, 0.74072131])

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
# Gradient boosting with the same settings as the hand-built ensemble above:
# three depth-2 trees and a learning rate of 1.0.
gbrt = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1.0,
    random_state=42,
)
gbrt.fit(X, y)

In [None]:
# Up to 500 depth-2 trees with a small learning rate; n_iter_no_change=10
# enables early stopping, so fewer trees may actually be trained.
gbrt_best = GradientBoostingRegressor(
    n_estimators=500,
    max_depth=2,
    learning_rate=0.05,
    n_iter_no_change=10,
    random_state=42,
)

In [None]:
# Train the early-stopping gradient boosting model on the quadratic data.
gbrt_best.fit(X,y)

In [None]:
# Number of trees actually fitted (can be below the requested 500 because of early stopping).
gbrt_best.n_estimators_

86

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder

# Pipeline: ordinal-encode the "ocean_proximity" column, pass every other
# column through unchanged, then apply a histogram-based gradient boosting
# regressor.
# categorical_features must hold the indices of the categorical columns;
# index 0 is used here for the encoded column.
hgb_reg = make_pipeline(
    make_column_transformer(
        (OrdinalEncoder(), ["ocean_proximity"]),
        remainder="passthrough",
    ),
    HistGradientBoostingRegressor(categorical_features=[0], random_state=42),
)

In [None]:
from sklearn.ensemble import StackingClassifier
# Stacking ensemble: three base classifiers whose cross-validated (cv=5)
# predictions feed a random-forest final estimator.
# If final_estimator is not specified, StackingClassifier uses logistic
# regression and StackingRegressor uses RidgeCV.
stacking_clf = StackingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(probability=True, random_state=42)),
    ],
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5,
)


In [None]:
# Train the stacking ensemble on the moons training split (X_train/y_train from the first split).
stacking_clf.fit(X_train, y_train)