## Voting Classifier

In [1]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
X_train, y_train = make_moons(n_samples=300)

In [3]:
rf_clf = RandomForestClassifier()
lgr_clf = LogisticRegression()
svm_clf = SVC()

In [4]:
voting_clf = VotingClassifier(estimators=[('rf', rf_clf),('lgr', lgr_clf),('svm', svm_clf)], voting='hard')
voting_clf.fit(X_train[:80], y_train[:80])

In [5]:
from sklearn.metrics import accuracy_score

In [6]:
for clf in (lgr_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(X_train[:80],y_train[:80])
    y_predict = clf.predict(X_train[80:])
    print(clf.__class__.__name__, accuracy_score(y_train[80:], y_predict))

LogisticRegression 0.8681818181818182
RandomForestClassifier 0.9954545454545455
SVC 1.0
VotingClassifier 0.9954545454545455


## Bagging and Pasting

In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

### Here I am using Bagging. If We using Pasting just set "bootstrap = False".

In [8]:
Bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, bootstrap=True, max_samples=100, n_jobs=-1)

In [9]:
Bag_clf.fit(X_train[:240],y_train[:240])
y_pred = Bag_clf.predict(X_train[240:])

## problem of Out Of Bag (OOB).

In [10]:
Bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, bootstrap=True, max_samples=100, n_jobs=-1, 
                            oob_score=True)

In [11]:
Bag_clf.fit(X_train[:240], y_train[:240])
Bag_clf.oob_score_

0.9833333333333333

In [12]:
y_pred = Bag_clf.predict(X_train[240:])
accuracy_score(y_pred, y_train[240:])

0.95

In [13]:
Bag_clf.oob_decision_function_

array([[0.        , 1.        ],
       [0.        , 1.        ],
       [0.6102719 , 0.3897281 ],
       [0.00291545, 0.99708455],
       [0.16138329, 0.83861671],
       [0.23030303, 0.76969697],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.12912913, 0.87087087],
       [0.        , 1.        ],
       [0.04294479, 0.95705521],
       [0.        , 1.        ],
       [0.07911392, 0.92088608],
       [1.        , 0.        ],
       [0.7654321 , 0.2345679 ],
       [0.003125  , 0.996875  ],
       [0.99393939, 0.00606061],
       [0.        , 1.        ],
       [0.94189602, 0.05810398],
       [0.03254438, 0.96745562],
       [0.        , 1.        ],
       [0.93968254, 0.06031746],
       [0.98798799, 0.01201201],
       [0.        , 1.        ],
       [0.02034884, 0.97965116],
       [0.06790123, 0.93209877],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [0.07763975, 0.92236025],
       [0.86792453, 0.13207547],
       [0.

# Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train[:240], y_train[:240])

In [16]:
y_pred_rnd = rnd_clf.predict(X_train[240:])

### Random forest search for best features among a random subset of features.

we can also exicute the random forest code as.

In [17]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(splitter='random', max_leaf_nodes=16), n_estimators=500, max_samples=1.0, 
                            bootstrap=True, n_jobs=-1)

In [18]:
from sklearn.datasets import load_iris

In [19]:
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris["data"], iris["target"])

In [20]:
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10024751335290057
sepal width (cm) 0.02521268392191399
petal length (cm) 0.41520973985942555
petal width (cm) 0.4593300628657599


# Boosting Techniques.

## Adaboost (Adaptive Boosting)

In [21]:
from sklearn.ensemble import AdaBoostClassifier

In [22]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, algorithm='SAMME.R', learning_rate=0.5)
ada_clf.fit(X_train[:240],y_train[:240])

## Gredient Boosting

### Manually.

In [24]:
from sklearn.tree import DecisionTreeRegressor

In [25]:
tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X_train[:240], y_train[:240])

In [29]:
y2 = y_train[:240] - tree_reg1.predict(X_train[:240])
tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(X_train[:240], y2)

In [31]:
y3 = y2 - tree_reg2.predict(X_train[:240])
tree_reg3 = DecisionTreeRegressor(max_depth=2)
tree_reg3.fit(X_train[:240], y3)

In [32]:
y_pred = sum(tree.predict(X_train[240:]) for tree in (tree_reg1, tree_reg2, tree_reg3))

## Using sklearn library

In [33]:
from sklearn.ensemble import GradientBoostingRegressor

In [34]:
grb = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
grb.fit(X_train[:240],y_train[:240])

## Early stoping in Gradient Boosting.

In [35]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [36]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train)

In [37]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(X_train, y_train)

In [38]:
errors = [mean_squared_error(y_val, y_pred)
 for y_pred in gbrt.staged_predict(X_val)]

In [39]:
bst_n_esti = np.argmin(errors)

In [41]:
gbrt_best = GradientBoostingRegressor(max_depth=2,n_estimators=bst_n_esti)
gbrt_best.fit(X_train, y_train)

### efficient way to use Early stopping.

In [42]:
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True) #warm_start=True, which makes ScikitLear keep existing trees when the fit() method is called, allowing incremental training.

In [43]:
min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else :
        error_going_up += 1
        if error_going_up == 5:
            break # early stopping

## StreamGredient Boosting.(XGBOOST)

In [47]:
! pip install xgboost
import xgboost

Collecting xgboost
  Downloading xgboost-1.6.1-py3-none-win_amd64.whl (125.4 MB)
     -------------------------------------- 125.4/125.4 MB 1.4 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-1.6.1


You should consider upgrading via the 'C:\Users\LENOVO\ML_PATH\env\Scripts\python.exe -m pip install --upgrade pip' command.


In [48]:
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train, y_train)
y_pred = xgb_reg.predict(X_val)

In [49]:
xgb_reg.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=2)
y_pred = xgb_reg.predict(X_val)

[0]	validation_0-rmse:0.39518
[1]	validation_0-rmse:0.33828
[2]	validation_0-rmse:0.31244
[3]	validation_0-rmse:0.30387
[4]	validation_0-rmse:0.27736
[5]	validation_0-rmse:0.26917
[6]	validation_0-rmse:0.25646
[7]	validation_0-rmse:0.24858
[8]	validation_0-rmse:0.24238
[9]	validation_0-rmse:0.23767
[10]	validation_0-rmse:0.23414
[11]	validation_0-rmse:0.23122
[12]	validation_0-rmse:0.22937
[13]	validation_0-rmse:0.22789
[14]	validation_0-rmse:0.22663
[15]	validation_0-rmse:0.22535
[16]	validation_0-rmse:0.22495
[17]	validation_0-rmse:0.22463
[18]	validation_0-rmse:0.22438
[19]	validation_0-rmse:0.22418
[20]	validation_0-rmse:0.22378
[21]	validation_0-rmse:0.22344
[22]	validation_0-rmse:0.22316
[23]	validation_0-rmse:0.22292
[24]	validation_0-rmse:0.22272
[25]	validation_0-rmse:0.22255
[26]	validation_0-rmse:0.22242
[27]	validation_0-rmse:0.22231
[28]	validation_0-rmse:0.22220
[29]	validation_0-rmse:0.22219
[30]	validation_0-rmse:0.22219
[31]	validation_0-rmse:0.22220


