In [None]:
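# One-time setup: uncomment the line below to install mlxtend into this kernel's environment.
# (`pip.main` is no longer part of pip's public API, so the %pip magic is used instead.)
# %pip install mlxtend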

In [None]:
import pandas as pd
import numpy as np

from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor,ExtraTreesClassifier,ExtraTreesRegressor
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from xgboost import XGBRegressor,XGBClassifier

from sklearn.ensemble import VotingClassifier,VotingRegressor

from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import StackingClassifier, StackingRegressor
from mlxtend.classifier import StackingClassifier as stc
from mlxtend.regressor import StackingRegressor as strg

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report,mean_squared_error

# (01) Classification

In [None]:
data=pd.read_csv("D:\Workshops\Machine Learning for Data Science With Python\Datasets\Bank.CSV")
data.head()

In [None]:
x=data.iloc[:,:7].values
y=data.iloc[:,7].values

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

# Bootstrapping

In [None]:
# Small toy array used to illustrate sampling with replacement: 10, 20, 30, 40, 50.
arr = np.arange(10, 60, 10)

In [None]:
np.random.choice(arr,3)

In [None]:
for i in range(5):
    print(np.random.choice(arr,5))

In [None]:
# Bootstrap demo: fit a fresh logistic regression on five resamples of the
# training split and report how each one scores on the untouched test split.
n_rows = x_train.shape[0]
for trial in range(5):
    # Row positions sampled at random, as many as there are training rows.
    sampled_rows = np.random.choice(np.arange(n_rows), size=n_rows)

    model = LogisticRegression()
    model.fit(x_train[sampled_rows], y_train[sampled_rows])
    y_pred = model.predict(x_test)

    print(confusion_matrix(y_test, y_pred))
    print(accuracy_score(y_test, y_pred))
    print("********************************")

# Bagging Classification

In [None]:
bcl=BaggingClassifier()
bcl.fit(x_train, y_train)

In [None]:
y_pred=bcl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Random Forest Classification

In [None]:
rfcl = RandomForestClassifier()
rfcl.fit(x_train, y_train)

In [None]:
y_pred=rfcl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Optimizing hyperparameters

In [None]:
params={"n_estimators":[100,200,300,400,500,600,700,800,900,1000],"criterion":["gini", "entropy"]}
model=RandomForestClassifier()
cv=KFold(n_splits=10,shuffle=True)

In [None]:
gsearch = GridSearchCV(model, params,cv=cv)
results = gsearch.fit(x_train, y_train)
results.best_params_

# Training with the best hyperparameter estimates

In [None]:
# Rebuild the forest from the parameters the grid search selected
# (`results.best_params_`) rather than hard-coding one particular outcome,
# so this cell stays correct when the search result changes.
rfcl = RandomForestClassifier(**results.best_params_)
rfcl.fit(x_train, y_train)

# Extremely randomized trees Classification

In [None]:
excl = ExtraTreesClassifier()
excl.fit(x_train, y_train)

In [None]:
y_pred=excl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Ada Boosting Classification

In [None]:
adacl=AdaBoostClassifier()
adacl.fit(x_train, y_train)

In [None]:
y_pred=adacl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Gradient Boosting Classification

In [None]:
gradcl=GradientBoostingClassifier()
gradcl.fit(x_train, y_train)

In [None]:
y_pred=gradcl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# XGBoost Classification

In [None]:
xgbcl=XGBClassifier()
xgbcl.fit(x_train, y_train)

In [None]:
y_pred=xgbcl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Voting Classifier - Hard Voting

In [None]:
model1=KNeighborsClassifier(n_neighbors=5)
model2=LogisticRegression()
model3=RandomForestClassifier()
T1=("knn",model1)
T2=("lgr",model2)
T3=("rf",model3)

vc=VotingClassifier(estimators=[T1,T2,T3],voting="hard")

In [None]:
vc.fit(x_train,y_train)

In [None]:
y_pred=vc.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Voting Classifier - Soft Voting

In [None]:
model1=KNeighborsClassifier(n_neighbors=5)
model2=LogisticRegression()
model3=RandomForestClassifier()
T1=("knn",model1)
T2=("lgr",model2)
T3=("rf",model3)

vc=VotingClassifier(estimators=[T1,T2,T3],voting="soft")

In [None]:
vc.fit(x_train,y_train)

In [None]:
y_pred=vc.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Stacking Classification sklearn

In [None]:
T1=("lgr",LogisticRegression())
T2=("knn",KNeighborsClassifier(n_neighbors=5))

In [None]:
metamodel=RandomForestClassifier()

In [None]:
stcl=StackingClassifier(estimators=[T1,T2],final_estimator=metamodel)
stcl.fit(x_train, y_train)

In [None]:
y_pred=stcl.predict(x_test)

In [None]:
accuracy_score(y_test,y_pred)

# Stacking Classification mlxtend

In [None]:
bmodel1=LogisticRegression()
bmodel2=KNeighborsClassifier(n_neighbors=5)

In [None]:
metamodel=RandomForestClassifier()

In [None]:
stcl=stc(classifiers=[bmodel1,bmodel2],meta_classifier=metamodel)

In [None]:
stcl.fit(x_train, y_train)

In [None]:
y_pred=stcl.predict(x_test)
accuracy_score(y_test,y_pred)

# (02) Regression

In [None]:
data=pd.read_csv("D:\\Workshops\\Python for Data Science Comprehensive Workshop\\Part 04 - Machine Learning\\Datasets\\Boston.CSV")
data.head()

In [None]:
x=data.iloc[:,:12].values
y=data.iloc[:,12].values

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

# Bagging Regression

In [None]:
bcl=BaggingRegressor()
bcl.fit(x_train, y_train)

In [None]:
y_pred=bcl.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Random Forest Regression

In [None]:
rfrg = RandomForestRegressor()
rfrg.fit(x_train, y_train)

In [None]:
y_pred=rfrg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Extremely randomized trees Regression

In [None]:
exrg = ExtraTreesRegressor()
exrg.fit(x_train, y_train)

In [None]:
y_pred=exrg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Ada Boosting Regression

In [None]:
adarg=AdaBoostRegressor()
adarg.fit(x_train, y_train)

In [None]:
y_pred=adarg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Gradient Boosting Regression

In [None]:
gradrg=AdaBoostRegressor()
gradrg.fit(x_train, y_train)

In [None]:
y_pred=gradrg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# XGBoost Regression

In [None]:
xgbrg=XGBRegressor()
xgbrg.fit(x_train, y_train)

In [None]:
y_pred=xgbrg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Voting Regressor

In [None]:
model1=KNeighborsRegressor(n_neighbors=5)
model2=LinearRegression()
model3=RandomForestRegressor()
T1=("knn",model1)
T2=("lr",model2)
T3=("rf",model3)

vr=VotingRegressor(estimators=[T1,T2,T3])

In [None]:
vr.fit(x_train,y_train)

In [None]:
y_pred=vr.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))

# Stacking Regression mlxtend

In [None]:
bmodel1=LinearRegression()
bmodel2=KNeighborsRegressor(n_neighbors=5)

In [None]:
metamodel=RandomForestRegressor()

In [None]:
strg=strg(regressors=[bmodel1,bmodel2],meta_regressor=metamodel)
strg.fit(x_train, y_train)

In [None]:
y_pred=strg.predict(x_test)

In [None]:
np.sqrt(mean_squared_error(y_test,y_pred))