In [29]:
import warnings

import numpy as np
import pandas as pd
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.ensemble import VotingClassifier, VotingRegressor, BaggingClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression, ElasticNet
from sklearn.metrics import accuracy_score, r2_score, roc_auc_score
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
warnings.filterwarnings('ignore')

In [13]:
# Read the glass data; the 'Type' column is used as the class label and
# every remaining column as a feature.
glass = pd.read_csv('Glass.csv')
y = glass['Type']
X = glass.drop(columns='Type')

# Stratified 70/30 hold-out split, seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

# Logistic Regression with Bagging and Without Bagging

In [14]:
# Logistic regression wrapped in a bagging ensemble: train on the training
# split, then report accuracy on the held-out test split.
log = LogisticRegression(random_state=24)
bagg = BaggingClassifier(estimator=log, random_state=24).fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.6461538461538462


In [15]:
# Baseline for comparison: one logistic regression, no bagging, same split.
log = LogisticRegression(random_state=24).fit(X_train, y_train)
y_pred = log.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.6461538461538462


# SVM with Bagging and Without Bagging

In [16]:
# Support vector classifier wrapped in a bagging ensemble, scored by
# accuracy on the held-out test split.
svm = SVC(probability=True, random_state=24)
bagg = BaggingClassifier(estimator=svm, random_state=24).fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.6153846153846154


In [17]:
# Baseline for comparison: one support vector classifier, no bagging.
svm = SVC(probability=True, random_state=24).fit(X_train, y_train)
y_pred = svm.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.35384615384615387


# Decision Tree Classifier with Bagging and Without Bagging

In [18]:
# Decision tree classifier wrapped in a bagging ensemble, scored by
# accuracy on the held-out test split.
# NOTE: `dtr` holds a classifier despite the regressor-style name.
dtr = DecisionTreeClassifier(random_state=24)
bagg = BaggingClassifier(estimator=dtr, random_state=24).fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.6615384615384615


In [20]:
# Baseline for comparison: one decision tree classifier, no bagging.
dtc = DecisionTreeClassifier(random_state=24).fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.676923076923077


# GridSearchCV with Bagging and Without Bagging

In [45]:
# Grid-search a "scaler -> bagging" pipeline over three things at once:
# the base learner inside the ensemble, the number of estimators, and the
# scaling step. Candidates are compared by negative log loss under
# 5-fold stratified cross-validation on the full data set.
#
# Every candidate base learner is constructed in this cell, so the search
# does not depend on `svm` / `dtc` objects left behind by earlier cells and
# runs the same way after a kernel restart.
log = LogisticRegression(random_state=24)
base_estimators = [
    log,
    SVC(probability=True, random_state=24),
    DecisionTreeClassifier(random_state=24),
]

scaler_mm = MinMaxScaler()
scaler_ss = StandardScaler()

bagg = BaggingClassifier(random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# The 'SC' step starts as None (no scaling); the grid swaps the scalers in.
pipe = Pipeline([('SC', None), ('BAGG', bagg)])
params = {'BAGG__estimator': base_estimators,
          'BAGG__n_estimators': [10, 50, 100],
          'SC': [None, scaler_mm, scaler_ss]}
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)

In [46]:
print("Best Score: ",gcv.best_score_)
print("Best Params: ",gcv.best_params_)

Best Score:  -0.8573207454030566
Best Params:  {'BAGG__estimator': SVC(probability=True, random_state=24), 'BAGG__n_estimators': 100, 'SC': StandardScaler()}


In [59]:
# Tune the ensemble size of a bagged logistic regression with 5-fold
# stratified cross-validation, scored by negative log loss.
log = LogisticRegression(random_state=24)
bagg = BaggingClassifier(estimator=log, random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
params = dict(n_estimators=[10, 50, 100])
gcv = GridSearchCV(
    bagg,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
).fit(X, y)
print("Best Score: ", gcv.best_score_)
print("Best Params: ", gcv.best_params_)

Best Score:  -0.9555422710150381
Best Params:  {'n_estimators': 50}


In [60]:
# Tune the ensemble size of a bagged SVC whose C is fixed at 0.4, using
# 5-fold stratified cross-validation scored by negative log loss.
svm = SVC(C=0.4, probability=True, random_state=24)
bagg = BaggingClassifier(estimator=svm, random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
params = dict(n_estimators=[10, 50, 100])
gcv = GridSearchCV(
    bagg,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
).fit(X, y)
print("Best Score: ", gcv.best_score_)
print("Best Params: ", gcv.best_params_)

Best Score:  -1.0814932458898299
Best Params:  {'n_estimators': 100}


In [61]:
# Tune the ensemble size of a bagged decision tree classifier with 5-fold
# stratified cross-validation, scored by negative log loss.
# NOTE: `dtr` holds a classifier despite the regressor-style name.
dtr = DecisionTreeClassifier(random_state=24)
bagg = BaggingClassifier(estimator=dtr, random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
params = dict(n_estimators=[10, 50, 100])
gcv = GridSearchCV(
    bagg,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
).fit(X, y)
print("Best Score: ", gcv.best_score_)
print("Best Params: ", gcv.best_params_)

Best Score:  -1.1138911163016985
Best Params:  {'n_estimators': 100}


In [62]:
# Jointly tune the ensemble size and the base SVC's C for a bagged SVC,
# using 5-fold stratified cross-validation scored by negative log loss.
svm = SVC(probability=True, random_state=24)
bagg = BaggingClassifier(estimator=svm, random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
# `estimator__C` reaches through the ensemble to the wrapped SVC.
params = dict(
    n_estimators=[10, 50, 100],
    estimator__C=np.linspace(0.001, 3, 5),
)
gcv = GridSearchCV(
    bagg,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
).fit(X, y)
print("Best Score: ", gcv.best_score_)
print("Best Params: ", gcv.best_params_)

Best Score:  -1.0779429395743088
Best Params:  {'estimator__C': 2.25025, 'n_estimators': 100}


In [63]:
# Jointly tune the ensemble size and the base tree's depth / leaf / split
# limits for a bagged decision tree classifier, using 5-fold stratified
# cross-validation scored by negative log loss.
# NOTE: `dtr` holds a classifier despite the regressor-style name.
dtr = DecisionTreeClassifier(random_state=24)
bagg = BaggingClassifier(estimator=dtr, random_state=24)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
# `estimator__*` keys reach through the ensemble to the wrapped tree.
params = dict(
    n_estimators=[10, 50, 100],
    estimator__min_samples_leaf=[1, 10, 20],
    estimator__min_samples_split=[5, 10, 20],
    estimator__max_depth=[None, 2, 3],
)
gcv = GridSearchCV(
    bagg,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
).fit(X, y)
print("Best Score: ", gcv.best_score_)
print("Best Params: ", gcv.best_params_)

Best Score:  -0.686790242114751
Best Params:  {'estimator__max_depth': None, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 5, 'n_estimators': 100}
