In [84]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier,VotingRegressor,BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.preprocessing import OneHotEncoder,LabelEncoder, OrdinalEncoder
from sklearn.compose import make_column_selector,make_column_transformer
from sklearn.metrics import classification_report,f1_score,log_loss,accuracy_score,r2_score,log_loss

<h1 style = color:orange>Voting Classifier</h1>

In [36]:
sonar = pd.read_csv('../Datasets/Sonar.csv')

le = LabelEncoder()

X = sonar.drop('Class',axis = 1)
y = sonar['Class']

y_le = le.fit_transform(y)

X_train,X_test,y_train,y_test = train_test_split(X,y_le,test_size=0.3,stratify=y,random_state=25)

dtc1 = DecisionTreeClassifier(random_state=25)
dtc2 = DecisionTreeClassifier(random_state=25,max_depth=3)

knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=3)

nb = GaussianNB()

voting = VotingClassifier(estimators=[('tree1',dtc1),('tree2',dtc2),('knn1',knn1),('knn2',knn2),('nb',nb)],voting='soft')  # model esembling

voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(classification_report(y_test,y_pred))

#evaluating individual estimators 
for i in range(len(voting.estimators_)):
    print('Estimator : ',voting.estimators_[i])
    print('Accuracy Score = ',accuracy_score(y_test,voting.estimators_[i].predict(X_test)))

              precision    recall  f1-score   support

           0       0.68      0.88      0.77        34
           1       0.79      0.52      0.62        29

    accuracy                           0.71        63
   macro avg       0.74      0.70      0.70        63
weighted avg       0.73      0.71      0.70        63

Estimator :  DecisionTreeClassifier(random_state=25)
Accuracy Score =  0.6984126984126984
Estimator :  DecisionTreeClassifier(max_depth=3, random_state=25)
Accuracy Score =  0.6666666666666666
Estimator :  KNeighborsClassifier()
Accuracy Score =  0.746031746031746
Estimator :  KNeighborsClassifier(n_neighbors=3)
Accuracy Score =  0.8095238095238095
Estimator :  GaussianNB()
Accuracy Score =  0.6349206349206349


<h1 style = color:orange>Soft Voting</h1>

In [34]:
sonar = pd.read_csv('../Datasets/Sonar.csv')

# le = LabelEncoder()

X = sonar.drop('Class',axis = 1)
y = sonar['Class']

y_le = le.fit_transform(y)

X_train,X_test,y_train,y_test = train_test_split(X,y_le,test_size=0.3,stratify=y,random_state=25)

dtc1 = DecisionTreeClassifier(random_state=25)
dtc2 = DecisionTreeClassifier(random_state=25,max_depth=3)

knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=3)

nb = GaussianNB()


voting = VotingClassifier(estimators=[('tree1',dtc1),('tree2',dtc2),('knn1',knn1),('knn2',knn2),('nb',nb)],voting='soft')  # model esembling

voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(classification_report(y_test,y_pred))



              precision    recall  f1-score   support

           0       0.68      0.88      0.77        34
           1       0.79      0.52      0.62        29

    accuracy                           0.71        63
   macro avg       0.74      0.70      0.70        63
weighted avg       0.73      0.71      0.70        63



<h1 style = color:orange>Weighted Average</h1>

In [38]:
sonar = pd.read_csv('../Datasets/Sonar.csv')

# le = LabelEncoder()

X = sonar.drop('Class',axis = 1)
y = sonar['Class']

y_le = le.fit_transform(y)

X_train,X_test,y_train,y_test = train_test_split(X,y_le,test_size=0.3,stratify=y,random_state=25)

dtc1 = DecisionTreeClassifier(random_state=25)
dtc2 = DecisionTreeClassifier(random_state=25,max_depth=3)

knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=3)

nb = GaussianNB()


voting = VotingClassifier(estimators=[('tree1',dtc1),('tree2',dtc2),('knn1',knn1),('knn2',knn2),('nb',nb)],voting='soft',weights=[2,1,3,4,1])  # model esembling

voting.fit(X_train,y_train)
y_pred = voting.predict(X_test)
print(classification_report(y_test,y_pred))


              precision    recall  f1-score   support

           0       0.73      0.97      0.84        34
           1       0.94      0.59      0.72        29

    accuracy                           0.79        63
   macro avg       0.84      0.78      0.78        63
weighted avg       0.83      0.79      0.78        63



<h1>HR Dataset</h1>

In [45]:
ohe = OneHotEncoder(sparse_output=False,drop='first',).set_output(transform='pandas')
oe = OrdinalEncoder(categories=[['low','medium','high']]).set_output(transform='pandas')

hr = pd.read_csv('../Cases/HRAnalytics/HR_comma_sep.csv')

column_transform = make_column_transformer((ohe,['Department']),(oe,['salary']),remainder='passthrough',verbose_feature_names_out=False).set_output(transform='pandas')

X = hr.drop('left',axis = 1)
y = hr['left']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=25,stratify=y)

X_train_ct = column_transform.fit_transform(X_train)
X_test_ct =column_transform.transform(X_test)


dtc1 = DecisionTreeClassifier(random_state=25)
dtc2 = DecisionTreeClassifier(random_state=25,max_depth=3)

knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=3)

nb = GaussianNB()


voting = VotingClassifier(estimators=[('tree1',dtc1),('tree2',dtc2),('knn1',knn1),('knn2',knn2),('nb',nb)])  # model esembling

voting.fit(X_train_ct,y_train)
y_pred = voting.predict(X_test_ct)
print(classification_report(y_test,y_pred))
print(log_loss(y_test, voting.predict(X_test_ct)))

              precision    recall  f1-score   support

           0       0.98      0.97      0.98      3429
           1       0.90      0.95      0.93      1070

    accuracy                           0.96      4499
   macro avg       0.94      0.96      0.95      4499
weighted avg       0.97      0.96      0.96      4499

1.2978599353271791


In [44]:
voting = VotingClassifier(estimators=[('tree1',dtc1),('tree2',dtc2),('knn1',knn1),('knn2',knn2),('nb',nb)], voting='soft')  # model esembling

voting.fit(X_train_ct,y_train)
y_pred = voting.predict(X_test_ct)
print(classification_report(y_test,y_pred))
print(log_loss(y_test, voting.predict(X_test_ct)))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98      3429
           1       0.93      0.95      0.94      1070

    accuracy                           0.97      4499
   macro avg       0.96      0.97      0.96      4499
weighted avg       0.97      0.97      0.97      4499

0.9934236542010508


<h1 style = color:orange>Voting Regressor</h1>

In [82]:
concrete = pd.read_csv('../Cases/Concrete_Strength/Concrete_Data.csv')

X = concrete.drop('Strength', axis =1)
y = concrete['Strength']

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=25,test_size=0.3)

dtr = DecisionTreeRegressor(random_state = 25)
dtr2 = DecisionTreeRegressor(random_state = 25,max_depth=5)

lr  = LinearRegression()
en = ElasticNet()

voting = VotingRegressor(estimators=[('Tree',dtr),('Tree2',dtr2),('LR',lr),("EL",en)],weights=[10,5,3,3])
voting.fit(X_train,y_train)

y_pred = voting.predict(X_test)
print(r2_score(y_test,y_pred))

for i in range(len(voting.estimators_)):
    print(voting.estimators_[i], ':',end = ' ')
    print(r2_score(y_test,voting.estimators_[i].predict(X_test)))
    


0.8550749354114082
DecisionTreeRegressor(random_state=25) : 0.8127760533837747
DecisionTreeRegressor(max_depth=5, random_state=25) : 0.7311101169515943
LinearRegression() : 0.6351839142464111
ElasticNet() : 0.6345321364921961


<h1 style = 'color: orange'>Bagging</h1>

In [97]:
sonar = pd.read_csv('../Datasets/Sonar.csv')

le = LabelEncoder()

X = sonar.drop('Class',axis = 1)
y = sonar['Class']

# y_le = le.fit_transform(y)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,stratify=y,random_state=25)

dtc1 = DecisionTreeClassifier(random_state=25)
dtc2 = DecisionTreeClassifier(random_state=25,max_depth=3)

knn1 = KNeighborsClassifier(n_neighbors=5)
knn2 = KNeighborsClassifier(n_neighbors=3)

nb = GaussianNB()

est_list = (dtc1,dtc2,knn1,knn2,nb)
n_est = [10,15,25,50]

bagging = BaggingClassifier(estimator=nb,n_estimators=10)  # model esembling

bagging.fit(X_train,y_train)
y_pred = bagging.predict(X_test)
print(classification_report(y_test,y_pred))



for e in est_list:
    for n in n_est
        bagging = BaggingClassifier(estimator=e,n_estimators=n)
        bagging.fit(X_train,y_train)
        y_pred = bagging.predict(X_test)


              precision    recall  f1-score   support

           M       0.61      0.74      0.67        34
           R       0.59      0.45      0.51        29

    accuracy                           0.60        63
   macro avg       0.60      0.59      0.59        63
weighted avg       0.60      0.60      0.59        63



In [None]:
1