# Importing Packages

In [109]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Loading Data

In [110]:
data = pd.read_csv("Datasets/classification.csv")
data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [111]:
print(f'Records:{data.shape[0]}\nFeatures:{data.shape[1]}')

Records:400
Features:3


In [112]:
#getting info about the dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Age              400 non-null    int64
 1   EstimatedSalary  400 non-null    int64
 2   Purchased        400 non-null    int64
dtypes: int64(3)
memory usage: 9.5 KB


### Feature Spliting

In [113]:
#spliting of target variables
x = data.iloc[:,:-1].values
y = data.iloc[:,-1].values

### Train Test Split

In [114]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=123)

In [115]:
print(f"Training Set Records : {x_train.shape[0]}\nTesting Set Records : {x_test.shape[0]}")

Training Set Records : 300
Testing Set Records : 100


### Data Preprocessing

In [116]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

# Bagging Classifier

In [117]:
#defining a class for bagging classifier

In [118]:
class Bagging_Classifier:
    #constructor
    def __init__(self,base_estimator, n_estimators=10, max_samples=1.0,
                 max_features=1.0, bootstrap=True, bootstrap_features=False,
                 n_jobs=None, random_state=None, verbose=0):
        
        #defining Variables
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose
        
        #creating bagging classifer
        self.bagging_clf = BaggingClassifier(base_estimator=self.base_estimator,n_estimators=self.n_estimators,max_samples=self.max_samples,
                                             max_features=self.max_features,
                                             bootstrap=self.bootstrap,
                                             bootstrap_features=self.bootstrap_features,
                                             n_jobs=self.n_jobs,
                                             random_state=self.random_state,
                                             verbose=self.verbose)
        
        #fiting into classifier
    def fit(self,x_train,y_train):
        self.bagging_clf.fit(x_train,y_train)
        
        #making predictions
    def predict(self,x_test):
        return self.bagging_clf.predict(x_test)
        
        #getting scores
    def score(self,x_test,y_test):
        return self.bagging_clf.score(x_test,y_test)
        
        #hyperparameter turning to find the best parameters
    def tune_hyper_params(self,x_train,y_train,param_grid):
        self.grid_search = GridSearchCV(self.bagging_clf,param_grid,cv=10)
        self.grid_search.fit(x_train,y_train)
        print(f"Best Parameters : {self.grid_search.best_params_}")
        print(f"Best Accuracy Score : {self.grid_search.best_score_}")

### Logistic Regression

##### Using the following class for logistic regression

In [119]:
#creating an instance of Logistic Regression
LR = LogisticRegression()

In [120]:
#creating an instance of BaggingClassifier
bagging_clf = Bagging_Classifier(base_estimator=LR, n_estimators=50, max_samples=0.5)

In [121]:
# Fit the classifier on the training data
bagging_clf.fit(x_train,y_train)

In [122]:
# Predicting Values for the test data
y_pred = bagging_clf.predict(x_test)

In [123]:
# Accuracy Score before params tune
bagging_clf.score(x_test,y_test)

0.84

In [124]:
# Getting Classification Report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.82      0.95      0.88        63
           1       0.89      0.65      0.75        37

    accuracy                           0.84       100
   macro avg       0.86      0.80      0.82       100
weighted avg       0.85      0.84      0.83       100



### Tunning Parameters

In [125]:
param_grid = {'base_estimator__C': [0.1, 1, 10], 'n_estimators': [50, 100, 200]}
bagging_clf.tune_hyper_params(x_train, y_train,param_grid)

Best Parameters : {'base_estimator__C': 1, 'n_estimators': 100}
Best Accuracy Score : 0.85


### Naive Bayes

##### Using the following class for  Naive Bayes

In [126]:
#creating an instance of Naive Bayes
GB = GaussianNB()

In [127]:
#creating an instance of BaggingClassifier
bagging_clf = Bagging_Classifier(base_estimator=GB, n_estimators=50, max_samples=0.5)

In [128]:
# Fit the classifier on the training data
bagging_clf.fit(x_train,y_train)

In [129]:
# Predicting Values for the test data
y_pred = bagging_clf.predict(x_test)

In [130]:
# Accuracy Score before params tune
bagging_clf.score(x_test,y_test)

0.9

In [131]:
# Getting Classification Report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.88      0.97      0.92        63
           1       0.94      0.78      0.85        37

    accuracy                           0.90       100
   macro avg       0.91      0.88      0.89       100
weighted avg       0.90      0.90      0.90       100



### Decision Tree

In [132]:
#creating an instance of Decision Tree
dT = DecisionTreeClassifier(max_depth=5)

In [133]:
# creating an instance of BaggingClassifier
bagging_clf = Bagging_Classifier(base_estimator=dT, n_estimators=50, max_samples=0.5)

In [134]:
# Fit the classifier on the training data
bagging_clf.fit(x_train, y_train)

In [135]:
# Use the classifier to make predictions on the test data
y_pred = bagging_clf.predict(x_test)

In [136]:
# Getting Scores
bagging_clf.score(x_test,y_test)

0.89

In [137]:
# Getting Classification Report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      0.87      0.91        63
           1       0.81      0.92      0.86        37

    accuracy                           0.89       100
   macro avg       0.88      0.90      0.88       100
weighted avg       0.90      0.89      0.89       100



### Tunning Parameters

In [138]:
param_grid = {'base_estimator__max_depth': [3, 5, 7, 9], 'n_estimators': [50, 100, 200]}
bagging_clf.tune_hyper_params(x_train, y_train,param_grid)

Best Parameters : {'base_estimator__max_depth': 3, 'n_estimators': 50}
Best Accuracy Score : 0.9266666666666667


### K-NN 

In [139]:
# Create an instance of the KNeighborsClassifier with 3 neighbors
knn = KNeighborsClassifier(n_neighbors=3)

In [140]:
# Create an instance of the BaggingClassifierWithTuning class, using the KNN estimator
bagging_clf = Bagging_Classifier(base_estimator=knn, n_estimators=50, max_samples=0.5)

In [141]:
# Fit the classifier on the training data
bagging_clf.fit(x_train, y_train)

In [142]:
# Use the classifier to make predictions on the test data
y_pred = bagging_clf.predict(x_test)

In [143]:
bagging_clf.score(x_test,y_test)

0.89

In [144]:
# Getting Classification Report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      0.87      0.91        63
           1       0.81      0.92      0.86        37

    accuracy                           0.89       100
   macro avg       0.88      0.90      0.88       100
weighted avg       0.90      0.89      0.89       100



### Tunning Parameters

In [145]:
param_grid = {'base_estimator__n_neighbors': [3, 5, 7, 9, 15], 'n_estimators': [50, 100, 200]}
bagging_clf.tune_hyper_params(x_train, y_train,param_grid)

Best Parameters : {'base_estimator__n_neighbors': 5, 'n_estimators': 100}
Best Accuracy Score : 0.9200000000000002


### SVC

In [146]:
class BaggingSVC:
    def __init__(self, n_estimators=10, max_samples=1.0, max_features=1.0,
                 bootstrap=True, bootstrap_features=False, oob_score=False,
                 warm_start=False, n_jobs=None, random_state=None, verbose=0):
        
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose
        self.svc = SVC()
        self.bagging = BaggingClassifier(self.svc)
        
    def fit(self, x, y):
        self.bagging.set_params(n_estimators=self.n_estimators, 
                                max_samples=self.max_samples,
                                max_features=self.max_features, 
                                bootstrap=self.bootstrap, 
                                bootstrap_features=self.bootstrap_features, 
                                oob_score=self.oob_score, 
                                warm_start=self.warm_start, 
                                n_jobs=self.n_jobs, 
                                random_state=self.random_state, 
                                verbose=self.verbose)
        self.bagging.fit(x, y)
        return self
    
    def predict(self, x):
        return self.bagging.predict(x)
    
    def score(self, x, y):
        return self.bagging.score(x, y)
    
    def hyperparameter_tune(self, x, y, param_grid, cv=5):
        self.bagging.set_params(base_estimator=self.svc)
        self.grid = GridSearchCV(estimator=self.bagging, param_grid=param_grid, cv=cv)
        self.grid.fit(x, y)
        self.bagging.set_params(**self.grid.best_params_)
        print("Best parameters: ", self.grid.best_params_)
        print("Best Score: ",self.grid.best_score_)

In [147]:
# Create an object of the BaggingSVC class
bagging_svc = BaggingSVC()

# Fit the classifier on the training data
bagging_svc.fit(x_train, y_train)

# Make predictions on the test data
y_pred = bagging_svc.predict(x_test)

# Print the accuracy score
print("Accuracy: ", bagging_svc.score(x_test, y_test))

Accuracy:  0.89


In [148]:
# Define the parameter grid for hyperparameter tuning
param_grid = {'base_estimator__C': [0.1, 1, 10],
              'base_estimator__kernel': ['linear', 'rbf']}

# Perform hyperparameter tuning
bagging_svc.hyperparameter_tune(x_train, y_train, param_grid)

Best parameters:  {'base_estimator__C': 1, 'base_estimator__kernel': 'rbf'}
Best Score:  0.9233333333333335
