## Using different Ensemble techniques

## Bagging Ensemble

In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn.model_selection import KFold # for sub samples
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
data = pd.read_csv('Fraud_check.csv')
data

Unnamed: 0,Undergrad,Marital.Status,Taxable.Income,City.Population,Work.Experience,Urban
0,NO,Single,68833,50047,10,YES
1,YES,Divorced,33700,134075,18,YES
2,NO,Married,36925,160205,30,YES
3,YES,Single,50190,193264,15,YES
4,NO,Married,81002,27533,28,NO
...,...,...,...,...,...,...
595,YES,Divorced,76340,39492,7,YES
596,YES,Divorced,69967,55369,2,YES
597,NO,Divorced,47334,154058,0,YES
598,YES,Married,98592,180083,17,NO


In [3]:
data = pd.get_dummies(data)

In [4]:
data

Unnamed: 0,Taxable.Income,City.Population,Work.Experience,Undergrad_NO,Undergrad_YES,Marital.Status_Divorced,Marital.Status_Married,Marital.Status_Single,Urban_NO,Urban_YES
0,68833,50047,10,1,0,0,0,1,0,1
1,33700,134075,18,0,1,1,0,0,0,1
2,36925,160205,30,1,0,0,1,0,0,1
3,50190,193264,15,0,1,0,0,1,0,1
4,81002,27533,28,1,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...
595,76340,39492,7,0,1,1,0,0,0,1
596,69967,55369,2,0,1,1,0,0,0,1
597,47334,154058,0,1,0,1,0,0,0,1
598,98592,180083,17,0,1,0,1,0,1,0


In [5]:
Status = []
for i in range(0, (data.shape)[0]):
    if data['Taxable.Income'].iloc[i] <= 30000:
        Status.append('Risky')
    else:
        Status.append('Safe')

In [6]:
data['Status'] = Status
data

Unnamed: 0,Taxable.Income,City.Population,Work.Experience,Undergrad_NO,Undergrad_YES,Marital.Status_Divorced,Marital.Status_Married,Marital.Status_Single,Urban_NO,Urban_YES,Status
0,68833,50047,10,1,0,0,0,1,0,1,Safe
1,33700,134075,18,0,1,1,0,0,0,1,Safe
2,36925,160205,30,1,0,0,1,0,0,1,Safe
3,50190,193264,15,0,1,0,0,1,0,1,Safe
4,81002,27533,28,1,0,0,1,0,1,0,Safe
...,...,...,...,...,...,...,...,...,...,...,...
595,76340,39492,7,0,1,1,0,0,0,1,Safe
596,69967,55369,2,0,1,1,0,0,0,1,Safe
597,47334,154058,0,1,0,1,0,0,0,1,Safe
598,98592,180083,17,0,1,0,1,0,1,0,Safe


In [7]:
x = data.iloc[:, 0:-1]
y = data.iloc[:, -1]

In [8]:
kfold = KFold(n_splits=10, random_state=42, shuffle = True)
cart = DecisionTreeClassifier()
num_trees = 500
model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=None)
results = cross_val_score(model, x, y, cv=kfold)
print(results.mean()) 

0.9983333333333333


## Random Forest Ensemble

In [9]:
from sklearn.ensemble import RandomForestClassifier

num_trees = 500
max_features = 3
kfold = KFold(n_splits=10, random_state=42, shuffle = True)
model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)
results = cross_val_score(model, x, y, cv=kfold)
print(results.mean())

0.9983333333333333


## AdaBoost Ensemble

In [11]:
from sklearn.ensemble import AdaBoostClassifier

num_trees = 500
kfold = KFold(n_splits=10, random_state=42, shuffle = True)
model = AdaBoostClassifier(n_estimators=num_trees, random_state=42)
results = cross_val_score(model, x, y, cv=kfold)
print(results.mean())

0.9983333333333333


## Stacking with Logistic, Decision Tree, and SVC

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

kfold = KFold(n_splits=10, random_state=42, shuffle = True)

# create the sub models
estimators = []
model1 = LogisticRegression(max_iter=500)
estimators.append(('logistic', model1))
model2 = DecisionTreeClassifier()
estimators.append(('cart', model2))
model3 = SVC()
estimators.append(('svm', model3))

# create the ensemble model
ensemble = VotingClassifier(estimators)
results = cross_val_score(ensemble, x, y, cv=kfold)
print(results.mean())

0.9883333333333333
