<a href="https://colab.research.google.com/github/Chirag314/Cryotherapy/blob/main/Cryotherapy_Ensemble_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This notebook is copied from exercises in the book *Ensemble Machine Learning Cookbook*.

In [1]:
# Load the Cryotherapy dataset directly from GitHub.
# The *raw* file URL must be used here; note that it differs from the normal GitHub page URL.
import pandas as pd

# Remove pandas' row/column display limits so printed frames are not truncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

url = 'https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning-Cookbook/master/Chapter02/Cryotherapy.csv'
df_cryotherapydata = pd.read_csv(url)

# Preview the first five rows of the dataset.
print(df_cryotherapydata.head(n=5))

   sex  age   Time  Number_of_Warts  Type  Area  Result_of_Treatment
0    1   35  12.00                5     1   100                    0
1    1   29   7.00                5     1    96                    1
2    1   50   8.00                1     3   132                    0
3    1   32  11.75                7     3   750                    0
4    1   67   9.25                1     1    42                    0


##### The following steps show how to combine the predictions of decision tree, SVM, and logistic regression models for a classification problem.

In [2]:
#import required libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier


In [4]:
# Split the dataset into training and test samples.
from sklearn.model_selection import train_test_split

# Feature (predictor) columns and the response column.
feature_columns = ['sex', 'age', 'Time', 'Number_of_Warts', 'Type', 'Area']
X = df_cryotherapydata[feature_columns]
y = df_cryotherapydata['Result_of_Treatment']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [5]:
# Instantiate the three base models: decision tree, SVM, and logistic regression.
# Each one gets a fixed random_state so repeated runs give the same results.
dt_model = DecisionTreeClassifier(random_state=1)

# NOTE(review): the features are passed to SVC unscaled (e.g. Area ranges into the hundreds),
# and the recorded SVC accuracy is much lower than the other models — consider scaling; TODO confirm.
svm_model = SVC(random_state=1)

logit_model = LogisticRegression(random_state=1)

# (name, estimator) pairs in the format expected by VotingClassifier.
estimators = [
    ('DecisionTree', dt_model),
    ('SupportVector', svm_model),
    ('Logistic Regression', logit_model),
]

In [6]:
# Train each base classifier on its own and report its accuracy on the test set,
# giving a baseline to compare the ensemble against.

from sklearn.metrics import accuracy_score

for each_estimator in [dt_model, svm_model, logit_model]:
    # fit() returns the fitted estimator, so prediction can be chained onto it.
    Y_pred = each_estimator.fit(X_train, Y_train).predict(X_test)
    print(type(each_estimator).__name__, accuracy_score(Y_test, Y_pred))

DecisionTreeClassifier 0.8333333333333334
SVC 0.4444444444444444
LogisticRegression 0.9444444444444444


In [7]:
# Build the ensemble with VotingClassifier() using hard voting
# (the ensemble predicts the class label chosen by the majority of the base models).

ensemble_model = VotingClassifier(voting='hard', estimators=estimators).fit(X_train, Y_train)

# Score the ensemble on the held-out test set.
predicted_labels = ensemble_model.predict(X_test)
print(
    "Classifier Accuracy using Hard voting is :",
    accuracy_score(Y_test, predicted_labels),
)

Classifier Accuracy using Hard voting is : 0.8333333333333334


In [None]:
#Many classifiers can estimate class probabilities. With soft voting, the class labels are predicted by averaging the class probabilities estimated by the individual classifiers. Soft voting is recommended for an ensemble of well-tuned classifiers.

In [10]:
# Build the ensemble with VotingClassifier() using soft voting:
# the only change to the ensemble itself is voting='soft' instead of voting='hard'.

# Re-create the base models. SVC is built with probability=True so that it can
# provide the class probabilities that soft voting averages.
dt_model = DecisionTreeClassifier(random_state=1)
svm_model = SVC(random_state=1, probability=True)
logit_model = LogisticRegression(random_state=1)

# (name, estimator) pairs in the format expected by VotingClassifier.
estimators = [
    ('DecisionTree', dt_model),
    ('SupportVector', svm_model),
    ('Logistic Regression', logit_model),
]

# Accuracy of each base model on its own, for comparison with the ensemble.
for each_estimator in [dt_model, svm_model, logit_model]:
    Y_pred = each_estimator.fit(X_train, Y_train).predict(X_test)
    print(type(each_estimator).__name__, accuracy_score(Y_test, Y_pred))

# Fit the soft-voting ensemble and score it on the held-out test set.
ensemble_model = VotingClassifier(voting='soft', estimators=estimators).fit(X_train, Y_train)
predicted_labels = ensemble_model.predict(X_test)
print(
    "Classifier Accuracy using Soft Voting: ",
    accuracy_score(Y_test, predicted_labels),
)

DecisionTreeClassifier 0.8333333333333334
SVC 0.4444444444444444
LogisticRegression 0.9444444444444444
Classifier Accuracy using Soft Voting:  0.8888888888888888
