### Import Libraries

In [1]:
import pandas as pd
import time
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
#!pip install mlxtend
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
#!pip install sklearn.feature_selection import SequentialFeatureSelector as sfs

### Function to Implement Forward Selection for Tree Based Algorithms

In [2]:
def forward(indep_X,dep_Y,n):
    forward_list=[]
    RF=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    DT=DecisionTreeClassifier(criterion='gini',max_features='sqrt',splitter='best',random_state=0)
    forwardModelList=[RF,DT]
    for i in forwardModelList:
        print(i)
        log_forward=sfs(i, k_features=n, forward=True, floating=False,  scoring='accuracy', cv=5, n_jobs=1)
        #log_forward=sfs(i,max_features=n)
        log_fit=log_forward.fit(indep_X,dep_Y)
        log_forward_feature=log_fit.transform(indep_X)
        forward_list.append(log_forward_feature)
    return forward_list

### Function to Split Train - Test Set and Standard Scaling the input

In [3]:
def split_scale(indep_X,dep_Y):
    X_train,X_test,Y_train,Y_test=train_test_split(indep_X,dep_Y,test_size=0.25,random_state=0)
    sc=StandardScaler()
    X_train=sc.fit_transform(X_train)
    X_test=sc.transform(X_test)
    return X_train,X_test,Y_train,Y_test

### Function to Generate Metric Reports: Confusion Matrix, Classification Report & Accuracy Score

In [4]:
def cm_predict(classifier,X_test):
    y_pred=classifier.predict(X_test)
    from sklearn.metrics import confusion_matrix
    cm=confusion_matrix(Y_test,y_pred)
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report
    Accuracy=accuracy_score(Y_test,y_pred)
    report=classification_report(Y_test,y_pred)
    return classifier,Accuracy,report,X_test,Y_test,cm

### Logistic Regression Function

In [5]:
def logistic(X_train,Y_train,X_test):
    from sklearn.linear_model import LogisticRegression
    classifier=LogisticRegression(random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### SVM Linear Function

In [6]:
def svm_linear(X_train,Y_train,X_test):
    from sklearn.svm import SVC
    classifier=SVC(kernel='rbf',random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### SVM Non-Linear Function

In [7]:
def svm_NL(X_train,Y_train,X_test):
    from sklearn.svm import SVC
    classifier=SVC(kernel='rbf',random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### Naive Bayes Function

In [8]:
def Naive(X_train,Y_train,X_test):
    from sklearn.naive_bayes import GaussianNB
    classifier=GaussianNB()
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### KNN Function

In [9]:
def knn(X_train,Y_train,X_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier=KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### Decision Tree Function

In [10]:
def Decision(X_train,Y_train,X_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier=DecisionTreeClassifier(criterion='entropy',random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### Random Forest Function

In [11]:
def random(X_train,Y_train,X_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### Function to create Data Frame

In [12]:
def forward_classification(alog,asvml,asvmnl,aknn,anav,adt,arf):
    forwarddataframe=pd.DataFrame(index=['RandomForest','DecisionTree'],columns=['Logistic','SVML','SVMNL','KNN','NB','DecisionTree','RandomForest'])
    for number,idex in enumerate(forwarddataframe.index):
        forwarddataframe['Logistic'][idex]=alog[number]
        forwarddataframe['SVML'][idex]=asvml[number]
        forwarddataframe['SVMNL'][idex]=asvmnl[number]
        forwarddataframe['KNN'][idex]=aknn[number]
        forwarddataframe['NB'][idex]=anav[number]
        forwarddataframe['DecisionTree'][idex]=adt[number]
        forwarddataframe['RandomForest'][idex]=arf[number]
    return forwarddataframe

### Dataset Execution

In [28]:
dataset=pd.read_csv('prep.csv',index_col=None)
df=dataset
df=pd.get_dummies(df,drop_first=True)
indep_X=df.drop('classification_yes',1)
dep_Y=df['classification_yes']
forward_list=forward(indep_X,dep_Y,7)
alog=[]
asvml=[]
asvmnl=[]
aknn=[]
anav=[]
adt=[]
arf=[]
for i in forward_list:
    X_train,X_test,Y_train,Y_test=split_scale(i,dep_Y)
    
    classifier,Accuracy,report,X_test,Y_test,cm=logistic(X_train,Y_train,X_test)
    alog.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=svm_linear(X_train,Y_train,X_test)
    asvml.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=svm_NL(X_train,Y_train,X_test)
    asvmnl.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=knn(X_train,Y_train,X_test)
    aknn.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=Naive(X_train,Y_train,X_test)
    anav.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=Decision(X_train,Y_train,X_test)
    adt.append(Accuracy)
    
    classifier,Accuracy,report,X_test,Y_test,cm=random(X_train,Y_train,X_test)
    arf.append(Accuracy)

result=forward_classification(alog,asvml,asvmnl,aknn,anav,adt,arf)
result

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)


Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.97,0.96,0.96,0.97,0.96,0.99,0.99
DecisionTree,0.98,0.96,0.96,0.99,0.91,0.99,0.99


In [21]:
result
# for n=3

Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.98,0.96,0.96,0.99,0.91,0.99,0.99
DecisionTree,0.97,0.97,0.97,0.99,0.82,0.99,0.99


In [23]:
result
# for n=4

Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.95,0.96,0.96,0.99,0.93,0.99,0.99
DecisionTree,0.97,0.97,0.97,0.99,0.83,0.99,0.99


In [25]:
result
# for n=5

Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.96,0.96,0.96,0.99,0.96,0.99,0.99
DecisionTree,0.99,0.99,0.99,0.99,0.88,0.99,0.99


In [27]:
result
# for n=6

Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.97,0.96,0.96,0.98,0.96,0.99,0.99
DecisionTree,0.98,0.96,0.96,0.99,0.91,0.99,0.97


In [29]:
result
# for n=7

Unnamed: 0,Logistic,SVML,SVMNL,KNN,NB,DecisionTree,RandomForest
RandomForest,0.97,0.96,0.96,0.97,0.96,0.99,0.99
DecisionTree,0.98,0.96,0.96,0.99,0.91,0.99,0.99
