### Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import time
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt

### Select K Best Algorithm Function

In [2]:
def selectkbest(indep_X,dep_Y,n):
    """Reduce ``indep_X`` to its ``n`` best features by chi-squared score.

    A ``SelectKBest`` selector using ``chi2`` as the scoring function is
    fitted on ``indep_X`` / ``dep_Y`` and then applied to ``indep_X``.

    Returns the transformed feature matrix holding only the selected columns.
    """
    selector = SelectKBest(score_func=chi2, k=n)
    # fit() hands back the fitted selector, so the transform can be chained.
    return selector.fit(indep_X, dep_Y).transform(indep_X)

### Train Test Split and Standard Scaler Function

In [3]:
def split_scale(indep_X,dep_Y):
    """Split the data 75/25 into train and test sets and standardise the features.

    The split uses ``test_size=0.25`` and ``random_state=0``. The scaler is
    fitted on the training features only and then applied to the test features.

    Returns ``(X_train, X_test, Y_train, Y_test)`` with both feature sets scaled.
    """
    train_X, test_X, train_Y, test_Y = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler()
    train_X_scaled = scaler.fit_transform(train_X)
    # Reuse the statistics learned from the training rows for the test rows.
    test_X_scaled = scaler.transform(test_X)
    return train_X_scaled, test_X_scaled, train_Y, test_Y

### Confusion Matrix and Metrics Function for Classification Data

In [4]:
def cm_predict(classifier,X_test,y_true=None):
    """Predict on ``X_test`` and score the predictions against the true labels.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    X_test : feature matrix passed to ``classifier.predict``.
    y_true : true labels for ``X_test`` (optional). When omitted, the
        notebook-level ``Y_test`` variable is used, which is what this
        function always relied on before the parameter existed. Passing the
        labels explicitly removes that hidden dependency on global state.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_true, cm)`` where
        ``Accuracy`` is the accuracy score, ``report`` the text
        classification report and ``cm`` the confusion matrix.

    Raises
    ------
    NameError
        If ``y_true`` is omitted and no global ``Y_test`` has been defined.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_true is None:
        # Backward-compatible fallback for callers that pass only two arguments.
        y_true = Y_test

    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_true, y_pred)
    Accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred)
    return classifier, Accuracy, report, X_test, y_true, cm

### Logistic Regression Function

In [5]:
def logistic(X_train,Y_train,X_test):
    """Fit a logistic-regression classifier and evaluate it on ``X_test``.

    The model is created with ``random_state=0`` and scored by ``cm_predict``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)``.
    """
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(random_state=0)
    model.fit(X_train, Y_train)
    # cm_predict already returns the six-element result tuple in this order.
    return cm_predict(model, X_test)

### SVM Linear Function

In [6]:
def svm_linear(X_train,Y_train,X_test):
    """Fit a linear-kernel support vector classifier and evaluate it on ``X_test``.

    The model is created with ``kernel='linear'`` and ``random_state=0`` and
    scored by ``cm_predict``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)``.
    """
    from sklearn.svm import SVC
    # This used to be kernel='rbf', which made the function an exact duplicate
    # of svm_NL and left the "SVML" results column not linear at all.
    classifier=SVC(kernel='linear',random_state=0)
    classifier.fit(X_train,Y_train)
    classifier,Accuracy,report,X_test,Y_test,cm=cm_predict(classifier,X_test)
    return classifier,Accuracy,report,X_test,Y_test,cm

### SVM Non-Linear Function

In [7]:
def svm_NL(X_train,Y_train,X_test):
    """Fit an RBF-kernel (non-linear) support vector classifier and evaluate it.

    The model is created with ``kernel='rbf'`` and ``random_state=0`` and
    scored on ``X_test`` by ``cm_predict``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)``.
    """
    from sklearn.svm import SVC
    rbf_model = SVC(kernel='rbf', random_state=0)
    rbf_model.fit(X_train, Y_train)
    evaluation = cm_predict(rbf_model, X_test)
    return evaluation

### Naive Bayes Function

In [8]:
def Naive(X_train,Y_train,X_test):
    """Fit a Gaussian naive Bayes classifier and evaluate it on ``X_test``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)`` as produced
    by ``cm_predict``.
    """
    from sklearn.naive_bayes import GaussianNB
    nb_model = GaussianNB()
    nb_model.fit(X_train, Y_train)
    return cm_predict(nb_model, X_test)

### KNN Function

In [9]:
def knn(X_train,Y_train,X_test):
    """Fit a k-nearest-neighbours classifier and evaluate it on ``X_test``.

    The model uses 5 neighbours with the Minkowski metric and ``p=2``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)`` as produced
    by ``cm_predict``.
    """
    from sklearn.neighbors import KNeighborsClassifier
    neighbour_model = KNeighborsClassifier(
        n_neighbors=5,
        metric='minkowski',
        p=2,
    )
    neighbour_model.fit(X_train, Y_train)
    return cm_predict(neighbour_model, X_test)

### Decision Tree Function

In [10]:
def Decision(X_train,Y_train,X_test):
    """Fit an entropy-criterion decision tree and evaluate it on ``X_test``.

    The tree is created with ``criterion='entropy'`` and ``random_state=0``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)`` as produced
    by ``cm_predict``.
    """
    from sklearn.tree import DecisionTreeClassifier
    tree_model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    tree_model.fit(X_train, Y_train)
    outcome = cm_predict(tree_model, X_test)
    return outcome

### Random Forest Function

In [11]:
def random(X_train,Y_train,X_test):
    """Fit a 10-tree random forest (entropy criterion) and evaluate it on ``X_test``.

    The forest is created with ``n_estimators=10``, ``criterion='entropy'``
    and ``random_state=0``.

    Returns ``(classifier, Accuracy, report, X_test, Y_test, cm)`` as produced
    by ``cm_predict``.
    """
    # NOTE(review): this function name would shadow the standard-library
    # ``random`` module if that module were imported in this notebook.
    from sklearn.ensemble import RandomForestClassifier
    forest_model = RandomForestClassifier(
        n_estimators=10,
        criterion='entropy',
        random_state=0,
    )
    forest_model.fit(X_train, Y_train)
    return cm_predict(forest_model, X_test)

### Create Accuracy Summary DataFrame

In [12]:
def selectk_classification(alogi,asvml,asvmnl,aknn,anb,adec,arf):
    """Collect per-model accuracies into a one-row summary table.

    Each argument is a sequence of accuracy scores for one model. Element
    ``i`` of every sequence fills row ``i`` of the table; the table currently
    has a single row labelled ``'ChiSquare'``.

    Returns a ``DataFrame`` indexed by feature-selection method with one
    column per model: Logistic, SVML, SVMNL, KNN, NaiveBayes, DecisionTree,
    RandomForest.

    Raises ``IndexError`` if any sequence has fewer entries than there are rows.
    """
    scores_by_model = {
        'Logistic': alogi,
        'SVML': asvml,
        'SVMNL': asvmnl,
        'KNN': aknn,
        'NaiveBayes': anb,
        'DecisionTree': adec,
        'RandomForest': arf,
    }
    dataframe = pd.DataFrame(index=['ChiSquare'], columns=list(scores_by_model))
    for number, idex in enumerate(dataframe.index):
        for model_name, scores in scores_by_model.items():
            # Single .loc assignment instead of dataframe[col][row] = value:
            # chained assignment writes to a temporary under pandas
            # Copy-on-Write and would leave the table empty.
            dataframe.loc[idex, model_name] = scores[number]
    return dataframe

### Dataset Execution

In [13]:
# Load the preprocessed data and one-hot encode the categorical columns
# (drop_first=True drops the first level of each encoded column).
dataset=pd.read_csv('prep.csv',index_col=None)
df=dataset
df=pd.get_dummies(df,drop_first=True)
# Separate features from the target. `columns=` replaces the positional axis
# argument (`df.drop(label, 1)`), which pandas 2.0 no longer accepts.
indep_X=df.drop(columns='classification_yes')
dep_Y=df['classification_yes']

In [22]:
# Keep only the 6 features that score highest in the chi-squared test.
kbest = selectkbest(indep_X, dep_Y, n=6)

In [23]:
# One independent, empty accuracy list per model.
alogi, asvml, asvmnl, aknn, anb, adec, arf = ([] for _ in range(7))

In [24]:
# Split and scale the selected features once; the resulting names are
# notebook-level variables used by the cells that follow.
(X_train, X_test, Y_train, Y_test) = split_scale(kbest, dep_Y)

In [25]:
# Pair each training function with the list that collects its accuracy,
# in the same order the models were originally run.
model_runs = (
    (logistic, alogi),
    (svm_linear, asvml),
    (svm_NL, asvmnl),
    (knn, aknn),
    (Naive, anb),
    (Decision, adec),
    (random, arf),
)
for train_model, accuracy_log in model_runs:
    # The notebook-level result names are overwritten on every pass, so after
    # the loop they hold the values from the last model (random forest).
    classifier, Accuracy, report, X_test, Y_test, cm = train_model(X_train, Y_train, X_test)
    accuracy_log.append(Accuracy)

result = selectk_classification(alogi, asvml, asvmnl, aknn, anb, adec, arf)
result

  old_joblib = LooseVersion(joblib_version) < LooseVersion('0.12')
  old_joblib = LooseVersion(joblib_version) < LooseVersion('0.12')
  if _joblib.__version__ >= LooseVersion('0.12'):
  if _joblib.__version__ >= LooseVersion('0.12'):


Unnamed: 0,Logistic,SVML,SVMNL,KNN,NaiveBayes,DecisionTree,RandomForest
ChiSquare,0.96,0.96,0.96,0.93,0.89,0.97,0.97
