In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import time

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
def fe_importance(indep_x, dep_y, n):
    """Run recursive feature elimination (RFE) under four estimators.

    An ``RFE`` selector keeping ``n`` features is fitted on
    ``(indep_x, dep_y)`` once per estimator: logistic regression, a
    linear-kernel SVC, a random forest and a decision tree.

    Parameters
    ----------
    indep_x
        Feature matrix handed to ``RFE.fit``.
    dep_y
        Target values handed to ``RFE.fit``.
    n
        Number of features each selector is asked to keep.

    Returns
    -------
    list of tuple
        ``(estimator, ranking)`` pairs in the order logistic regression,
        SVC, random forest, decision tree. ``estimator`` is the instance
        that was passed to ``RFE`` and ``ranking`` is the selector's
        ``ranking_`` attribute after fitting.
    """
    estimators = (
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
        DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0),
    )
    # RFE.fit returns the selector itself, so the ranking can be read off the call chain.
    return [
        (estimator, RFE(estimator, n_features_to_select=n).fit(indep_x, dep_y).ranking_)
        for estimator in estimators
    ]

In [4]:
def split_scalar(indep_x, dep_y):
    """Split the data 75/25 and standardise the feature partitions.

    The split uses ``test_size=0.25`` and ``random_state=0``. The
    ``StandardScaler`` is fitted on the training features only and then
    applied to both partitions, so no test-set statistics leak into training.

    Parameters
    ----------
    indep_x
        Feature matrix.
    dep_y
        Target values aligned with ``indep_x``.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` where the two feature
        partitions are scaled and the two target partitions are returned
        as produced by ``train_test_split``.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_x, dep_y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler().fit(features_train)
    return (
        scaler.transform(features_train),
        scaler.transform(features_test),
        target_train,
        target_test,
    )


In [5]:
def cm_prediction(classifier, x_test, y_test=None):
    """Score an already-fitted classifier on held-out data.

    Parameters
    ----------
    classifier
        Fitted estimator exposing ``predict``.
    x_test
        Features to predict on.
    y_test : optional
        True labels for ``x_test``. When omitted, the module-level
        ``y_test`` variable is used; this keeps existing two-argument
        callers working, but passing the labels explicitly avoids depending
        on which notebook cells have been run.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, x_test, y_test, cm)`` where
        ``Accuracy`` comes from ``accuracy_score``, ``report`` from
        ``classification_report`` and ``cm`` from ``confusion_matrix``,
        each computed from ``y_test`` and the classifier's predictions.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_test is None:
        # Backward-compatible fallback: the original implementation read the
        # labels from global notebook state.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly to cm_prediction"
            ) from None

    y_pred = classifier.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, Accuracy, report, x_test, y_test, cm

In [6]:
def logistic(x_train, y_train, x_test):
    """Fit a logistic-regression classifier and evaluate it on ``x_test``.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [7]:
def svml(x_train, y_train, x_test):
    """Fit a linear-kernel support vector classifier and evaluate it.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='linear', random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [8]:
def svmnl(x_train, y_train, x_test):
    """Fit an RBF-kernel (non-linear) support vector classifier and evaluate it.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [9]:
def naive(x_train, y_train, x_test):
    """Fit a Gaussian naive Bayes classifier and evaluate it on ``x_test``.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [10]:
def knn(x_train, y_train, x_test):
    """Fit a 5-nearest-neighbours classifier and evaluate it on ``x_test``.

    The classifier uses the Minkowski metric with ``p=2``, i.e. Euclidean
    distance.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [11]:
def Decision(x_train, y_train, x_test):
    """Fit an entropy-criterion decision tree and evaluate it on ``x_test``.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)

In [12]:
def random(x_train, y_train, x_test):
    """Fit a 10-tree, entropy-criterion random forest and evaluate it.

    NOTE(review): the function name matches the standard-library ``random``
    module; a later ``import random`` in this notebook would rebind the name.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``fit``.
    x_test
        Features forwarded to ``cm_prediction`` for evaluation.

    Returns
    -------
    tuple
        The six-item result of ``cm_prediction``:
        ``(classifier, Accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already yields the result in the order callers unpack.
    return cm_prediction(model, x_test)
    

In [47]:
def feature_importance(acclog, accsvml, accsvmnl, accnav, accknn, accdes, accrf):
    """Tabulate per-classifier accuracy lists in a 7x7 DataFrame.

    Every accuracy sequence is truncated to the length of the shortest one.
    Each column holds the truncated sequence of one classifier, and that
    same content is repeated on every row, so all rows are identical.

    The table is built in a single constructor call. The previous
    cell-by-cell chained assignment (``dataframe[col][row] = ...``) is not
    guaranteed to write through to the frame and is a no-op under pandas
    Copy-on-Write. Row labels now match the column labels exactly; the
    earlier ``' naive'`` and ``' knn'`` labels carried stray leading spaces.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accnav, accknn, accdes, accrf
        Sliceable sequences of accuracy values for logistic regression,
        linear SVM, RBF SVM, naive Bayes, k-NN, decision tree and random
        forest respectively.

    Returns
    -------
    pandas.DataFrame
        Frame whose index and columns are both
        ``['logistic', 'svml', 'svmnl', 'naive', 'knn', 'Decision', 'random']``
        and whose cells each hold a truncated accuracy sequence.
    """
    labels = ['logistic', 'svml', 'svmnl', 'naive', 'knn', 'Decision', 'random']
    scores_by_model = dict(
        zip(labels, (acclog, accsvml, accsvmnl, accnav, accknn, accdes, accrf))
    )
    min_length = min(len(scores) for scores in scores_by_model.values())

    # A fresh slice per cell, as before, so no two cells share one list object.
    records = [
        {model: scores[:min_length] for model, scores in scores_by_model.items()}
        for _ in labels
    ]
    return pd.DataFrame(records, index=labels, columns=labels)

In [48]:
# Load the raw table; 'CKD.csv' is resolved relative to the notebook's working directory.
dataset1 = pd.read_csv('CKD.csv', index_col=None)

# One-hot encode the non-numeric columns, dropping the first level of each.
# dataset1 itself is left untouched.
df2 = pd.get_dummies(dataset1, drop_first=True)

# 'classification_yes' is the target; presumably it is the dummy column
# get_dummies derives from a 'classification' column -- confirm against the CSV.
# The keyword form is required: pandas >= 2.0 no longer accepts `axis`
# positionally in DataFrame.drop.
indep_x = df2.drop(columns='classification_yes')
dep_y = df2['classification_yes']

In [49]:
# One accuracy accumulator per classifier; each name is bound to its own empty list.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))

In [50]:
    # Hold out a test partition and scale the features (see split_scalar).
    # The module-level y_test bound here is also what cm_prediction reads.
    x_train, x_test, y_train, y_test = split_scalar(indep_x, dep_y)

    # Train and score every classifier in turn, appending each accuracy to
    # its own accumulator list.
    for trainer, scores in (
        (logistic, acclog),
        (svml, accsvml),
        (svmnl, accsvmnl),
        (naive, accnav),
        (knn, accknn),
        (Decision, accdes),
        (random, accrf),
    ):
        classifier, Accuracy, report, x_test, y_test, cm = trainer(x_train, y_train, x_test)
        scores.append(Accuracy)

    result = feature_importance(acclog, accsvml, accsvmnl, accnav, accknn, accdes, accrf)

In [51]:
# Bare last expression so the notebook renders the accuracy table assigned to `result` above.
result

Unnamed: 0,logistic,svml,svmnl,naive,knn,Decision,random
logistic,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
svml,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
svmnl,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
naive,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
knn,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
Decision,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
random,[0.99],[0.98],[0.99],[0.98],[0.97],[0.9],[0.99]
