In [1]:
from sklearn.feature_selection import SelectKBest,RFE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
def rfe_best(indep,dep,n):
    log_est=LogisticRegression(max_iter=1000)
    dt_est=DecisionTreeClassifier()
    rf_est=RandomForestClassifier()
    estimators=[log_est,dt_est,rf_est]
    best_features=[]
    features_name=[]
    for estimator in estimators:
        selector=RFE(estimator,n_features_to_select=n)
        fit1=selector.fit(indep,dep)
        x=fit1.transform(indep)
        best_features.append(x)
        mask=fit1.get_support(indices=True)
        names=indep.columns[mask]
        features_name.append(names)
        
    return best_features,features_name

def split_scale(rfe_x,dep):
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    x_train,x_test,y_train,y_test=train_test_split(rfe_x,dep,test_size=1/3,random_state=0)
    sc=StandardScaler()
    x_train_sc=sc.fit_transform(x_train)
    x_test_sc=sc.transform(x_test)
    return x_train_sc,x_test_sc,y_train,y_test 

def accuracy(model,x_train_sc,x_test_sc,y_train,y_test):
    y_pred=model.predict(x_test_sc)
    accuracy=accuracy_score(y_pred,y_test)
    return accuracy
    

def logistic(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.linear_model import LogisticRegression
    model=LogisticRegression()
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu

def svml(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.svm import SVC
    model=SVC(kernel='linear')
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu

def svm_nl(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.svm import SVC
    model=SVC(kernel='rbf')
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu

def naive(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.naive_bayes import GaussianNB
    model=GaussianNB()
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu
   

def knn(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.neighbors import KNeighborsClassifier
    model=KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu

def Decision(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.tree import DecisionTreeClassifier
    model= DecisionTreeClassifier(criterion='entropy')
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu

def random_forest(x_train_sc,x_test_sc,y_train,y_test):
    from sklearn.ensemble import RandomForestClassifier
    model= RandomForestClassifier(n_estimators=10,criterion='entropy')
    model.fit(x_train_sc,y_train)
    accu=accuracy(model,x_train_sc,x_test_sc,y_train,y_test)
    return accu




In [3]:
def best_combo(indep,dep,n):
    best_rfe,column=rfe_best(indep,dep,n)
    log_accuracy=[]
    svml_accuracy=[]
    svmnl_accuracy=[]
    naive_accuracy=[]
    knn_accuracy=[]
    dt_accuracy=[]
    rf_accuracy=[]
    for rfe_x in best_rfe:
        x_train_sc,x_test_sc,y_train,y_test=split_scale(rfe_x,dep)
        log_model=logistic(x_train_sc,x_test_sc,y_train,y_test)
        log_accuracy.append(log_model)
        svml_model=svml(x_train_sc,x_test_sc,y_train,y_test)
        svml_accuracy.append(svml_model)
        svmnl_model=svm_nl(x_train_sc,x_test_sc,y_train,y_test)
        svmnl_accuracy.append(svmnl_model)
        naive_model=naive(x_train_sc,x_test_sc,y_train,y_test)
        naive_accuracy.append(naive_model)
        knn_model=knn(x_train_sc,x_test_sc,y_train,y_test)
        knn_accuracy.append(knn_model)
        dt_model=Decision(x_train_sc,x_test_sc,y_train,y_test)
        dt_accuracy.append(dt_model)
        rf_model=random_forest(x_train_sc,x_test_sc,y_train,y_test)
        rf_accuracy.append(rf_model)
    result=[log_accuracy,svml_accuracy,svmnl_accuracy,naive_accuracy,knn_accuracy,dt_accuracy,rf_accuracy]
    import pandas as pd
    accuracy_data_frame=pd.DataFrame(result,columns=['Logistic','Decision Tree','Random Forest'],index=['Logistic','SVML','SVMNL','Naive','KNN','Decision Tree','Random Forest'])
    selected_column_name=pd.DataFrame(column,index=['Logistic','Decision Tree','Random Forest'])
    return accuracy_data_frame,selected_column_name
    

In [4]:
import warnings
warnings.filterwarnings('ignore')  # Use the function from the warnings module

In [5]:
import pandas as pd
dataset=pd.read_csv("CKD.csv")
dataset=pd.get_dummies(dataset,drop_first=True)
indep=dataset.drop('classification_yes',axis=1)
dep=dataset['classification_yes']

In [6]:
 best_combo(indep,dep,5)

(               Logistic  Decision Tree  Random Forest
 Logistic       0.977444       0.992481       0.977444
 SVML           0.977444       0.992481       0.969925
 SVMNL          0.977444       0.984962       0.977444
 Naive          0.977444       0.939850       0.872180
 KNN            0.977444       0.992481       0.969925
 Decision Tree  0.977444       0.969925       0.947368
 Random Forest  0.977444       0.984962       0.969925,
                   0     1     2        3        4
 Logistic         al  sg_c  sg_d  htn_yes   dm_yes
 Decision Tree  hrmo    rc  sg_c     sg_d  htn_yes
 Random Forest    al    sc  hrmo      pcv       rc)

In [7]:
accuracy_data_frame,selected_column_name=best_combo(indep,dep,5)

In [8]:
accuracy_data_frame

Unnamed: 0,Logistic,Decision Tree,Random Forest
Logistic,0.977444,0.992481,0.947368
SVML,0.977444,0.992481,0.969925
SVMNL,0.977444,0.984962,0.969925
Naive,0.977444,0.93985,0.909774
KNN,0.977444,0.992481,0.947368
Decision Tree,0.977444,0.969925,0.962406
Random Forest,0.977444,0.992481,0.954887


In [9]:
accuracy_data_frame.max()

Logistic         0.977444
Decision Tree    0.992481
Random Forest    0.969925
dtype: float64

In [11]:
accuracy_data_frame.idxmax()

Logistic         Logistic
Decision Tree    Logistic
Random Forest        SVML
dtype: object