In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier   
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [3]:
def featureImportance(indep_X,dep_Y,n):
    importances_list=[]
    log_model = LogisticRegression(solver='lbfgs',max_iter=2000)
    RF = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    #NB = GaussianNB()
    DT= DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
    svc_model = SVC(kernel = 'linear', random_state = 0)
    #knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    feature_imp=[log_model,svc_model,RF,DT]
    for model in feature_imp:
        print(model)
        model.fit(indep_X,dep_Y)
        if hasattr(model, "coef_"):
            importances = model.coef_[0]
        elif hasattr(model, 'feature_importances_'):
            importances = model.feature_importances_
        else:
            continue
        # Get top n feature indices
        top_n_indices = np.argsort(importances)[-n:]
        # Extract top n columns from indep_X
        selected_features = indep_X.iloc[:, top_n_indices]
        importances_list.append(selected_features)
    return importances_list

In [5]:
def split_scalar(indep_X,dep_Y):
        X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)
        return X_train, X_test, y_train, y_test

In [7]:
def cm_prediction(classifier,X_test):
    y_pred = classifier.predict(X_test)
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_test, y_pred)
    from sklearn.metrics import accuracy_score 
    from sklearn.metrics import classification_report 
    Accuracy=accuracy_score(y_test, y_pred )
    report=classification_report(y_test, y_pred)
    return  classifier,Accuracy,report,X_test,y_test,cm

In [9]:
def logistic(X_train,y_train,X_test):      
    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression(random_state = 0)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm

In [11]:
def svm_linear(X_train,y_train,X_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel = 'linear', random_state = 0)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm

In [13]:
def svm_NL(X_train,y_train,X_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel = 'rbf', random_state = 0)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm

In [15]:
def Navie(X_train,y_train,X_test):       
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm 

In [17]:
def knn(X_train,y_train,X_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm 

In [19]:
def Decision(X_train,y_train,X_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm      


In [21]:
def random(X_train,y_train,X_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    classifier.fit(X_train, y_train)
    classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
    return  classifier,Accuracy,report,X_test,y_test,cm

In [25]:
def feature_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf): 
    feature_dataframe=pd.DataFrame(index=['Logistic','SVC','Random','DecisionTree'],columns=['Logistic','SVMl','SVMnl','KNN','Navie','Decision','Random'])
    for number,idex in enumerate(feature_dataframe.index):
        feature_dataframe.loc[idex,'Logistic']=acclog[number]
        feature_dataframe.loc[idex,'SVMl']=accsvml[number]
        feature_dataframe.loc[idex,'SVMnl']=accsvmnl[number]
        feature_dataframe.loc[idex,'KNN']=accknn[number]
        feature_dataframe.loc[idex,'Navie']=accnav[number]
        feature_dataframe.loc[idex,'Decision']=accdes[number]
        feature_dataframe.loc[idex,'Random']=accrf[number]
    return feature_dataframe

In [27]:
df=pd.read_csv("heart_clean_final.csv")
df2=df

In [29]:
indep_X=df2.drop('target',axis=1)
dep_Y=df2['target']

In [31]:
X_train,X_test,y_train,y_test=split_scalar(indep_X,dep_Y)

In [35]:
importances_list=featureImportance(indep_X,dep_Y,5)       
acclog=[]
accsvml=[]
accsvmnl=[]
accknn=[]
accnav=[]
accdes=[]
accrf=[]
for i in importances_list:   
    X_train, X_test, y_train, y_test=split_scalar(i,dep_Y)   
    classifier,Accuracy,report,X_test,y_test,cm=logistic(X_train,y_train,X_test)
    acclog.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=svm_linear(X_train,y_train,X_test)  
    accsvml.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=svm_NL(X_train,y_train,X_test)  
    accsvmnl.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=knn(X_train,y_train,X_test)  
    accknn.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=Navie(X_train,y_train,X_test)  
    accnav.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=Decision(X_train,y_train,X_test)  
    accdes.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=random(X_train,y_train,X_test)  
    accrf.append(Accuracy)
    
result=feature_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)

result


LogisticRegression(max_iter=2000)
SVC(kernel='linear', random_state=0)
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)


Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.804348,0.826087,0.826087,0.891304,0.826087,0.73913,0.717391
SVC,0.804348,0.826087,0.826087,0.891304,0.826087,0.73913,0.695652
Random,0.826087,0.826087,0.847826,0.847826,0.804348,0.782609,0.782609
DecisionTree,0.891304,0.934783,0.847826,0.891304,0.869565,0.673913,0.869565
