In [1]:
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def rfe_select(indep_X,dep_Y,n_features=5):
    """Run recursive feature elimination (RFE) once per base estimator.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix handed to ``RFE.fit``.
    dep_Y : array-like
        Target labels handed to ``RFE.fit``.
    n_features : int, default 5
        Number of features each RFE run keeps (``n_features_to_select``).

    Returns
    -------
    list
        One reduced feature matrix per base estimator, in the order
        logistic regression, linear SVC, decision tree, random forest.
    """
    # RFE ranks features through the estimator's coefficients or feature
    # importances, so only estimators that expose one of those are listed.
    # NOTE(review): the logistic model is fitted on whatever scale indep_X
    # arrives in; the lbfgs "iterations reached limit" warnings recorded in
    # the notebook output may originate here - confirm, and consider scaling
    # or a larger max_iter.
    # NOTE(review): the notebook calls this on the full dataset before the
    # train/test split, so test rows take part in feature selection.
    estimators = [
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear'),
        DecisionTreeClassifier(criterion='entropy', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
    ]

    return [
        RFE(estimator, n_features_to_select=n_features).fit_transform(indep_X, dep_Y)
        for estimator in estimators
    ]

In [3]:
def split_scaler(rfeVar,dep_y):
    """Split features and labels into train/test sets and standardise the features.

    Parameters
    ----------
    rfeVar : array-like
        Feature matrix to split.
    dep_y : array-like
        Target labels passed to ``train_test_split`` together with ``rfeVar``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``. Both feature splits are
        transformed by a ``StandardScaler`` that is fitted on the training
        split only; the label splits are returned as produced by
        ``train_test_split``.
    """
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # Split the labels that were passed in. The previous body read the
    # notebook-global ``dep_Y`` here and silently ignored the ``dep_y`` argument.
    X_train, X_test, y_train, y_test = train_test_split(
        rfeVar, dep_y, test_size=0.25, random_state=0
    )

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    # Reuse the training statistics for the test split instead of refitting.
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

In [4]:
def cm_prediction(classifier,X_test,y_test):
    """Predict on the test split and compute evaluation metrics.

    Parameters
    ----------
    classifier : object
        Fitted estimator exposing ``predict``.
    X_test : array-like
        Features to predict on.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    tuple
        ``(classifier, X_test, y_test, Accuracy, report, cm)`` where the last
        three items come from ``accuracy_score``, ``classification_report``
        and ``confusion_matrix`` respectively.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    predicted = classifier.predict(X_test)
    metrics = (
        accuracy_score(y_test, predicted),
        classification_report(y_test, predicted),
        confusion_matrix(y_test, predicted),
    )
    # The inputs are echoed back ahead of the metrics so callers can unpack
    # everything from a single call.
    return (classifier, X_test, y_test, *metrics)

In [5]:
def logistic(X_train, y_train, X_test, y_test):
    """Fit logistic regression on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(X_train, y_train)
    fitted, features, labels, accuracy, summary, matrix = cm_prediction(model, X_test, y_test)
    return fitted, features, labels, accuracy, summary, matrix

def svm_linear(X_train,y_train,X_test,y_test):
    """Fit a linear-kernel SVC on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear')
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm

def svm_NL_linear(X_train,y_train,X_test,y_test):
    """Fit an RBF-kernel SVC on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.svm import SVC
    classifier = SVC(kernel='rbf')
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm

def DTree(X_train,y_train,X_test,y_test):
    """Fit an entropy-criterion decision tree on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm

def RForest(X_train,y_train,X_test,y_test):
    """Fit a 10-tree entropy-criterion random forest on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm

def knn(X_train,y_train,X_test,y_test):
    """Fit a 5-neighbour KNN classifier (Minkowski metric, p=2) and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm

def naive(X_train,y_train,X_test,y_test):
    """Fit a Gaussian naive Bayes classifier on the training split and score it on the test split.

    Returns the six values produced by ``cm_prediction``:
    ``(classifier, X_test, y_test, Accuracy, report, cm)``.
    """
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    # Unpack in the order cm_prediction actually returns. The earlier code
    # bound these values to swapped local names (``Accuracy`` held X_test,
    # etc.); the returned positions are unchanged, so callers are unaffected.
    classifier,X_test,y_test,Accuracy,report,cm=cm_prediction(classifier,X_test,y_test)
    return classifier,X_test,y_test,Accuracy,report,cm


In [6]:
def rfe_classification(acclog,accsvml,accsvmnl,accdes,accrf,accknn,accnai):
    """Assemble per-classifier accuracy lists into one comparison table.

    Each argument is a sequence of scores for one classifier; element ``k``
    of every sequence is placed in row ``k`` of the table. Only the first
    four elements are read, one per row label.

    Returns
    -------
    pandas.DataFrame
        Rows ``['Logistic', 'SVMl', 'DTree', 'RForest']`` and columns
        ``['Logistic', 'SVMl', 'SVMNl', 'DTree', 'RForest', 'knn', 'naive']``.

    Raises
    ------
    IndexError
        If any score sequence has fewer elements than there are rows.
    """
    scores_by_column = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMNl': accsvmnl,
        'DTree': accdes,
        'RForest': accrf,
        'knn': accknn,
        'naive': accnai,
    }

    rfedataframe=pd.DataFrame(index=['Logistic','SVMl','DTree','RForest'],columns=list(scores_by_column))

    for number,idex in enumerate(rfedataframe.index):
        for column,scores in scores_by_column.items():
            # Single-step .loc write. Chained ``df[col][row] = value`` assigns
            # into a temporary and leaves the frame untouched when pandas
            # Copy-on-Write is active.
            rfedataframe.loc[idex,column]=scores[number]
    return rfedataframe

In [7]:
# Load the prepared data and one-hot encode it, dropping the first level of
# every encoded column.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Separate the features from the target column.
indep_X = df2.drop(columns='classification_yes')
dep_Y = df2['classification_yes']

# Feature Selection: one reduced feature matrix per RFE base estimator.
rfeVar = rfe_select(indep_X, dep_Y)

# One accuracy list per downstream classifier; entry k belongs to rfeVar[k].
acclog, accsvml, accsvmnl, accdes, accrf, accknn, accnai = ([] for _ in range(7))

# Pair every training routine with the list that collects its accuracy.
trainers = [
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL_linear, accsvmnl),
    (DTree, accdes),
    (RForest, accrf),
    (knn, accknn),
    (naive, accnai),
]

for i in rfeVar:
    # train/test split with standard scaling for this feature subset
    X_train, X_test, y_train, y_test = split_scaler(i, dep_Y)

    for trainer, accuracies in trainers:
        classifier, X_test, y_test, Accuracy, report, cm = trainer(X_train, y_train, X_test, y_test)
        accuracies.append(Accuracy)
    
    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [8]:
# Collect the per-classifier accuracy lists into a single comparison table.
result=rfe_classification(acclog,accsvml,accsvmnl,accdes,accrf,accknn,accnai)

In [9]:
# Display the accuracy table (rows: RFE base estimator, columns: classifier).
result

Unnamed: 0,Logistic,SVMl,SVMNl,DTree,RForest,knn,naive
Logistic,0.98,0.98,0.98,0.98,0.98,0.98,0.98
SVMl,0.99,0.99,0.99,0.99,0.99,0.99,0.99
DTree,1.0,1.0,1.0,0.99,0.99,1.0,0.93
RForest,0.97,0.97,0.98,0.96,0.98,0.97,0.91


Note: with RFE (for both regression and classification), some algorithms cannot be used as the base estimator,
e.g. non-linear SVM, K-nearest-neighbours classifier and Gaussian Naive Bayes, because these algorithms do not expose coefficients or feature importances; therefore we do not use them for feature selection.

