In [1]:

# importing lib 
import pandas as pd
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest,chi2
from sklearn.feature_selection import RFE

In [2]:

# RFE
def rfefeature(indep_X,dep_Y,n):
    """Run recursive feature elimination once per ranking estimator.

    Parameters
    ----------
    indep_X : feature table handed to ``RFE.fit`` / ``RFE.transform``.
    dep_Y : target values handed to ``RFE.fit``.
    n : value passed to RFE as ``n_features_to_select``.

    Returns
    -------
    list
        One transformed feature matrix per estimator, in the order
        logistic regression, linear SVC, decision tree, random forest.

    Notes
    -----
    Each estimator and its RFE wrapper are printed as a progress trace.
    GaussianNB is imported by the notebook but is not used as a ranking
    estimator here (it was commented out in the original list).
    NOTE(review): the recorded output of this notebook shows an lbfgs
    "iterations reached limit" warning during this step; the selector is
    fitted on unscaled data - confirm whether that is acceptable.
    """
    ranking_estimators = (
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear',random_state =0),
        DecisionTreeClassifier(criterion ='gini',max_features ='sqrt',splitter ='best',random_state =0),
        RandomForestClassifier(n_estimators =10, criterion ='entropy', random_state =0),
    )

    reduced_matrices = []
    for estimator in ranking_estimators:
        print(estimator)
        # https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
        selector = RFE(estimator=estimator, n_features_to_select=n)
        print(selector)
        # fit() returns the selector itself, so the transform can be chained.
        reduced_matrices.append(selector.fit(indep_X,dep_Y).transform(indep_X))
    return reduced_matrices

In [3]:
def split_scalar(indep_X,dep_Y):
    """Split into train/test (70/30, random_state=0) and standardise the features.

    The scaler is fitted on the training features only and then applied to
    the test features. The targets are returned unscaled.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)`` with both feature matrices scaled.
    """
    from sklearn.preprocessing import StandardScaler

    train_X, test_X, train_Y, test_Y = train_test_split(
        indep_X, dep_Y, test_size=0.30, random_state=0
    )

    scaler = StandardScaler()
    scaled_train_X = scaler.fit_transform(train_X)
    scaled_test_X = scaler.transform(test_X)

    return scaled_train_X, scaled_test_X, train_Y, test_Y

In [4]:
def cm_pred(classifier,X_test,Y_test=None):
    """Score an already-fitted classifier on the test split.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    X_test : test features passed to ``classifier.predict``.
    Y_test : true test labels, optional.
        When omitted, the notebook/module-level variable ``Y_test`` is used.
        That is what this function always did before the parameter existed,
        so existing two-argument callers keep working. Passing the labels
        explicitly avoids depending on hidden notebook state.

    Returns
    -------
    tuple
        ``(classifier, accuracy, report, X_test, Y_test, cm)`` where
        ``accuracy`` comes from ``accuracy_score``, ``report`` from
        ``classification_report`` and ``cm`` from ``confusion_matrix``.

    Raises
    ------
    NameError
        If ``Y_test`` is not passed and no module-level ``Y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if Y_test is None:
        # Backward-compatible fallback to the global the original code relied on.
        try:
            Y_test = globals()['Y_test']
        except KeyError:
            raise NameError(
                "name 'Y_test' is not defined: pass Y_test to cm_pred() "
                "or define it at notebook level before calling"
            ) from None

    test_pred = classifier.predict(X_test)

    cm = confusion_matrix(Y_test,test_pred)
    accuracy = accuracy_score(Y_test,test_pred)
    report = classification_report(Y_test,test_pred)
    return classifier,accuracy,report,X_test,Y_test,cm

In [5]:
def log(X_train,X_test,Y_train):
    """Fit a logistic-regression classifier and score it with ``cm_pred``.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    model = LogisticRegression(random_state =0)
    model.fit(X_train,Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [6]:

def svm(X_train,X_test,Y_train):
    """Fit a linear-kernel SVC and score it with ``cm_pred``.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    model = SVC(kernel='linear',random_state =0)
    model.fit(X_train,Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [7]:
def dtree(X_train,X_test,Y_train):
    """Fit a gini decision tree (sqrt max_features, random_state=0) and score it.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    model = DecisionTreeClassifier(
        criterion ='gini',
        max_features ='sqrt',
        splitter ='best',
        random_state =0,
    )
    model.fit(X_train,Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [8]:

def random(X_train,X_test,Y_train):
    """Fit a 10-tree entropy random forest (random_state=0) and score it.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    model = RandomForestClassifier(
        n_estimators =10,
        criterion ='entropy',
        random_state =0,
    )
    model.fit(X_train,Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [9]:

def nb(X_train,X_test,Y_train):
    """Fit a Gaussian naive Bayes classifier and score it with ``cm_pred``.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    model = GaussianNB()
    model.fit(X_train,Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [10]:
def knn(X_train,X_test,Y_train):
    """Fit a 5-nearest-neighbour classifier (minkowski, p=2) and score it.

    ``cm_pred`` is called without labels, so the test labels come from the
    notebook-level ``Y_test`` variable.

    Returns the 6-tuple ``(classifier, accuracy, report, X_test, Y_test, cm)``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    model.fit(X_train, Y_train)
    fitted,score,summary,test_X,test_Y,matrix = cm_pred(model,X_test)
    return fitted,score,summary,test_X,test_Y,matrix

In [11]:
def rfe_classification(acclog,accsvm,accdtree,accrandom,accnb,accknn):
    """Arrange per-classifier accuracies into a selector-by-classifier table.

    Parameters
    ----------
    acclog, accsvm, accdtree, accrandom, accnb, accknn : sequences
        Accuracies of one classifier each. Position ``k`` of every sequence
        is written to row ``k`` of the table, so each must hold at least
        four values (one per RFE ranking estimator).

    Returns
    -------
    pandas.DataFrame
        Rows ``Logistic, SVM, DecisionTree, RandomForest`` (the RFE ranking
        estimator) and columns ``Logistic, SVM, DecisionTree, RandomForest,
        NavieBayes, KNN`` (the evaluated classifier).

    Raises
    ------
    IndexError
        If any sequence has fewer than four entries.
    """
    selector_names = ['Logistic','SVM','DecisionTree','RandomForest']
    # Column label -> accuracy sequence. 'NavieBayes' keeps its original
    # spelling because it is part of the returned frame's column names.
    scores_by_column = {
        'Logistic': acclog,
        'SVM': accsvm,
        'DecisionTree': accdtree,
        'RandomForest': accrandom,
        'NavieBayes': accnb,
        'KNN': accknn,
    }

    rfe_df = pd.DataFrame(index=selector_names, columns=list(scores_by_column))
    for number, idex in enumerate(selector_names):
        for column, scores in scores_by_column.items():
            # .loc writes into the frame itself. The former chained form
            # rfe_df[column][idex] = ... assigns to a temporary under pandas
            # Copy-on-Write and would leave the table empty.
            rfe_df.loc[idex, column] = scores[number]

    return rfe_df

In [12]:
# Read the preprocessed loan data; `df` and `dataset` are two names for the same frame.
df = dataset = pd.read_csv('Pre_loan_data.csv',index_col=None)

In [13]:
# Show the loaded frame as this cell's output.
df

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,25,1,4.083,4,1.6,1,0,0,1,0,0,0
1,45,19,2.833,3,1.5,1,0,0,1,0,0,0
2,39,15,0.917,1,1.0,1,0,0,0,0,0,0
3,35,9,8.333,1,2.7,2,0,0,0,0,0,0
4,35,8,3.750,4,1.0,2,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,29,3,3.333,1,1.9,3,0,0,0,0,1,0
4996,30,4,1.250,4,0.4,1,85,0,0,0,1,0
4997,63,39,2.000,2,0.3,3,0,0,0,0,0,0
4998,65,40,4.083,3,0.5,2,0,0,0,0,1,0


In [14]:
# 'Personal Loan' is the target; every remaining column is used as a predictor.
dep_Y = df['Personal Loan']
indep_X = df.drop(columns='Personal Loan')

In [25]:
# One reduced feature matrix per RFE ranking estimator.
rfelist = rfefeature(indep_X,dep_Y,11)

# One accuracy list per evaluated classifier; each gains one entry per matrix in rfelist.
acclog,accsvm,accdtree,accrandom,accnb,accknn = [],[],[],[],[],[]

# (training function, accuracy list) pairs, in the order they are run.
trainers = (
    (log, acclog),
    (svm, accsvm),
    (dtree, accdtree),
    (random, accrandom),
    (nb, accnb),
    (knn, accknn),
)

for i in rfelist:
    # These names must stay at notebook level: cm_pred reads the global Y_test.
    X_train,X_test,Y_train,Y_test = split_scalar(i,dep_Y)
    for trainer,scores in trainers:
        classifier,accuracy,report,X_test,Y_test,cm = trainer(X_train,X_test,Y_train)
        scores.append(accuracy)

LogisticRegression()
RFE(estimator=LogisticRegression(), n_features_to_select=11)
SVC(kernel='linear', random_state=0)
RFE(estimator=SVC(kernel='linear', random_state=0), n_features_to_select=11)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


DecisionTreeClassifier(max_features='sqrt', random_state=0)
RFE(estimator=DecisionTreeClassifier(max_features='sqrt', random_state=0),
    n_features_to_select=11)
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
RFE(estimator=RandomForestClassifier(criterion='entropy', n_estimators=10,
                                     random_state=0),
    n_features_to_select=11)


In [26]:
# Build the accuracy table: rows are the RFE ranking estimators, columns the evaluated classifiers.
result = rfe_classification(acclog,accsvm,accdtree,accrandom,accnb,accknn)

In [18]:
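# result  - 5  (the table below is output kept from an earlier execution; presumably RFE with n=5 features - TODO confirm)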

Unnamed: 0,Logistic,SVM,DecisionTree,RandomForest,NavieBayes,KNN
Logistic,0.957333,0.958,0.982,0.978,0.926,0.978
SVM,0.954,0.955333,0.967333,0.963333,0.923333,0.968667
DecisionTree,0.954667,0.954,0.986667,0.989333,0.918667,0.979333
RandomForest,0.95,0.952,0.990667,0.986,0.901333,0.981333


In [21]:
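# result # 7  (the table below is output kept from an earlier execution; presumably RFE with n=7 features - TODO confirm)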

Unnamed: 0,Logistic,SVM,DecisionTree,RandomForest,NavieBayes,KNN
Logistic,0.956667,0.956,0.972,0.976,0.923333,0.980667
SVM,0.956667,0.956,0.972,0.976,0.923333,0.980667
DecisionTree,0.952,0.952667,0.974,0.984667,0.901333,0.977333
RandomForest,0.95,0.951333,0.978,0.986667,0.887333,0.973333


In [24]:
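# result # 9  (the table below is output kept from an earlier execution; presumably RFE with n=9 features - TODO confirm)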

Unnamed: 0,Logistic,SVM,DecisionTree,RandomForest,NavieBayes,KNN
Logistic,0.956667,0.957333,0.966,0.98,0.924,0.974
SVM,0.954667,0.954,0.984,0.983333,0.898667,0.97
DecisionTree,0.950667,0.950667,0.982,0.988667,0.887333,0.967333
RandomForest,0.950667,0.950667,0.982,0.988667,0.887333,0.967333


In [27]:
# Display the accuracy table built by rfe_classification.
# NOTE(review): the trailing "#4" label does not match the visible run, which
# called rfefeature(indep_X, dep_Y, 11) - confirm which feature count this table belongs to.
result #4

Unnamed: 0,Logistic,SVM,DecisionTree,RandomForest,NavieBayes,KNN
Logistic,0.954,0.954667,0.964,0.986,0.886,0.967333
SVM,0.954,0.954667,0.964,0.986,0.886,0.967333
DecisionTree,0.954,0.954667,0.964,0.986,0.886,0.967333
RandomForest,0.954,0.954667,0.964,0.986,0.886,0.967333
