In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import pickle
import matplotlib.pyplot as plt

In [2]:
def rfeFeature(indep_X, dep_Y, n):
    # Initialize an empty list to store the RFE feature selections
    rfelist = []
    
    # Define the classifiers/models that will be used for RFE
    log_model = LogisticRegression(solver='lbfgs')  # Logistic Regression model
    RF = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)  # Random Forest Classifier
    # NB = GaussianNB()
    DT = DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0)  # Decision Tree Classifier
    svc_model = SVC(kernel='linear', random_state=0)  # Support Vector Classifier (SVC) with linear kernel
    #knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    # List of models to perform RFE on
    rfemodellist = [log_model, svc_model, RF, DT] 
    
    # Iterate through each model in the rfemodellist
    for i in rfemodellist:
        print(i)  # Print the current model being used
        
        # Apply Recursive Feature Elimination (RFE) to the current model
        log_rfe = RFE(estimator=i, n_features_to_select=n) # Select top 'n' features using RFE
        log_fit = log_rfe.fit(indep_X, dep_Y)  # Fit RFE model to the data
        
        # Transform the data to select only the 'n' features
        log_rfe_feature = log_fit.transform(indep_X)
        
        # Append the selected features to the rfelist
        rfelist.append(log_rfe_feature)
    
    return rfelist  # Return the list of RFE selected features for all models

In [3]:
#Standard scalar code
def split_scalar(indep_X,dep_Y):
        X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)    
        return X_train, X_test, y_train, y_test
    

In [4]:
#Classification codes
def cm_prediction(classifier,X_test):
     y_pred = classifier.predict(X_test)
        
        # Making the Confusion Matrix
     from sklearn.metrics import confusion_matrix
     cm = confusion_matrix(y_test, y_pred)
        
     from sklearn.metrics import accuracy_score 
     from sklearn.metrics import classification_report 
        #from sklearn.metrics import confusion_matrix
        #cm = confusion_matrix(y_test, y_pred)
        
     Accuracy=accuracy_score(y_test, y_pred )
        
     report=classification_report(y_test, y_pred)
     return  classifier,Accuracy,report,X_test,y_test,cm


In [5]:
def logistic(X_train,y_train,X_test):       
        # Fitting K-NN to the Training set
        from sklearn.linear_model import LogisticRegression
        classifier = LogisticRegression(random_state = 0)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm      

In [6]:
def svm_linear(X_train,y_train,X_test):
                
        from sklearn.svm import SVC
        classifier = SVC(kernel = 'linear', random_state = 0)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm

In [7]:
def svm_NL(X_train,y_train,X_test):
                
        from sklearn.svm import SVC
        classifier = SVC(kernel = 'rbf', random_state = 0)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm

In [8]:
def Navie(X_train,y_train,X_test):       
        # Fitting K-NN to the Training set
        from sklearn.naive_bayes import GaussianNB
        classifier = GaussianNB()
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm

In [9]:
def knn(X_train,y_train,X_test):
           
        # Fitting K-NN to the Training set
        from sklearn.neighbors import KNeighborsClassifier
        classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm

In [10]:
def Decision(X_train,y_train,X_test):
        
        # Fitting K-NN to the Training set
        from sklearn.tree import DecisionTreeClassifier
        classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm 

In [11]:
def random(X_train,y_train,X_test):
        
        # Fitting K-NN to the Training set
        from sklearn.ensemble import RandomForestClassifier
        classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
        classifier.fit(X_train, y_train)
        classifier,Accuracy,report,X_test,y_test,cm=cm_prediction(classifier,X_test)
        return  classifier,Accuracy,report,X_test,y_test,cm

In [12]:
rfedataframe = pd.DataFrame(index=['Logistic', 'SVC', 'Decision', 'Random'])


In [13]:
rfedataframe

Logistic
SVC
Decision
Random


In [15]:
 rfedataframe = pd.DataFrame(index=['Logistic', 'SVC', 'Decision', 'Random'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random'])

In [16]:
 rfedataframe 

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,,,,,,,
SVC,,,,,,,
Decision,,,,,,,
Random,,,,,,,


In [50]:
def rfe_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    # Create the DataFrame
    rfedataframe = pd.DataFrame(index=['Logistic','SVC','Decision', 'Random'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random'])
    
    # Populate the DataFrame
    for number,idex in enumerate(rfedataframe.index):
         rfedataframe.loc[ idex,'Logistic'] = acclog[number]
         rfedataframe.loc[idex,'SVMl'] = accsvml[number]
         rfedataframe.loc[idex,'SVMnl'] = accsvmnl[number]
         rfedataframe.loc[idex,'KNN'] = accknn[number]
         rfedataframe.loc[idex,'Navie'] = accnav[number]
         rfedataframe.loc[idex,'Decision'] = accdes[number]
         rfedataframe.loc[idex,'Random'] = accrf[number]
    
    return rfedataframe

In [51]:
dataset1=pd.read_csv("prep.csv",index_col=None)

df2=dataset1

In [52]:
df2 = pd.get_dummies(df2, drop_first=True)

In [53]:
indep_x=df2.drop('classification_yes',axis=1)
dep_Y=df2['classification_yes']

In [78]:
rfelist=rfeFeature(indep_x,dep_Y,7)   

LogisticRegression()


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

SVC(kernel='linear', random_state=0)
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)


In [79]:
acclog=[]
accsvml=[]
accsvmnl=[]
accknn=[]
accnav=[]
accdes=[]
accrf=[]

In [80]:
for i in rfelist:   
    X_train, X_test, y_train, y_test=split_scalar(i,dep_Y)   
    
        
    classifier,Accuracy,report,X_test,y_test,cm=logistic(X_train,y_train,X_test)
    acclog.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=svm_linear(X_train,y_train,X_test)  
    accsvml.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=svm_NL(X_train,y_train,X_test)  
    accsvmnl.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=knn(X_train,y_train,X_test)  
    accknn.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=Navie(X_train,y_train,X_test)  
    accnav.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=Decision(X_train,y_train,X_test)  
    accdes.append(Accuracy)
    
    classifier,Accuracy,report,X_test,y_test,cm=random(X_train,y_train,X_test)  
    accrf.append(Accuracy)
    


In [81]:
acclog

[0.98, 0.99, 0.98, 0.97]

In [82]:
accsvml

[0.98, 0.99, 0.98, 0.97]

In [83]:
accsvmnl

[0.98, 0.99, 0.99, 0.98]

In [84]:
accknn

[0.96, 0.99, 0.98, 0.98]

In [85]:
accnav

[0.98, 0.99, 0.93, 0.91]

In [86]:
accdes

[0.99, 1.0, 0.95, 0.94]

In [87]:
accrf

[0.98, 0.99, 0.99, 0.97]

In [88]:
result = rfe_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)


In [65]:
result


Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.94,0.94,0.94,0.94,0.94,0.94,0.94
SVC,0.87,0.87,0.87,0.87,0.87,0.87,0.87
Decision,0.91,0.92,0.93,0.93,0.86,0.91,0.94
Random,0.93,0.93,0.94,0.95,0.74,0.95,0.97


In [77]:
result
#5

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.98,0.98,0.98,0.98,0.98,0.98,0.98
SVC,0.99,0.99,0.99,0.99,0.99,0.99,0.99
Decision,0.97,0.97,0.97,0.96,0.87,0.93,0.97
Random,0.97,0.98,0.98,0.98,0.91,0.96,0.98


In [89]:
result
#7

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.98,0.98,0.98,0.96,0.98,0.99,0.98
SVC,0.99,0.99,0.99,0.99,0.99,1.0,0.99
Decision,0.98,0.98,0.99,0.98,0.93,0.95,0.99
Random,0.97,0.97,0.98,0.98,0.91,0.94,0.97
