## RFE Classification - Best Model

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pickle
import matplotlib.pyplot as plt

In [2]:
# Read the dataset from "CKD.csv" in the working directory.
# index_col=None: no CSV column is used as the row index.
raw_dataset = pd.read_csv("CKD.csv", index_col=None)
# Bare last expression so the notebook renders the loaded frame.
raw_dataset

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.000000,76.459948,c,3.0,0.0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.000000,76.459948,c,2.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,34.000000,12300.000000,4.705597,no,no,no,yes,poor,no,yes
2,4.000000,76.459948,a,1.0,0.0,normal,normal,notpresent,notpresent,99.000000,...,34.000000,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.000000,76.459948,d,1.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.000000,50.000000,c,0.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,36.000000,12400.000000,4.705597,no,no,no,yes,poor,no,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,219.000000,...,37.000000,9800.000000,4.400000,no,no,no,yes,poor,no,yes
395,51.492308,70.000000,c,0.0,2.0,normal,normal,notpresent,notpresent,220.000000,...,27.000000,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes
396,51.492308,70.000000,c,3.0,0.0,normal,normal,notpresent,notpresent,110.000000,...,26.000000,9200.000000,3.400000,yes,yes,no,poor,poor,no,yes
397,51.492308,90.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,207.000000,...,38.868902,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes


In [3]:
# Alias only: `df` and `raw_dataset` refer to the same DataFrame object (no copy is made).
df = raw_dataset

In [4]:
# One-hot encode the non-numeric columns as 0/1 integers. drop_first=True
# drops the first level of each encoded column, so a k-level column yields
# k-1 dummy columns.
# Encoding straight from `raw_dataset` (instead of re-assigning `df` from
# itself) keeps this cell idempotent and independent of the previous cell.
df = pd.get_dummies(raw_dataset, dtype=int, drop_first=True)

In [5]:
# Features: every encoded column except the target.
indep_x = df.drop(['classification_yes'],axis=1)
# Target: the dummy column get_dummies produced from `classification`
# (presumably 1 where the original value was "yes" -- confirm against the CSV).
dep_y = df['classification_yes']

In [6]:
def RFEfeatures(indep_y,dep_y,n):
    """Select ``n`` features by recursive feature elimination with a linear SVC.

    Parameters
    ----------
    indep_y : pandas.DataFrame
        Feature matrix. Despite its name this is the set of independent
        variables; the parameter name is kept so existing callers keep
        working. It must expose ``.columns`` because the selected column
        names are printed.
    dep_y : array-like
        Target values, aligned row-for-row with ``indep_y``.
    n : int
        Number of features RFE should keep.

    Returns
    -------
    list
        One entry per estimator in the internal model list (currently a
        single linear-kernel SVC); each entry is the output of
        ``RFE.transform`` on ``indep_y``, i.e. the feature matrix reduced to
        the selected columns.
    """
    # Use the argument that was passed in. The previous body ignored it and
    # silently read the notebook-global `indep_x` instead.
    features = indep_y

    rfelist = []

    svc_model = SVC(kernel='linear', random_state = 0)

    rfemodellist = [svc_model]

    for model in rfemodellist:
        rfemodel = RFE(estimator = model, n_features_to_select = n).fit(features, dep_y)
        rfelist.append(rfemodel.transform(features))

        # support_ is a boolean mask over the input columns, so this lists
        # the kept columns in their original order.
        selected_features = features.columns[rfemodel.support_]
        print("Selected Features", selected_features)
    return rfelist

In [7]:
def split_scaler(indep_x,dep_y):
    """Split into train/test sets and standardise the features.

    The split holds out 20% of the rows with a fixed ``random_state`` of 0.
    The scaler is fitted on the training features only and then applied to
    both partitions.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, scaler)`` where the two feature
        sets are already scaled and ``scaler`` is the fitted StandardScaler.
    """
    partitions = train_test_split(indep_x, dep_y, test_size=0.2, random_state=0)
    train_features, test_features, train_target, test_target = partitions

    # Fit on the training partition only, then reuse those statistics.
    scaler = StandardScaler().fit(train_features)
    return (
        scaler.transform(train_features),
        scaler.transform(test_features),
        train_target,
        test_target,
        scaler,
    )

def cm_prediction(classifier,x_test,y_test):
    """Score a fitted classifier on the held-out data.

    Parameters
    ----------
    classifier : fitted estimator
        Any object exposing ``predict``.
    x_test : array-like
        Test features, already preprocessed the same way as the training data.
    y_test : array-like
        True labels for ``x_test``.

    Returns
    -------
    tuple
        ``(classifier, accuracy)``: the classifier that was passed in and its
        accuracy score on the test data.
    """
    # The confusion matrix and classification report used to be computed here
    # but were never returned or displayed, so that dead work was removed.
    y_pred = classifier.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    return classifier, accuracy

def svm_linear(x_train,y_train,x_test,y_test):
    """Fit a linear-kernel SVC and score it on the test data.

    Parameters
    ----------
    x_train, y_train : array-like
        Training features and labels.
    x_test, y_test : array-like
        Test features and labels, forwarded to ``cm_prediction``.

    Returns
    -------
    tuple
        Whatever ``cm_prediction`` returns: ``(classifier, accuracy)``.
    """
    # SVC is already imported in the notebook's import cell, so the
    # function-scope re-import was dropped.
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(x_train, y_train)
    return cm_prediction(classifier, x_test, y_test)

In [8]:
def RFE_classification(accsvml):
    """Wrap the linear-SVM accuracy in a one-row summary table.

    Parameters
    ----------
    accsvml : list or scalar
        Accuracy value(s) for the ``svm_l`` column; a list must have exactly
        one element to match the single ``SVC`` row.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``['SVC']`` with one column, ``svm_l``.
    """
    empty_table = pd.DataFrame(index=['SVC'], columns=['svm_l'])
    # assign() performs the same column assignment on a copy of the frame.
    return empty_table.assign(svm_l=accsvml)

In [9]:
# Accumulator for test accuracies; the evaluation loop below appends to it.
accsvml = []

In [10]:
# Run RFE keeping 5 features; the selected column names are printed as a side effect.
rfelist = RFEfeatures(indep_x,dep_y,5)

Selected Features Index(['al', 'sg_c', 'sg_d', 'dm_yes', 'appet_yes'], dtype='object')


In [11]:
# Start from an empty list inside this cell. When the accumulator only lived
# in an earlier cell, re-running this cell appended a second accuracy and
# RFE_classification then failed because the values no longer fit its single row.
accsvml = []

# NOTE(review): RFE was fitted on all rows before this train/test split, so
# feature selection has already seen the test rows. The reported accuracy may
# be optimistic -- consider running RFE on the training partition only.
for selected_x in rfelist:
    x_train,x_test,y_train,y_test,sc = split_scaler(selected_x,dep_y)

    classifier,accuracy = svm_linear(x_train,y_train,x_test,y_test)
    accsvml.append(accuracy)

# `classifier` and `sc` stay bound to the last iteration's objects; the cells
# below pickle them.
result = RFE_classification(accsvml)

In [12]:
# Display the one-row accuracy table built by RFE_classification.
result

Unnamed: 0,svm_l
SVC,1.0


In [14]:
import pickle

### Save the Model

In [15]:
# Path (relative to the working directory) where the trained classifier is pickled.
filename = 'Finalized_SVMl_Model.sav'

In [16]:
# Persist the trained classifier. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

### Save the Preprocessor (StandardScaler)

In [17]:
# Path (relative to the working directory) where the fitted scaler is pickled.
filenamesc = 'sc.pkl'

In [18]:
# Persist the fitted scaler so inference can apply the same standardisation.
# The context manager closes the file handle even if pickling raises.
with open(filenamesc, 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [19]:
# Reload the scaler from the path it was saved to (reusing `filenamesc`
# rather than repeating the literal) and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(filenamesc, 'rb') as scaler_file:
    sc = pickle.load(scaler_file)

In [20]:
# One sample as a 2-D (1 x 5) list, in the column order the scaler was fitted
# on, i.e. the RFE selection printed above: al, sg_c, sg_d, dm_yes, appet_yes.
user_input = [[3, 1, 0, 0, 1]]

In [22]:
# Apply the reloaded scaler to the raw sample; the classifier was trained on scaled features.
scaled_input = sc.transform(user_input)
# Bare last expression so the notebook renders the scaled values.
scaled_input

array([[ 1.70813011,  1.93937981, -0.47140452, -0.74063735,  0.50587104]])

### Load the Model

In [23]:
# Reload the classifier from the path it was saved to (reusing `filename`
# rather than repeating the literal) and close the handle deterministically.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict on the already-scaled sample.
Prediction = loaded_model.predict(scaled_input)

### Prediction

In [25]:
# Display the predicted label array; labels are values of `classification_yes`.
Prediction

array([1])