In [1]:
# ✅ Importing Libraries :
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.feature_selection import SelectKBest, chi2, RFE

import pickle
import time 

In [2]:
# ✅ 1. Load the pre-processed CKD dataset from the working directory
# (no index column is designated, so every CSV column stays a regular column).
dataset = pd.read_csv("Pre-processed_CKD_Data.csv")
print(dataset.shape)
dataset.head()

(399, 25)


Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.0,76.459948,c,3.0,0.0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.0,76.459948,c,2.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,34.0,12300.0,4.705597,no,no,no,yes,poor,no,yes
2,4.0,76.459948,a,1.0,0.0,normal,normal,notpresent,notpresent,99.0,...,34.0,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.0,76.459948,d,1.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.0,50.0,c,0.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,36.0,12400.0,4.705597,no,no,no,yes,poor,no,yes


In [3]:
#✅ 2-3. One-hot encode the nominal columns into a new frame.
# pd.get_dummies returns a new DataFrame, so `dataset` keeps the raw values
# and `dataset2` holds the encoded version (first level of each category dropped).
dataset2 = pd.get_dummies(dataset, drop_first=True)
print(dataset2.shape)
dataset2.head()

(399, 28)


Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.0,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,False,False,False,False,False,False,True,True,False,True
1,3.0,76.459948,2.0,0.0,148.112676,22.0,0.7,137.528754,4.627244,10.7,...,True,False,False,False,False,False,True,False,False,True
2,4.0,76.459948,1.0,0.0,99.0,23.0,0.6,138.0,4.4,12.0,...,True,False,False,False,False,False,True,False,False,True
3,5.0,76.459948,1.0,0.0,148.112676,16.0,0.7,138.0,3.2,8.1,...,True,False,False,False,False,False,True,False,True,True
4,5.0,50.0,0.0,0.0,148.112676,25.0,0.6,137.528754,4.627244,11.8,...,True,False,False,False,False,False,True,False,False,True


In [4]:
#✅ 4.Assigning Variables (Independent/Dependent) : 

# Features = every encoded column except the target.
# 'Unnamed: 0' is also removed: it is the header-less first CSV column and its
# values in the preview above are 0, 1, 2, ... - presumably a row index saved by
# an earlier to_csv call, not a clinical measurement (NOTE(review): confirm).
# Keeping it would let RFE pick the row position as a predictor.
# errors='ignore' keeps the cell working if the CSV is later loaded without it.
indep_X = (
    dataset2
    .drop(columns=['classification_yes'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
print(indep_X.shape)

# Target = the encoded class column produced by get_dummies.
dep_Y = dataset2['classification_yes']
print(dep_Y.shape)

(399, 27)
(399,)


In [5]:
#✅ 5.Creating Function(s) :

def train_test_split_and_StandardScaler(indep_X, dep_Y, test_size=0.25, random_state=0):
    """Split the data into train/test parts and standardise the features.

    Parameters
    ----------
    indep_X : feature matrix passed straight to ``train_test_split``.
    dep_Y : target values aligned with ``indep_X``.
    test_size : share of rows held out for testing; defaults to the
        previously hard-coded 0.25.
    random_state : seed for the shuffle; defaults to the previously
        hard-coded 0, so existing two-argument calls give the same split.

    Returns
    -------
    (X_train, X_test, Y_train, Y_test) where both feature parts have been
    transformed by a ``StandardScaler``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        indep_X, dep_Y, test_size=test_size, random_state=random_state
    )

    # The scaler is fitted on the training part only and then re-used for the
    # test part, so no test-set statistics enter the scaling.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, Y_train, Y_test

def RFE_Features_Regression(indep_X, dep_Y, n):
    """Run recursive feature elimination once per candidate estimator.

    For each of four regressors (linear regression, linear-kernel SVR,
    decision tree, 10-tree random forest) an ``RFE`` selector keeping ``n``
    features is fitted on ``indep_X`` / ``dep_Y``. Each estimator is printed
    before it is used.

    Returns a list with one reduced feature matrix per estimator, in the
    order listed above.
    """
    candidate_estimators = (
        LinearRegression(),
        SVR(kernel = 'linear'),
        DecisionTreeRegressor(random_state = 0),
        RandomForestRegressor(n_estimators = 10, random_state = 0),
    )

    selected_feature_sets = []
    for estimator in candidate_estimators:
        print(estimator)
        selector = RFE(estimator = estimator, n_features_to_select = n)
        # fit() returns the fitted selector, so the reduction can be chained.
        reduced_features = selector.fit(indep_X, dep_Y).transform(indep_X)
        selected_feature_sets.append(reduced_features)
    return selected_feature_sets

def R2_Prediction(regressor,X_test,Y_test):
    """Return the R2 score of ``regressor``'s predictions for ``X_test`` against ``Y_test``."""
    # Imported here because the notebook's import cell does not bring in sklearn.metrics.
    from sklearn.metrics import r2_score

    predictions = regressor.predict(X_test)
    return r2_score(Y_test, predictions)

def _fit_and_score(regressor, X_train, Y_train, X_test, Y_test=None):
    """Fit ``regressor`` on the training split and return its R2 score on the test split.

    ``Y_test`` used to be read implicitly from the notebook namespace by every
    scorer below. It is now an explicit, optional argument; when it is omitted
    the notebook-level ``Y_test`` is still used, so existing three-argument
    calls behave as before.

    Raises
    ------
    NameError
        If ``Y_test`` is neither passed nor defined at notebook level.
    """
    if Y_test is None:
        try:
            Y_test = globals()['Y_test']
        except KeyError:
            raise NameError(
                "name 'Y_test' is not defined; pass Y_test explicitly"
            ) from None

    regressor.fit(X_train, Y_train)
    # R2_Prediction(regressor, X_test, Y_test) predicts on X_test and returns the R2 score.
    return R2_Prediction(regressor, X_test, Y_test)


def Linear_Regression(X_train, Y_train, X_test, Y_test=None):
    """Fit an ordinary ``LinearRegression`` and return its test-set R2 score."""
    return _fit_and_score(LinearRegression(), X_train, Y_train, X_test, Y_test)


def SVM_Linear(X_train, Y_train, X_test, Y_test=None):
    """Fit an ``SVR`` with a linear kernel and return its test-set R2 score."""
    return _fit_and_score(SVR(kernel = 'linear'), X_train, Y_train, X_test, Y_test)


def SVM_Non_Linear(X_train, Y_train, X_test, Y_test=None):
    """Fit an ``SVR`` with an RBF kernel and return its test-set R2 score."""
    return _fit_and_score(SVR(kernel = 'rbf'), X_train, Y_train, X_test, Y_test)


def DecisionTree(X_train, Y_train, X_test, Y_test=None):
    """Fit a ``DecisionTreeRegressor`` (seed 0) and return its test-set R2 score."""
    return _fit_and_score(
        DecisionTreeRegressor(random_state = 0), X_train, Y_train, X_test, Y_test
    )


def RandomForest(X_train, Y_train, X_test, Y_test=None):
    """Fit a 10-tree ``RandomForestRegressor`` (seed 0) and return its test-set R2 score."""
    return _fit_and_score(
        RandomForestRegressor(n_estimators = 10, random_state = 0),
        X_train, Y_train, X_test, Y_test,
    )

def RFE_Regression(R2_LinearRegression, R2_SVM_Linear, R2_DecisionTree, R2_RandomForest):
    """Arrange the collected R2 scores into a 4 x 4 comparison table.

    Each argument is a sequence of scores for one scored model; position ``i``
    in a sequence becomes row ``i`` of that model's column. Rows and columns
    share the same four labels. Only the first four entries of each sequence
    are used.

    The first label is 'Linear Regression' (it was previously the misleading
    'Logistic Regression'; the scores come from a linear regression model).

    Returns
    -------
    pandas.DataFrame indexed and labelled by the four model names.

    Raises
    ------
    IndexError
        If any sequence holds fewer than four scores.
    """
    model_labels = ['Linear Regression', 'SVM Linear', 'Decision Tree', 'Random Forest']
    score_lists = (R2_LinearRegression, R2_SVM_Linear, R2_DecisionTree, R2_RandomForest)
    expected = len(model_labels)

    table_columns = {}
    for label, scores in zip(model_labels, score_lists):
        if len(scores) < expected:
            raise IndexError(
                f"expected {expected} R2 scores for '{label}', got {len(scores)}"
            )
        table_columns[label] = list(scores[:expected])

    # Building the frame in one step lets pandas infer a numeric dtype instead
    # of the object dtype produced by filling an empty frame cell by cell.
    return pd.DataFrame(table_columns, index=model_labels)

In [22]:
#✅ 6.Calling a Created Function - RFE_Features_Regression (keeping 7 features per estimator): which returns - RFE_List
RFE_List = RFE_Features_Regression(indep_X, dep_Y, n=7)

LinearRegression()
SVR(kernel='linear')
DecisionTreeRegressor(random_state=0)
RandomForestRegressor(n_estimators=10, random_state=0)


In [7]:
#✅ 7.Creating one empty score list per scored model (each is a separate list object)
R2_LinearRegression, R2_SVM_Linear, R2_DecisionTree, R2_RandomForest = [], [], [], []

In [8]:
#✅ 8.Appending the R2 Score of All the Models in Created Empty List Through a For Loop 
# RFE_List holds one reduced feature matrix per RFE estimator; every matrix is
# split/scaled and then scored with four regressors.
# NOTE: the scoring functions are called without Y_test, so they score against
# the notebook-level Y_test assigned by the unpacking below - keep that name.
# NOTE: the lists are appended to, so re-running this cell without re-creating
# the empty lists first adds a second set of scores.
for X in RFE_List:
    X_train, X_test, Y_train, Y_test = train_test_split_and_StandardScaler(X, dep_Y)
    
    R2_Score = Linear_Regression(X_train,Y_train,X_test)
    R2_LinearRegression.append(R2_Score)
    
    R2_Score = SVM_Linear(X_train,Y_train,X_test)  
    R2_SVM_Linear.append(R2_Score)
    
    R2_Score = DecisionTree(X_train,Y_train,X_test)  
    R2_DecisionTree.append(R2_Score)
    
    R2_Score = RandomForest(X_train,Y_train,X_test)  
    R2_RandomForest.append(R2_Score)

In [9]:
#✅ 9.Calling a Created Function - RFE_Regression (one score list per scored model): which returns - dataframe
result = RFE_Regression(
    R2_LinearRegression=R2_LinearRegression,
    R2_SVM_Linear=R2_SVM_Linear,
    R2_DecisionTree=R2_DecisionTree,
    R2_RandomForest=R2_RandomForest,
)

In [10]:
#✅ 10.Displaying the Final R2 Table for the RFE run above (7 features kept per estimator)
# Rows: estimator used inside RFE to pick the features. Columns: regressor that was scored.
result

Unnamed: 0,Logistic Regression,SVM Linear,Decision Tree,Random Forest
Logistic Regression,0.624738,0.456874,0.81723,0.814741
SVM Linear,0.610294,0.530043,0.806415,0.807916
Decision Tree,0.697365,0.665248,0.782986,0.829427
Random Forest,0.705126,0.670093,0.839675,0.875221
