In [1]:
import pandas as pd
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
dataset=pd.read_csv('prep.csv')
dataset=pd.get_dummies(dataset,drop_first=True)
dataset

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,False,False,False,False,False,False,True,True,False,True
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,True,False,False,False,False,False,True,False,False,True
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,True,False,False,False,False,False,True,False,False,True
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,True,False,False,False,False,False,True,False,True,True
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,True,False,False,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,True,False,False,False,False,False,True,False,False,True
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,True,False,False,True,True,False,True,False,True,True
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,True,False,False,True,True,False,False,False,False,True
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,True,False,False,True,True,False,True,False,True,True


In [3]:
independent=dataset.drop('classification_yes',axis=1)
dependent=dataset['classification_yes']

In [14]:
def recursiveFeatureElimination(independent,dependent,n):
    rfelist=[]
    
    from sklearn.feature_selection import RFE

    lin=LinearRegression()
    svml=SVR(kernel='linear')
    dt=DecisionTreeRegressor(random_state=0)
    rf=RandomForestRegressor(n_estimators=10,random_state=0)
    
    rfemodellist=[lin,svml,dt,rf]
    
    for estimator in rfemodellist:
        rfe=RFE(estimator,n_features_to_select=n)
        fit=rfe.fit(independent,dependent)
        rfe_features=fit.transform(independent)
        rfelist.append(rfe_features)
    return rfelist

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def split_scaler(independent,dependent):
    X_train,X_test,Y_train,Y_test=train_test_split(independent,dependent,test_size=0.25,random_state=0)
    sc=StandardScaler()
    X_train=sc.fit_transform(X_train)
    X_test=sc.transform(X_test)
    return X_train,X_test,Y_train,Y_test

In [6]:
def r2_prediction(regressor,X_test,Y_test):
    Y_pred=regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2=r2_score(Y_test,Y_pred)
    return r2

In [23]:
def Linear(X_train,Y_train,X_test):
    regressor=LinearRegression()
    regressor.fit(X_train,Y_train)
    r2=r2_prediction(regressor,X_test,Y_test)
    return r2

def svrl(X_train,Y_train,X_test):
    regressor=SVR(kernel='linear')
    regressor.fit(X_train,Y_train)
    r2=r2_prediction(regressor,X_test,Y_test)
    return r2

def svrnl(X_train,Y_train,X_test):
    regressor=SVR(kernel='rbf')
    regressor.fit(X_train,Y_train)
    r2=r2_prediction(regressor,X_test,Y_test)
    return r2

def decisiontree(X_train,Y_train,X_test):
    regressor=DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train,Y_train)
    r2=r2_prediction(regressor,X_test,Y_test)
    return r2

def randomforest(X_train,Y_train,X_test):
    regressor=RandomForestRegressor(n_estimators=10,random_state=0)
    regressor.fit(X_train,Y_train)
    r2=r2_prediction(regressor,X_test,Y_test)
    return r2

In [24]:
def table(acclin,accsvml,accsvmnl,accdt,accrf):
    dataframe=pd.DataFrame(index=['Linear','SVML','DecisionTree','RandomForest'],columns=['Linear','SVML','SVMNL','DecisionTree','RandomForest'])
    
    for number,index in enumerate(dataframe.index):
        dataframe['Linear'][index]=acclin[number]
        dataframe['SVML'][index]=accsvml[number]
        dataframe['SVMNL'][index]=accsvmnl[number]
        dataframe['DecisionTree'][index]=accdt[number]
        dataframe['RandomForest'][index]=accrf[number]
    return dataframe

In [41]:
rfelist=recursiveFeatureElimination(independent,dependent,7)
rfelist

[array([[0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 1., 1., 0.],
        [0., 0., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 1., 0., ..., 1., 0., 0.],
        [0., 1., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.],
        ...,
        [0., 1., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 1., 0., 0.]]),
 array([[ 57.48210526,  12.51815562,  38.86890244, ...,   1.        ,
           0.        ,   0.        ],
        [ 22.        ,  10.7       ,  34.        , ...,   1.        ,
           0.        ,   0.        ],
        [ 23.        ,  12.        ,  34.        , ...,   0.        ,
           0.        ,   0.        ],
        ...,
        [115.        ,   9.1       ,  26.        , ...,   1.        ,
           0.        ,   1.        ],
        [ 80.        ,   8.5       ,  38.86890244, ...,  

In [17]:
acclin=[]
accsvml=[]
accsvmnl=[]
accdt=[]
accrf=[]

In [42]:
for i in rfelist:
    X_train,X_test,Y_train,Y_test=split_scaler(i,dependent)
    
    r2_lin=Linear(X_train,Y_train,X_test)
    acclin.append(r2_lin)
            
    r2_svml=svrl(X_train,Y_train,X_test)
    accsvml.append(r2_svml)
            
    r2_svmnl=svrnl(X_train,Y_train,X_test)
    accsvmnl.append(r2_svmnl)
            
    r2_dt=decisiontree(X_train,Y_train,X_test)
    accdt.append(r2_dt)
            
    r2_rf=randomforest(X_train,Y_train,X_test)
    accrf.append(r2_rf)

In [43]:
result=table(acclin,accsvml,accsvmnl,accdt,accrf)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  dataframe['Linear'][index]=acclin[number]
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame 

In [44]:
result #7

Unnamed: 0,Linear,SVML,SVMNL,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.755437,0.77924,0.780135
SVML,0.620124,0.456871,0.758236,0.776474,0.776745
DecisionTree,0.604508,0.628206,0.897334,0.696181,0.815538
RandomForest,0.674403,0.643365,0.90712,0.836806,0.845303


In [40]:
result #6

Unnamed: 0,Linear,SVML,SVMNL,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.755437,0.77924,0.780135
SVML,0.620124,0.456871,0.758236,0.776474,0.776745
DecisionTree,0.604508,0.628206,0.897334,0.696181,0.815538
RandomForest,0.674403,0.643365,0.90712,0.836806,0.845303


In [28]:
result #5

Unnamed: 0,Linear,SVML,SVMNL,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.755437,0.77924,0.780135
SVML,0.620124,0.456871,0.758236,0.776474,0.776745
DecisionTree,0.604508,0.628206,0.897334,0.696181,0.815538
RandomForest,0.674403,0.643365,0.90712,0.836806,0.845303


In [36]:
result #4

Unnamed: 0,Linear,SVML,SVMNL,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.755437,0.77924,0.780135
SVML,0.620124,0.456871,0.758236,0.776474,0.776745
DecisionTree,0.604508,0.628206,0.897334,0.696181,0.815538
RandomForest,0.674403,0.643365,0.90712,0.836806,0.845303


In [32]:
result #3

Unnamed: 0,Linear,SVML,SVMNL,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.755437,0.77924,0.780135
SVML,0.620124,0.456871,0.758236,0.776474,0.776745
DecisionTree,0.604508,0.628206,0.897334,0.696181,0.815538
RandomForest,0.674403,0.643365,0.90712,0.836806,0.845303


In [None]:
#For all the values of n specified, we received the same output. Hence we can choose k=3 with 90% accuracy for SVM non linear algorithm for 
#model creation and Random Forest algorithm for RFE fitting