In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

def split_scalar(indep_X, dep_Y, test_size=0.25, random_state=0):
    """Split the data into train/test partitions and standardize the features.

    The scaler is fitted on the training features only and then applied to the
    test features, so statistics of the test partition never influence the
    scaling. The targets are returned unscaled.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix.
    dep_Y : array-like
        Target values aligned row-by-row with ``indep_X``.
    test_size : float or int, default 0.25
        Forwarded to ``train_test_split``.
    random_state : int or None, default 0
        Forwarded to ``train_test_split``; the default keeps the split
        reproducible between runs.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the two feature matrices
        are the standardized outputs of ``StandardScaler``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=test_size, random_state=random_state
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

def r2_prediction(regressor, X_test, y_test):
    """Score a fitted regressor on held-out data.

    Predicts on ``X_test`` with ``regressor`` and returns the R² score of
    those predictions measured against ``y_test``.
    """
    return r2_score(y_test, regressor.predict(X_test))

def Linear(X_train, y_train, X_test, y_test=None):
    """Fit an ordinary least-squares model and return its R² on the test data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and targets.
    X_test : array-like
        Test features.
    y_test : array-like, optional
        Test targets. When omitted, the module-level ``y_test`` is used so
        that existing three-argument calls keep working; passing it explicitly
        is preferred because it removes the dependency on notebook state.

    Returns
    -------
    float
        R² of the fitted model on ``(X_test, y_test)``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Legacy path: older callers relied on a global named y_test.
        y_test = globals().get("y_test")
        if y_test is None:
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test=None):
    """Fit a linear-kernel SVR and return its R² on the test data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and targets.
    X_test : array-like
        Test features.
    y_test : array-like, optional
        Test targets. When omitted, the module-level ``y_test`` is used so
        that existing three-argument calls keep working; passing it explicitly
        is preferred because it removes the dependency on notebook state.

    Returns
    -------
    float
        R² of the fitted model on ``(X_test, y_test)``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Legacy path: older callers relied on a global named y_test.
        y_test = globals().get("y_test")
        if y_test is None:
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a decision-tree regressor and return its R² on the test data.

    The tree is created with ``random_state=0`` so repeated runs give the
    same model.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and targets.
    X_test : array-like
        Test features.
    y_test : array-like, optional
        Test targets. When omitted, the module-level ``y_test`` is used so
        that existing three-argument calls keep working; passing it explicitly
        is preferred because it removes the dependency on notebook state.

    Returns
    -------
    float
        R² of the fitted model on ``(X_test, y_test)``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Legacy path: older callers relied on a global named y_test.
        y_test = globals().get("y_test")
        if y_test is None:
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def random(X_train, y_train, X_test, y_test=None):
    """Fit a 10-tree random-forest regressor and return its R² on the test data.

    The forest is created with ``random_state=0`` so repeated runs give the
    same model.

    NOTE(review): this function's name would collide with the standard-library
    ``random`` module if that module were imported in the same namespace. The
    name is kept because existing calls use it.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and targets.
    X_test : array-like
        Test features.
    y_test : array-like, optional
        Test targets. When omitted, the module-level ``y_test`` is used so
        that existing three-argument calls keep working; passing it explicitly
        is preferred because it removes the dependency on notebook state.

    Returns
    -------
    float
        R² of the fitted model on ``(X_test, y_test)``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    if y_test is None:
        # Legacy path: older callers relied on a global named y_test.
        y_test = globals().get("y_test")
        if y_test is None:
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def rfeFeature(indep_X, dep_Y, n):
    """Select the ``n`` best features with ``SelectKBest(f_regression)``.

    Despite the name, no recursive feature elimination is performed and the
    candidate models take no part in the selection: ``SelectKBest`` is fitted
    on ``indep_X`` and ``dep_Y`` alone. The selection is therefore computed
    once. One entry per candidate model is still returned so that callers
    iterating over the result see the same list length as before, which means
    every entry holds the same values.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix.
    dep_Y : array-like
        Target values aligned row-by-row with ``indep_X``.
    n : int or "all"
        Number of features to keep; passed to ``SelectKBest`` as ``k``.

    Returns
    -------
    list
        Four independent copies of the reduced feature matrix, in the order
        of the candidate models listed below.
    """
    rfemodellist = [
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0),
    ]

    # The selection does not depend on the model, so fit a single time
    # instead of refitting the same selector once per model.
    kbest = SelectKBest(score_func=f_regression, k=n)
    selected_features = kbest.fit_transform(indep_X, dep_Y)

    rfelist = []
    for model in rfemodellist:
        print(model)
        # Hand out a copy so the entries stay independent objects.
        rfelist.append(selected_features.copy())

    return rfelist

def rfe_regression(acclog, accsvml, accdes, accrf):
    """Arrange per-regressor R² scores into a 4x4 summary table.

    Each argument is a sequence of scores for one regressor, with one score
    per feature set; only the first four entries of each sequence are used.
    The sequences become the columns ``'Linear'``, ``'SVMl'``, ``'Decision'``
    and ``'Random'``; position ``i`` of every sequence lands in row ``i``.

    The table is built in one step rather than through chained indexing
    (``df[col][idx] = value``), which pandas does not guarantee to write back
    into the frame and which has no effect under copy-on-write.

    NOTE(review): the row labels are kept unchanged for callers that index by
    them, but their order ('Random' before 'DecisionTree') differs from the
    model order used in rfeFeature (decision tree before random forest), and
    'SVC' labels an SVR model — confirm the intended labels.

    Parameters
    ----------
    acclog, accsvml, accdes, accrf : sequence
        Scores for the linear, linear-SVR, decision-tree and random-forest
        regressors respectively; each needs at least four entries.

    Returns
    -------
    pandas.DataFrame
        Scores indexed by feature-set label (rows) and regressor (columns).

    Raises
    ------
    IndexError
        If any sequence has fewer than four entries.
    """
    row_labels = ['Linear', 'SVC', 'Random', 'DecisionTree']
    column_scores = {
        'Linear': acclog,
        'SVMl': accsvml,
        'Decision': accdes,
        'Random': accrf,
    }

    # Index explicitly so that short inputs still fail with IndexError and
    # longer inputs are truncated to the number of rows, as before.
    table = {
        column: [scores[position] for position in range(len(row_labels))]
        for column, scores in column_scores.items()
    }
    rfedataframe = pd.DataFrame(table, index=row_labels, columns=list(column_scores))

    return rfedataframe

# Load the prepared data set and one-hot encode it; drop_first=True drops the
# first level of every encoded column.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# The 'classification_yes' dummy column is the target; all remaining columns
# are the features.
indep_X = df2.drop('classification_yes',axis=1)
dep_Y = df2['classification_yes']

# One reduced feature matrix per candidate model, keeping 10 features each.
rfelist = rfeFeature(indep_X, dep_Y, 10)      

# R² scores per regressor; each list receives one entry per feature set.
acclin = []
accsvml = []
accdes = []
accrf = []

for selected_features in rfelist:   
    # NOTE: the module-level name y_test is load-bearing. The model helpers
    # below are called without the test targets and read y_test from module
    # scope, so this assignment must run before them and keep this name.
    X_train, X_test, y_train, y_test = split_scalar(selected_features, dep_Y)  
    r2_lin = Linear(X_train, y_train, X_test)
    acclin.append(r2_lin)
    
    r2_sl = svm_linear(X_train, y_train, X_test)    
    accsvml.append(r2_sl)
    
    r2_d = Decision(X_train, y_train, X_test)
    accdes.append(r2_d)
    
    r2_r = random(X_train, y_train, X_test)
    accrf.append(r2_r)

# Collect the four score lists into a single summary table.
result = rfe_regression(acclin, accsvml, accdes, accrf)




LinearRegression()
SVR(kernel='linear')
DecisionTreeRegressor(random_state=0)
RandomForestRegressor(n_estimators=10, random_state=0)


In [21]:
# Display the R² summary table held in `result`. The number of selected
# features used for this run is not recorded in this cell.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.574456,0.551214,0.569396,0.719903
SVC,0.574456,0.551214,0.569396,0.719903
Random,0.574456,0.551214,0.569396,0.719903
DecisionTree,0.574456,0.551214,0.569396,0.719903


In [24]:
# R² summary table. The original "#4" note presumably records that this run
# kept n = 4 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.591468,0.530394,0.614198,0.764738
SVC,0.591468,0.530394,0.614198,0.764738
Random,0.591468,0.530394,0.614198,0.764738
DecisionTree,0.591468,0.530394,0.614198,0.764738


In [26]:
# R² summary table. The original "#5" note presumably records that this run
# kept n = 5 selected features — confirm.
result


Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.597934,0.520226,0.658203,0.83612
SVC,0.597934,0.520226,0.658203,0.83612
Random,0.597934,0.520226,0.658203,0.83612
DecisionTree,0.597934,0.520226,0.658203,0.83612


In [28]:
# R² summary table. The original "#6" note presumably records that this run
# kept n = 6 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.64199,0.596767,0.751736,0.885261
SVC,0.64199,0.596767,0.751736,0.885261
Random,0.64199,0.596767,0.751736,0.885261
DecisionTree,0.64199,0.596767,0.751736,0.885261


In [30]:
# R² summary table. The original "#7" note presumably records that this run
# kept n = 7 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.663447,0.643811,0.795139,0.879138
SVC,0.663447,0.643811,0.795139,0.879138
Random,0.663447,0.643811,0.795139,0.879138
DecisionTree,0.663447,0.643811,0.795139,0.879138


In [32]:
# R² summary table. The original "#8" note presumably records that this run
# kept n = 8 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.663763,0.642502,0.739583,0.857639
SVC,0.663763,0.642502,0.739583,0.857639
Random,0.663763,0.642502,0.739583,0.857639
DecisionTree,0.663763,0.642502,0.739583,0.857639


In [34]:
# R² summary table. The original "#9" note presumably records that this run
# kept n = 9 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.659383,0.63906,0.782986,0.855469
SVC,0.659383,0.63906,0.782986,0.855469
Random,0.659383,0.63906,0.782986,0.855469
DecisionTree,0.659383,0.63906,0.782986,0.855469


In [36]:
# R² summary table. The original "#10" note presumably records that this run
# kept n = 10 selected features — confirm.
result

Unnamed: 0,Linear,SVMl,Decision,Random
Linear,0.663376,0.635842,0.782986,0.854601
SVC,0.663376,0.635842,0.782986,0.854601
Random,0.663376,0.635842,0.782986,0.854601
DecisionTree,0.663376,0.635842,0.782986,0.854601
