In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression, RFE
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

def Linear(X_train, y_train, X_test):
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def svm_linear(X_train, y_train, X_test):
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def svm_NL(X_train, y_train, X_test):
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    selector = SelectFromModel(regressor)
    X_selected = selector.transform(X_test)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def Decision(X_train, y_train, X_test):
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def random(X_train, y_train, X_test):
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def r2_prediction(regressor, X_test, y_test):
    y_pred = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2 = r2_score(y_test, y_pred)
    return r2

def rfeFeature(indep_X, dep_Y, n):
    rfelist = []

    lin = LinearRegression()
    SVRl = SVR(kernel='linear')  
    dec = DecisionTreeRegressor(random_state=0)
    rf = RandomForestRegressor(n_estimators=10, random_state=0)

    rfemodellist = [lin, SVRl, dec, rf]
    for i in rfemodellist:
        print(i)
        log_rfe = RFE(estimator=i, n_features_to_select=n)
        X_rfe = log_rfe.fit_transform(indep_X, dep_Y)
        rfelist.append(X_rfe)

    return rfelist

def selectKBestFeature(indep_X, dep_Y, k):
    X_new = SelectKBest(f_regression, k=k).fit_transform(indep_X, dep_Y)
    return X_new

def rfe_regression(acclin, accsvml, accdes, accrf):
    rfedataframe = pd.DataFrame(index=['Linear', 'SVR', 'Random', 'Decision'], columns=['Linear', 'SVR', 'Random', 'Decision'])
    for number, index in enumerate(rfedataframe.index):
        rfedataframe['Linear'][index] = acclin[number]
        rfedataframe['SVR'][index] = accsvml[number]
        rfedataframe['Random'][index] = accdes[number]
        rfedataframe['Decision'][index] = accrf[number]
    return rfedataframe

dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = dataset1
df2 = pd.get_dummies(df2, drop_first=True)

indep_X = df2.drop('classification_yes', axis=1)
dep_Y = df2['classification_yes']

# Select the number of features for RFE
n_features = 5

# RFE Feature Selection
rfelist = rfeFeature(indep_X, dep_Y, n_features)

# SelectKBest Feature Selection
k_best_X = selectKBestFeature(indep_X, dep_Y, n_features)

acclin = []
accsvml = []
accdes = []
accrf = []

# Iterate through each selected feature set
for i in rfelist:
    X_train, X_test, y_train, y_test = split_scalar(i, dep_Y)
    r2_lin = Linear(X_train, y_train, X_test)
    acclin.append(r2_lin)

    r2_sl = svm_linear(X_train, y_train, X_test)
    accsvml.append(r2_sl)

    r2_d = Decision(X_train, y_train, X_test)
    accdes.append(r2_d)

    r2_r = random(X_train, y_train, X_test)
    accrf.append(r2_r)

# Results for RFE
result_rfe = rfe_regression(acclin, accsvml, accdes, accrf)

# Results for SelectKBest
X_train_kbest, X_test_kbest, y_train_kbest, y_test_kbest = split_scalar(k_best_X, dep_Y)
r2_lin_kbest = Linear(X_train_kbest, y_train_kbest, X_test_kbest)
r2_sl_kbest = svm_linear(X_train_kbest, y_train_kbest, X_test_kbest)
r2_d_kbest = Decision(X_train_kbest, y_train_kbest, X_test_kbest)
r2_r_kbest = random(X_train_kbest, y_train_kbest, X_test_kbest)

result_kbest = pd.DataFrame(index=['Linear', 'SVR', 'Random', 'Decision'],
                            columns=['Linear', 'SVR', 'Random', 'Decision'])
result_kbest['Linear'] = r2_lin_kbest
result_kbest['SVR'] = r2_sl_kbest
result_kbest['Random'] = r2_d_kbest
result_kbest['Decision'] = r2_r_kbest

print("Results for RFE:")
result_rfe

LinearRegression()
SVR(kernel='linear')
DecisionTreeRegressor(random_state=0)
RandomForestRegressor(n_estimators=10, random_state=0)
Results for RFE:


Unnamed: 0,Linear,SVR,Random,Decision
Linear,0.620124,0.457136,0.77924,0.780135
SVR,0.604508,0.456871,0.776474,0.776745
Random,0.674403,0.628206,0.696181,0.815538
Decision,0.686361,0.643365,0.836806,0.845303


In [3]:
print("\nResults for SelectKBest:")
result_kbest


Results for SelectKBest:


Unnamed: 0,Linear,SVR,Random,Decision
Linear,0.597934,0.520226,0.658203,0.83612
SVR,0.597934,0.520226,0.658203,0.83612
Random,0.597934,0.520226,0.658203,0.83612
Decision,0.597934,0.520226,0.658203,0.83612
