In [1]:
import os
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score


def split_scalar(indep_X, dep_Y):
    """Split the data 75/25 and standardise the feature columns.

    The scaler is fitted on the training rows only and then applied to both
    parts, so the held-out rows do not influence the scaling statistics.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix.
    dep_Y : array-like
        Target values aligned row-wise with ``indep_X``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; the two feature parts are
        scaled, the targets are returned as produced by the split.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_X,
        dep_Y,
        test_size=0.25,
        random_state=0,
    )

    # Fit once on the training part, then reuse the same fitted scaler.
    scaler = StandardScaler().fit(features_train)

    return (
        scaler.transform(features_train),
        scaler.transform(features_test),
        target_train,
        target_test,
    )


def r2_prediction(model, X_test, y_test):
    """Return the R² score of ``model``'s predictions on ``X_test``.

    ``model`` must already be fitted and expose ``predict``; ``y_test`` holds
    the true targets the predictions are compared against.
    """
    predicted = model.predict(X_test)
    score = r2_score(y_test, predicted)
    return score


def Linear(X_train, y_train, X_test, y_test):
    """Fit a ``LinearRegression`` on the training split and return its test R²."""
    regressor = LinearRegression().fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)


def svm_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel ``SVR`` on the training split and return its test R²."""
    regressor = SVR(kernel='linear').fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)


def Decision(X_train, y_train, X_test, y_test):
    """Fit a ``DecisionTreeRegressor`` (seed 0) and return its test R²."""
    regressor = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)


def random_forest(X_train, y_train, X_test, y_test):
    """Fit a 10-tree ``RandomForestRegressor`` (seed 0) and return its test R²."""
    regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(
        X_train, y_train
    )
    return r2_prediction(regressor, X_test, y_test)


def rfeFeature(indep_X, dep_Y, n_features):
    """Run recursive feature elimination once per base estimator.

    Parameters
    ----------
    indep_X : array-like
        Full feature matrix.
    dep_Y : array-like
        Target values.
    n_features : int
        Number of features each ``RFE`` run keeps.

    Returns
    -------
    list
        One reduced feature matrix per estimator, in the order: linear
        regression, linear-kernel SVR, decision tree, random forest.
    """
    estimators = (
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0),
    )

    reduced_sets = []
    for estimator in estimators:
        selector = RFE(estimator, n_features_to_select=n_features)
        reduced_sets.append(selector.fit_transform(indep_X, dep_Y))

    return reduced_sets


# Driver: load the prepared dataset, select 3 features with RFE under each
# base estimator, then score all four regressors on every selected subset.
DATA_FILE = "prep (1).csv"

try:
    dataset = pd.read_csv(DATA_FILE)
except FileNotFoundError:
    # Only the missing-file case is handled here. Any other failure (parse
    # error, permission problem, interrupt) propagates with its real
    # traceback instead of being reported as "not found".
    print(f"\n⚠️ {DATA_FILE} not found in current folder.")
    print(f"⚠️ Place {DATA_FILE} in the same folder and re-run.")
else:
    # One-hot encode the non-numeric columns; drop_first drops one level
    # per encoded column.
    df = pd.get_dummies(dataset, drop_first=True)

    indep_X = df.drop(columns="classification_yes")
    dep_Y = df["classification_yes"]

    # One reduced feature matrix per RFE base estimator, in the same order
    # as the row labels of the result table below.
    rfelist = rfeFeature(indep_X, dep_Y, 3)

    # One list of R² scores per scoring model; each list gains one entry per
    # RFE feature subset.
    acclin, accsvml, accdes, accrf = [], [], [], []

    for X_sel in rfelist:
        X_train, X_test, y_train, y_test = split_scalar(X_sel, dep_Y)

        acclin.append(Linear(X_train, y_train, X_test, y_test))
        accsvml.append(svm_linear(X_train, y_train, X_test, y_test))
        accdes.append(Decision(X_train, y_train, X_test, y_test))
        accrf.append(random_forest(X_train, y_train, X_test, y_test))

    # Rows: estimator that drove the feature selection.
    # Columns: model that was trained and scored on the selected features.
    result = pd.DataFrame(
        {
            "Linear": acclin,
            "SVM_Linear": accsvml,
            "DecisionTree": accdes,
            "RandomForest": accrf
        },
        index=["Linear_RFE", "SVM_RFE", "Decision_RFE", "RF_RFE"]
    )

    print("\nRFE REGRESSION RESULT:\n")
    print(result)


RFE REGRESSION RESULT:

                Linear  SVM_Linear  DecisionTree  RandomForest
Linear_RFE    0.441961    0.262153      0.441961      0.441816
SVM_RFE       0.441961    0.262153      0.441961      0.441816
Decision_RFE  0.664893    0.609652      0.965961      0.916304
RF_RFE        0.676174    0.670691      0.933504      0.887256


In [2]:
# Display the R² table as the cell's output. `result` is only assigned when
# the CSV load in the previous cell succeeded; otherwise this raises NameError.
result

Unnamed: 0,Linear,SVM_Linear,DecisionTree,RandomForest
Linear_RFE,0.441961,0.262153,0.441961,0.441816
SVM_RFE,0.441961,0.262153,0.441961,0.441816
Decision_RFE,0.664893,0.609652,0.965961,0.916304
RF_RFE,0.676174,0.670691,0.933504,0.887256
