# In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.feature_selection import SelectFromModel

def split_scalar(indep_X, dep_Y):
    """Split the data 75/25 and standardize the feature matrices.

    The split is reproducible (``random_state=0``).  The scaler is fitted on
    the training features only and then applied to both partitions, so the
    test features never influence the scaling statistics.  Targets are
    returned unscaled.

    Args:
        indep_X: Feature matrix.
        dep_Y: Target values aligned with ``indep_X``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` where the two feature
        matrices have been standardized.
    """
    partitions = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    train_features, test_features, train_target, test_target = partitions

    scaler = StandardScaler().fit(train_features)
    return (
        scaler.transform(train_features),
        scaler.transform(test_features),
        train_target,
        test_target,
    )

def r2_prediction(regressor, X_test, y_test):
    """Score an already fitted regressor on held-out data.

    Args:
        regressor: Fitted estimator exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True target values for ``X_test``.

    Returns:
        The R^2 score of the predictions against ``y_test``.
    """
    return r2_score(y_test, regressor.predict(X_test))

def Linear(X_train, y_train, X_test, y_test=None):
    """Fit an ordinary least-squares model and return its R^2 on the test split.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix to score on.
        y_test: Test targets.  Optional for backward compatibility: when
            omitted, the module-level ``y_test`` is used, which is what this
            function previously relied on implicitly.

    Returns:
        The R^2 score computed by ``r2_prediction``.

    Raises:
        KeyError: If ``y_test`` is omitted and no module-level ``y_test``
            exists.
    """
    if y_test is None:
        # Existing three-argument callers depend on the global test targets.
        y_test = globals()["y_test"]
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test=None):
    """Fit a linear-kernel SVR and return its R^2 on the test split.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix to score on.
        y_test: Test targets.  Optional for backward compatibility: when
            omitted, the module-level ``y_test`` is used, which is what this
            function previously relied on implicitly.

    Returns:
        The R^2 score computed by ``r2_prediction``.

    Raises:
        KeyError: If ``y_test`` is omitted and no module-level ``y_test``
            exists.
    """
    if y_test is None:
        # Existing three-argument callers depend on the global test targets.
        y_test = globals()["y_test"]
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
def svm_NL(X_train, y_train, X_test, y_test=None):
    """Fit an RBF-kernel (non-linear) SVR and return its R^2 on the test split.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix to score on.
        y_test: Test targets.  Optional for backward compatibility: when
            omitted, the module-level ``y_test`` is used, which is what this
            function previously relied on implicitly.

    Returns:
        The R^2 score computed by ``r2_prediction``.

    Raises:
        KeyError: If ``y_test`` is omitted and no module-level ``y_test``
            exists.
    """
    if y_test is None:
        # Existing three-argument callers depend on the global test targets.
        y_test = globals()["y_test"]
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a decision-tree regressor and return its R^2 on the test split.

    The tree is built with ``random_state=0`` so repeated calls on the same
    data give the same score.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix to score on.
        y_test: Test targets.  Optional for backward compatibility: when
            omitted, the module-level ``y_test`` is used, which is what this
            function previously relied on implicitly.

    Returns:
        The R^2 score computed by ``r2_prediction``.

    Raises:
        KeyError: If ``y_test`` is omitted and no module-level ``y_test``
            exists.
    """
    if y_test is None:
        # Existing three-argument callers depend on the global test targets.
        y_test = globals()["y_test"]
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def random(X_train, y_train, X_test, y_test=None):
    """Fit a 10-tree random forest regressor and return its R^2 on the test split.

    The forest is built with ``random_state=0`` so repeated calls on the same
    data give the same score.

    NOTE: this function's name shadows the standard-library ``random``
    module within this file; it is kept unchanged so existing callers work.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_test: Test feature matrix to score on.
        y_test: Test targets.  Optional for backward compatibility: when
            omitted, the module-level ``y_test`` is used, which is what this
            function previously relied on implicitly.

    Returns:
        The R^2 score computed by ``r2_prediction``.

    Raises:
        KeyError: If ``y_test`` is omitted and no module-level ``y_test``
            exists.
    """
    if y_test is None:
        # Existing three-argument callers depend on the global test targets.
        y_test = globals()["y_test"]
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def selectk_regression(acclin, accsvml, accsvmnl, accdes, accrf):
    """Tabulate the R^2 score of each regressor in a one-row summary frame.

    The frame has a single row labelled ``'SelectFromModel'`` and one column
    per model.  Row ``k`` of the frame is filled from element ``k`` of every
    score sequence, so with the single row only element 0 of each sequence is
    read; any further elements are ignored.

    Args:
        acclin: Scores of the linear regression model.
        accsvml: Scores of the linear-kernel SVR.
        accsvmnl: Scores of the RBF-kernel SVR.
        accdes: Scores of the decision tree.
        accrf: Scores of the random forest.

    Returns:
        A ``pandas.DataFrame`` with columns ``Linear``, ``SVMl``, ``SVMnl``,
        ``Decision`` and ``Random``.

    Raises:
        IndexError: If any score sequence is empty.
    """
    scores_by_column = {
        'Linear': acclin,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'Decision': accdes,
        'Random': accrf,
    }
    dataframe = pd.DataFrame(index=['SelectFromModel'], columns=list(scores_by_column))

    for number, idx in enumerate(dataframe.index):
        for column, scores in scores_by_column.items():
            # Single-step .loc assignment: chained ``df[col][idx] = value``
            # writes to a temporary object under pandas Copy-on-Write and
            # would leave the frame unfilled.
            dataframe.loc[idx, column] = scores[number]

    return dataframe

# Load the preprocessed data and one-hot encode it, dropping the first level
# of every encoded column.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Separate the target column from the predictors.
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns="classification_yes")

# Feature selection: keep the features whose importance in a linear model is
# at least the mean importance (any estimator exposing importances would do).
# NOTE(review): the selector is fitted on the full dataset, before the
# train/test split performed later, so test rows take part in choosing the
# features -- confirm this is intended.
select_model = LinearRegression()
sfm = SelectFromModel(select_model, threshold='mean')
kbest = sfm.fit(indep_X, dep_Y).transform(indep_X)

# Evaluate every regressor once on the selected features.
#
# The previous version wrapped these calls in ``for i in kbest:``, i.e. one
# iteration per data row with an unused loop variable.  The split, the scaler
# and all five models are deterministic (fixed random_state), so each
# iteration refitted the same models and appended the same score, and the
# summary table only ever reads the first entry of each list.  Fitting once
# produces the same table at a fraction of the cost.
X_train, X_test, y_train, y_test = split_scalar(kbest, dep_Y)

r2_lin = Linear(X_train, y_train, X_test)
acclin = [r2_lin]

r2_sl = svm_linear(X_train, y_train, X_test)
accsvml = [r2_sl]

r2_NL = svm_NL(X_train, y_train, X_test)
accsvmnl = [r2_NL]

r2_d = Decision(X_train, y_train, X_test)
accdes = [r2_d]

r2_r = random(X_train, y_train, X_test)
accrf = [r2_r]

result = selectk_regression(acclin, accsvml, accsvmnl, accdes, accrf)
print(result)


# Output:
#                    Linear      SVMl     SVMnl  Decision    Random
# SelectFromModel  0.709204  0.684292  0.873753  0.952168  0.932773
