In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

def r2_prediction(regressor, X_test, y_test):
    y_pred = regressor.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    return r2

def Linear(X_train, y_train, X_test):       
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2

def svm_linear(X_train, y_train, X_test):                
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2  
    
def svm_NL(X_train, y_train, X_test):                
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2  

def Decision(X_train, y_train, X_test):        
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2  

def random(X_train, y_train, X_test):       
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2 

def backward_selection_regression(indep_X, dep_Y):
    selected_features = list(indep_X.columns)
    best_r2 = 0

    while len(selected_features) > 1:
        r2_list = []
        for feature in selected_features:
            remaining_features = [feat for feat in selected_features if feat != feature]
            X_train, X_test, y_train, y_test = split_scalar(indep_X[remaining_features], dep_Y)
            r2 = Linear(X_train, y_train, X_test)  # You can replace with any regression function you want
            r2_list.append((feature, r2))

        r2_list.sort(key=lambda x: x[1], reverse=True)
        best_feature, best_r2 = r2_list[0]

        if best_r2 >= 0:
            selected_features.remove(best_feature)
        else:
            break

    return selected_features

dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

indep_X = df2.drop("classification_yes", axis=1)
dep_Y = df2['classification_yes']

selected_features = backward_selection_regression(indep_X, dep_Y)
X_train, X_test, y_train, y_test = split_scalar(df2[selected_features], dep_Y)

r2_lin = Linear(X_train, y_train, X_test)
r2_sl = svm_linear(X_train, y_train, X_test)
r2_NL = svm_NL(X_train, y_train, X_test)
r2_d = Decision(X_train, y_train, X_test)
r2_r = random(X_train, y_train, X_test)

# Create a table using pandas DataFrame
result= pd.DataFrame({'Model': ['Linear', 'SVM Linear', 'SVM Non-Linear', 'Decision Tree', 'Random Forest'],
                             'R2 Score': [r2_lin, r2_sl, r2_NL, r2_d, r2_r]})


print(result)


            Model  R2 Score
0          Linear  0.555692
1      SVM Linear  0.545355
2  SVM Non-Linear  0.648081
3   Decision Tree  0.645411
4   Random Forest  0.634405


In [21]:
result

Unnamed: 0,Model,R2 Score
0,Linear,0.555692
1,SVM Linear,0.545355
2,SVM Non-Linear,0.648081
3,Decision Tree,0.645411
4,Random Forest,0.634405
