In [1]:
import pandas as pd
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor



In [15]:
def split_scalar(indep_X, dep_Y, test_size=0.25, random_state=0):
    """Split the data into train/test sets and standardise the features.

    The scaler is fitted on the training features only and then applied to
    the test features, so test-set statistics never influence the scaling.

    Parameters
    ----------
    indep_X : array-like
        Feature matrix.
    dep_Y : array-like
        Target values aligned row-for-row with ``indep_X``.
    test_size : float or int, default=0.25
        Forwarded to ``train_test_split``; the default matches the value
        that used to be hard-coded here.
    random_state : int or None, default=0
        Forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the two feature arrays
        have been standardised and the targets are returned as split.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=test_size, random_state=random_state
    )
    # Feature scaling: learn mean/variance from the training rows only.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

def selectkbest_feature_selection(indep_X, dep_Y, k):
    """Keep the ``k`` features that score highest under ``f_regression``.

    The selector is fitted on the ``indep_X`` / ``dep_Y`` passed in and the
    same ``indep_X`` is then reduced to the selected columns.

    Returns the transformed feature matrix produced by ``SelectKBest``.
    """
    selector = SelectKBest(score_func=f_regression, k=k)
    # ``fit`` returns the fitted selector, so the transform can be chained.
    return selector.fit(indep_X, dep_Y).transform(indep_X)

def r2_prediction(regressor, X_test, y_test):
    """Return the R2 score of a fitted ``regressor`` on the given test data.

    ``regressor`` must expose ``predict``; its predictions for ``X_test`` are
    compared against ``y_test`` with ``r2_score``.
    """
    predictions = regressor.predict(X_test)
    return r2_score(y_test, predictions)

def Linear(X_train, y_train, X_test, y_test=None):
    """Fit a ``LinearRegression`` model and return its R2 on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used to fit the model.
    X_test
        Test features to predict on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what this function silently
        relied on before the parameter existed.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` is defined.
    """
    if y_test is None:
        # Backward-compatible fallback for existing three-argument calls.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test=None):
    """Fit an ``SVR`` with a linear kernel and return its R2 on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used to fit the model.
    X_test
        Test features to predict on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what this function silently
        relied on before the parameter existed.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` is defined.
    """
    if y_test is None:
        # Backward-compatible fallback for existing three-argument calls.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test=None):
    """Fit an ``SVR`` with an RBF kernel and return its R2 on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used to fit the model.
    X_test
        Test features to predict on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what this function silently
        relied on before the parameter existed.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` is defined.
    """
    if y_test is None:
        # Backward-compatible fallback for existing three-argument calls.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a ``DecisionTreeRegressor`` and return its R2 on the test data.

    The tree is built with ``random_state=0`` so repeated runs give the same
    model.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used to fit the model.
    X_test
        Test features to predict on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what this function silently
        relied on before the parameter existed.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` is defined.
    """
    if y_test is None:
        # Backward-compatible fallback for existing three-argument calls.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def random(X_train, y_train, X_test, y_test=None):
    """Fit a ``RandomForestRegressor`` and return its R2 on the test data.

    The forest uses 10 trees and ``random_state=0`` so repeated runs give the
    same model.

    Parameters
    ----------
    X_train, y_train
        Training features and targets used to fit the model.
    X_test
        Test features to predict on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what this function silently
        relied on before the parameter existed.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` is defined.
    """
    # NOTE(review): the name shadows the stdlib ``random`` module if that is
    # ever imported in this notebook; kept unchanged for existing callers.
    if y_test is None:
        # Backward-compatible fallback for existing three-argument calls.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def selectk_Regression(acclog, accsvml, accsvmnl, accdes, accrf):
    """Tabulate per-model scores in a one-row summary DataFrame.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accdes, accrf : sequence
        Score sequences for the 'Linear', 'SVMl', 'SVMnl', 'Decision' and
        'Random' columns respectively. Element ``i`` of each sequence fills
        row ``i`` of the table; with the single ``'f_regression'`` row only
        element 0 is read.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['f_regression']`` with one column per model.
    """
    scores_by_column = {
        'Linear': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'Decision': accdes,
        'Random': accrf,
    }
    dataframe = pd.DataFrame(index=['f_regression'], columns=list(scores_by_column))

    for number, idex in enumerate(dataframe.index):
        for column, scores in scores_by_column.items():
            # Single-step .loc assignment: chained ``df[col][row] = value``
            # may write to a temporary copy and leave the frame unchanged.
            dataframe.loc[idex, column] = scores[number]

    return dataframe



In [4]:
# Load the preprocessed dataset, keeping the default integer index.
dataset1 = pd.read_csv("prep.csv", index_col=None)

df2 = dataset1

# One-hot encode the categorical columns, dropping the first level of each.
df2 = pd.get_dummies(df2, drop_first=True)

# Separate the predictors from the target column. ``columns=`` replaces the
# positional ``axis`` argument (``drop(label, 1)``), which newer pandas
# releases reject.
indep_X = df2.drop(columns='classification_yes')
dep_Y = df2['classification_yes']

In [16]:
# Main code

k_selected = 3  # how many top-scoring features SelectKBest keeps

# Reduce the feature matrix to the k best columns.
# NOTE(review): the selector is fitted on every row, including rows that end
# up in the test split below, so the held-out scores are not fully independent
# of the selection step.
indep_X_selected = selectkbest_feature_selection(indep_X, dep_Y, k_selected)

# Train/test split plus standardisation of the selected features.
X_train, X_test, y_train, y_test = split_scalar(indep_X_selected, dep_Y)

# Fit each model on the same split; every helper returns its test-set R2.
r2_lin = Linear(X_train, y_train, X_test)
r2_svm_linear = svm_linear(X_train, y_train, X_test)
r2_svm_NL = svm_NL(X_train, y_train, X_test)
r2_dec = Decision(X_train, y_train, X_test)
r2_rf = random(X_train, y_train, X_test)

# Single-entry score lists, one per model, for the summary table.
acclin = [r2_lin]
accsvm = [r2_svm_linear]
accsvmnl = [r2_svm_NL]
accdec = [r2_dec]
accrf = [r2_rf]

# Report the R2 values.
print("Linear Regression R2:", r2_lin)
print("SVM Linear Kernel R2:", r2_svm_linear)
print("SVM Nonlinear Kernel R2:", r2_svm_NL)
print("Decision Tree R2:", r2_dec)
print("Random Forest R2:", r2_rf)

Linear Regression R2: 0.5744558189911728
SVM Linear Kernel R2: 0.5512143372309233
SVM Nonlinear Kernel R2: 0.7326073437304528
Decision Tree R2: 0.5693962543624705
Random Forest R2: 0.7199034910854167


In [17]:
# Gather the per-model score lists into the one-row summary table.
result = selectk_Regression(
    acclog=acclin,
    accsvml=accsvm,
    accsvmnl=accsvmnl,
    accdes=accdec,
    accrf=accrf,
)

result


Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
f_regression,0.574456,0.551214,0.732607,0.569396,0.719903
