In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import chi2, RFE, SelectKBest
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import pickle
from sklearn.metrics import r2_score

In [2]:
#feature Selection
def selectkbest(Indep,dep,n):
    best = SelectKBest()
    fit1=best.fit(Indep,dep)
    feature=fit1.transform(Indep)
    return feature
    

In [3]:
#train_test split
def train_test(Indep, dep):
    x_train,x_test,y_train,y_test= train_test_split(Indep, dep, test_size=0.30, random_state=0)
    sc = StandardScaler()
    x_train=sc.fit_transform(x_train)
    x_test=sc.transform(x_test)
    return x_train,x_test, y_train,y_test 

In [4]:
#r2 score
def r2s(regressor,x_test,y_test):
    y_pred=regressor.predict(x_test)
    r2=r2_score(y_test,y_pred)
    return r2

In [5]:
#Linear Regression Model
def linear(x_train,y_train,x_test):
    regressor = LinearRegression()
    regressor.fit(x_train,y_train)
    r2=r2s(regressor,x_test,y_test)
    return r2

In [6]:
#RandomForest Model
def RF(x_train,y_train,x_test):
    regressor=RandomForestRegressor()
    regressor.fit(x_train,y_train)
    r2=r2s(regressor,x_test,y_test)
    return r2

In [7]:
#Decision Tree Model
def DT(x_train,y_train,x_test):
    regressor=DecisionTreeRegressor()
    regressor.fit(x_train,y_train)
    r2=r2s(regressor,x_test,y_test)
    return r2

In [8]:
#SVR Model
def SVRM(x_train,y_train,x_test):
    regressor=SVR()
    regressor.fit(x_train,y_train)
    r2=r2s(regressor,x_test,y_test)
    return r2

In [9]:
#Table Creation
def selectk_regression(linearm,rfm,dtm,svrm):
    dataframe = pd.DataFrame(index=['2','3','4','5','6','7','8'],columns=[ 'Linear', 'RandomForest', 'DecisionTree', 'SVR'])
    for num, idex in enumerate(dataframe.index):
        dataframe.loc[idex, 'Linear']=linearm[num]
        dataframe.loc[idex, 'RandomForest']=rfm[num]
        dataframe.loc[idex,'DecisionTree']=dtm[num]
        dataframe.loc[idex,'SVR']=svrm[num]
    return dataframe

In [10]:
dataset= pd.read_csv("prep.csv")
ds=dataset

In [11]:
ds= pd.get_dummies(ds, drop_first=True)


In [12]:
Indep=ds.drop('classification_yes', axis=1)
dep=ds['classification_yes']


In [13]:
for n in range(2,9):
    kbest = selectkbest(Indep,dep,n)

In [14]:
linearm=[]
rfm=[]
dtm=[]
svrm=[]

In [15]:
x_train,x_test, y_train,y_test= train_test(kbest, dep)
for i in kbest:

    r2_lin= linear(x_train,y_train,x_test)
    linearm.append(r2_lin)

    r2_rf= RF(x_train,y_train,x_test)
    rfm.append(r2_rf)

    r2_dt= DT(x_train,y_train,x_test)
    dtm.append(r2_dt)

    r2_svr= SVRM(x_train,y_train,x_test)
    svrm.append(r2_svr)

result = selectk_regression(linearm,rfm,dtm,svrm)

In [16]:
print("KBest_Chi2")
result

KBest_Chi2


Unnamed: 0,Linear,RandomForest,DecisionTree,SVR
2,0.670275,0.862596,0.751111,0.91814
3,0.670275,0.860668,0.751111,0.91814
4,0.670275,0.863687,0.751111,0.91814
5,0.670275,0.855392,0.786667,0.91814
6,0.670275,0.869728,0.751111,0.91814
7,0.670275,0.860864,0.751111,0.91814
8,0.670275,0.868277,0.786667,0.91814
