In [1]:
import pandas as pd
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split

In [3]:
def split_scalar(indep_X,dep_Y):
    X_train,X_test,y_train,y_test=train_test_split(indep_X,dep_Y,test_size=1/4,random_state=0)
    sc=StandardScaler()
    X_train=sc.fit_transform(X_train)
    X_test=sc.transform(X_test)
    return X_train,X_test,y_train,y_test


def r2_prediction(regressor,X_test,y_test):
    y_pred=regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2=r2_score(y_test,y_pred)
    return r2

def Linear(X_train,y_train,X_test):
    from sklearn.linear_model import LinearRegression
    regressor=LinearRegression()
    regressor.fit(X_train,y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return r2

def svm(X_train,y_train,X_test):
    
    from sklearn.svm import SVR
    from sklearn.model_selection import GridSearchCV
    param_grid={'kernel':['linear', 'poly', 'rbf', 'sigmoid'],'gamma':['scale','auto'],'C':[0.1,1,10,100]}
    regressor=GridSearchCV(SVR(),param_grid,refit=True,verbose=0,n_jobs=1)
    regressor.fit(X_train,y_train)
    print(f'r2_score for the best parameter of SVM Tree is : ',regressor.best_params_)
    r2=r2_prediction(regressor,X_test,y_test)
    return r2
    
def Decision(X_train,y_train,X_test):
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.model_selection import GridSearchCV
    param_grid={'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
           'splitter':['best', 'random'],
           'max_features':[ 'sqrt', 'log2']}
    regressor=GridSearchCV(DecisionTreeRegressor(),param_grid,refit=True,verbose=0,n_jobs=1)
    regressor.fit(X_train,y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return r2

def Random(X_train,y_train,X_test):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import GridSearchCV
    param_grid={'criterion':['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],
           'max_features':['sqrt', 'log2'],'n_estimators':[10,50,100,150]}
    regressor=GridSearchCV(RandomForestRegressor(),param_grid,refit=True,verbose=0,n_jobs=1)
    regressor.fit(X_train,y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return r2

def rfeFeature(indep_X, dep_Y, n):
    # Define param_grid for Linear Regression
    lin_param_grid = {}  
    
    # Define param_grid for SVM
    svm_param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'gamma': ['scale', 'auto'], 'C': [0.1, 1, 10, 100]}
    
    # Define param_grid for Decision Tree
    decision_param_grid = {'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                           'splitter': ['best', 'random'],
                           'max_features': ['sqrt', 'log2']}
    
    # Define param_grid for RandomForest
    random_param_grid = {'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson'],
                         'max_features': ['sqrt', 'log2'], 'n_estimators': [10, 50, 100, 150]}
    
    rfelist = []
    
    from sklearn.linear_model import LinearRegression
    lin = LinearRegression()
    
    from sklearn.svm import SVR
    from sklearn.model_selection import GridSearchCV
    svr = GridSearchCV(SVR(), svm_param_grid, refit=True, verbose=0, n_jobs=1)

    from sklearn.tree import DecisionTreeRegressor
    dec = GridSearchCV(DecisionTreeRegressor(), decision_param_grid, refit=True, verbose=0, n_jobs=1)

    from sklearn.ensemble import RandomForestRegressor
    rf = GridSearchCV(RandomForestRegressor(), random_param_grid, refit=True, verbose=0, n_jobs=1)

    rfemodellist = [lin, svr, dec, rf]
    for i in rfemodellist:
        print(i)
        log_rfe = RFE(i, n_features_to_select=n)
        log_fit = log_rfe.fit(indep_X, dep_Y)
        log_rfe_feature = log_fit.transform(indep_X)
        rfelist.append(log_rfe_feature)
    return rfelist


    
def rfe_regression(acclog,accsvml,accdes,accrf): 
    
    rfedataframe=pd.DataFrame(index=['Linear','SVC','Random','DecisionTree'],columns=['Linear','SVM',
                                                                                        'Decision','Random'])

    for number,idex in enumerate(rfedataframe.index):
        
        rfedataframe['Linear'][idex]=acclog[number]       
        rfedataframe['SVM'][idex]=accsvm[number]
        rfedataframe['Decision'][idex]=accdes[number]
        rfedataframe['Random'][idex]=accrf[number]
    return rfedataframe

In [4]:
df=pd.read_csv('prep.csv',index_col=None)
df2=df
df2=pd.get_dummies(df2,drop_first=True)

indep_X=df2.drop('classification_yes',1)
dep_Y=df2['classification_yes']

In [None]:
rfelist=rfeFeature(indep_X,dep_Y,3)

acclin=[]
accsvm=[]
accdes=[]
accrf=[]

for i in rfelist:
    X_train,X_test,y_train,y_test=split_scalar(i,dep_Y)
    r2_lin=Linear(X_train,y_train,X_test)
    acclin.append(r2_lin)
    
    r2_sl=svm(X_train,y_test,X_test)
    accsvm.append(r2_sl)
    
    r2_d=Decision(X_train,y_test,X_test)
    accdes.append(r2_d)
    
    r2_r=Random(X_train,y_test,X_test)
    accrf.append(r2_r)
    

LinearRegression()
GridSearchCV(estimator=SVR(), n_jobs=1,
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']})
