In [237]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV , RandomizedSearchCV,train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Random Forest 

In [228]:
def r_forest(X,Y):
    """Nested cross-validation of a random forest regressor.

    A grid search and a randomized search over the same hyper-parameter
    space are each wrapped in an outer 5-fold ``cross_validate`` run, so the
    reported scores come from data the inner 3-fold search did not see.

    Parameters
    ----------
    X : feature matrix passed straight through to ``cross_validate``.
    Y : target values passed straight through to ``cross_validate``.

    Returns
    -------
    tuple
        ``(GS_result, RS_result)`` - the dicts returned by ``cross_validate``
        for the grid search and the randomized search respectively.
    """
    model = RandomForestRegressor()
    # NOTE(review): 'mse'/'mae' and max_features='auto' are legacy option
    # names in scikit-learn - confirm they exist in the installed version.
    paramz = {
        'criterion': ('mse', 'mae'),
        'max_features': ('auto', 'sqrt', 'log2'),
    }

    # Inner loop: exhaustive search, model selection by negated MSE.
    # NOTE(review): `iid` was dropped from newer scikit-learn releases -
    # confirm against the installed version before upgrading.
    grid_search = GridSearchCV(estimator=model, param_grid=paramz,
                               scoring='neg_mean_squared_error',
                               cv=3, return_train_score=True, iid=False)
    GS_result = cross_validate(grid_search, X, Y, cv=5,
                               scoring=('neg_mean_squared_error',
                                        'neg_mean_absolute_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)

    # Inner loop: 5 randomly sampled settings from the same space.
    random_search = RandomizedSearchCV(estimator=model,
                                       param_distributions=paramz,
                                       scoring='neg_mean_squared_error',
                                       cv=3, n_iter=5,
                                       return_train_score=True, iid=False)
    # Evaluate on the function arguments; the previous code read the
    # notebook-level globals `x`/`y`, which ignored the data passed in.
    RS_result = cross_validate(random_search, X, Y, cv=5,
                               scoring=('neg_mean_absolute_error',
                                        'neg_mean_squared_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)
    return GS_result, RS_result

# KNeighbors

In [227]:
def kNeighbors(X,Y):
    """Nested cross-validation of a k-nearest-neighbours regressor.

    A grid search and a randomized search over the same hyper-parameter
    space are each wrapped in an outer 5-fold ``cross_validate`` run, so the
    reported scores come from data the inner 3-fold search did not see.

    Parameters
    ----------
    X : feature matrix passed straight through to ``cross_validate``.
    Y : target values passed straight through to ``cross_validate``.

    Returns
    -------
    tuple
        ``(GS_result, RS_result)`` - the dicts returned by ``cross_validate``
        for the grid search and the randomized search respectively.
    """
    model = KNeighborsRegressor()
    paramz = {
        'n_neighbors': [5, 10, 15],
        'algorithm': ('ball_tree', 'kd_tree', 'brute'),
    }

    # Inner loop: exhaustive search, model selection by negated MSE.
    # NOTE(review): `iid` was dropped from newer scikit-learn releases -
    # confirm against the installed version before upgrading.
    grid_search = GridSearchCV(estimator=model, param_grid=paramz,
                               scoring='neg_mean_squared_error',
                               cv=3, return_train_score=True, iid=False)
    GS_result = cross_validate(grid_search, X, Y, cv=5,
                               scoring=('neg_mean_squared_error',
                                        'neg_mean_absolute_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)

    # Inner loop: 5 randomly sampled settings out of the 9 combinations.
    random_search = RandomizedSearchCV(estimator=model,
                                       param_distributions=paramz,
                                       scoring='neg_mean_squared_error',
                                       cv=3, n_iter=5,
                                       return_train_score=True, iid=False)
    # Evaluate on the function arguments; the previous code read the
    # notebook-level globals `x`/`y`, which ignored the data passed in.
    RS_result = cross_validate(random_search, X, Y, cv=5,
                               scoring=('neg_mean_absolute_error',
                                        'neg_mean_squared_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)
    return GS_result, RS_result

# Linear Regressor

In [226]:
def l_r(X,Y):
    """Nested cross-validation of an ordinary least-squares regressor.

    A grid search and a randomized search over the same hyper-parameter
    space are each wrapped in an outer 5-fold ``cross_validate`` run, so the
    reported scores come from data the inner 3-fold search did not see.

    Parameters
    ----------
    X : feature matrix passed straight through to ``cross_validate``.
    Y : target values passed straight through to ``cross_validate``.

    Returns
    -------
    tuple
        ``(GS_result, RS_result)`` - the dicts returned by ``cross_validate``
        for the grid search and the randomized search respectively.
    """
    model = LinearRegression()
    # NOTE(review): `normalize` is a legacy LinearRegression option -
    # confirm it exists in the installed scikit-learn version.
    paramz = {
        'fit_intercept': [True, False],
        'normalize': [True, False],
        'copy_X': [True, False],
    }

    # Inner loop: exhaustive search, model selection by negated MSE.
    # NOTE(review): `iid` was dropped from newer scikit-learn releases -
    # confirm against the installed version before upgrading.
    grid_search = GridSearchCV(estimator=model, param_grid=paramz,
                               scoring='neg_mean_squared_error',
                               cv=3, return_train_score=True, iid=False)
    GS_result = cross_validate(grid_search, X, Y, cv=5,
                               scoring=('neg_mean_squared_error',
                                        'neg_mean_absolute_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)

    # Inner loop: 5 randomly sampled settings out of the 8 combinations.
    random_search = RandomizedSearchCV(estimator=model,
                                       param_distributions=paramz,
                                       scoring='neg_mean_squared_error',
                                       cv=3, n_iter=5,
                                       return_train_score=True, iid=False)
    # Evaluate on the function arguments; the previous code read the
    # notebook-level globals `x`/`y`, which ignored the data passed in.
    RS_result = cross_validate(random_search, X, Y, cv=5,
                               scoring=('neg_mean_absolute_error',
                                        'neg_mean_squared_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)
    return GS_result, RS_result

# Support Vector Regressor

In [239]:
def svr(X,Y):
    """Nested cross-validation of a support vector regressor.

    A grid search and a randomized search over the kernel choice are each
    wrapped in an outer 5-fold ``cross_validate`` run, so the reported
    scores come from data the inner 3-fold search did not see.

    Parameters
    ----------
    X : feature matrix passed straight through to ``cross_validate``.
    Y : target values passed straight through to ``cross_validate``.

    Returns
    -------
    tuple
        ``(GS_result, RS_result)`` - the dicts returned by ``cross_validate``
        for the grid search and the randomized search respectively.
    """
    model = SVR()
    paramz = {'kernel': ('linear', 'poly', 'rbf', 'sigmoid')}

    # Inner loop: exhaustive search, model selection by negated MSE.
    # NOTE(review): `iid` was dropped from newer scikit-learn releases -
    # confirm against the installed version before upgrading.
    grid_search = GridSearchCV(estimator=model, param_grid=paramz,
                               scoring='neg_mean_squared_error',
                               cv=3, return_train_score=True, iid=False)
    GS_result = cross_validate(grid_search, X, Y, cv=5,
                               scoring=('neg_mean_squared_error',
                                        'neg_mean_absolute_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)

    # NOTE(review): the space holds only 4 kernels but n_iter is 5 - how
    # the sampler handles n_iter > grid size depends on the scikit-learn
    # version; confirm this does what is intended.
    random_search = RandomizedSearchCV(estimator=model,
                                       param_distributions=paramz,
                                       scoring='neg_mean_squared_error',
                                       cv=3, n_iter=5,
                                       return_train_score=True, iid=False)
    # Evaluate on the function arguments; the previous code read the
    # notebook-level globals `x`/`y`, which ignored the data passed in.
    RS_result = cross_validate(random_search, X, Y, cv=5,
                               scoring=('neg_mean_absolute_error',
                                        'neg_mean_squared_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)
    return GS_result, RS_result

# Decision Tree Regressor

In [247]:
def dtr(X,Y):
    """Nested cross-validation of a decision tree regressor.

    A grid search and a randomized search over ``min_samples_split`` are
    each wrapped in an outer 5-fold ``cross_validate`` run, so the reported
    scores come from data the inner 3-fold search did not see.

    Parameters
    ----------
    X : feature matrix passed straight through to ``cross_validate``.
    Y : target values passed straight through to ``cross_validate``.

    Returns
    -------
    tuple
        ``(GS_result, RS_result)`` - the dicts returned by ``cross_validate``
        for the grid search and the randomized search respectively.
    """
    model = DecisionTreeRegressor()
    paramz = {'min_samples_split': [2, 3, 4]}

    # Inner loop: exhaustive search, model selection by negated MSE.
    # NOTE(review): `iid` was dropped from newer scikit-learn releases -
    # confirm against the installed version before upgrading.
    grid_search = GridSearchCV(estimator=model, param_grid=paramz,
                               scoring='neg_mean_squared_error',
                               cv=3, return_train_score=True, iid=False)
    GS_result = cross_validate(grid_search, X, Y, cv=5,
                               scoring=('neg_mean_squared_error',
                                        'neg_mean_absolute_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)

    # NOTE(review): the space holds only 3 values but n_iter is 5 - how
    # the sampler handles n_iter > grid size depends on the scikit-learn
    # version; confirm this does what is intended.
    random_search = RandomizedSearchCV(estimator=model,
                                       param_distributions=paramz,
                                       scoring='neg_mean_squared_error',
                                       cv=3, n_iter=5,
                                       return_train_score=True, iid=False)
    # Evaluate on the function arguments; the previous code read the
    # notebook-level globals `x`/`y`, which ignored the data passed in.
    RS_result = cross_validate(random_search, X, Y, cv=5,
                               scoring=('neg_mean_absolute_error',
                                        'neg_mean_squared_error',
                                        'neg_median_absolute_error'),
                               return_train_score=True)
    return GS_result, RS_result
    

# Breast Cancer Dataframe

In [248]:
# Load the breast-cancer CSV, min-max scale it and run every model helper on it.
# `scaler` is created here and reused by the dataset cells further down.
scaler=MinMaxScaler()
breastC=pd.read_csv("breastC.csv",header=None)
df=breastC.values
# Target is column 0, taken BEFORE scaling so it keeps its original units.
y=df[:,0]
# NOTE(review): the scaler is fit on every row (and on the target column)
# before any cross-validation split, so held-out folds influence the
# scaling - consider scaling inside a Pipeline instead.
scaler.fit(df)
df=scaler.transform(df)
# Features are the remaining (scaled) columns.
x=df[:,1:]
# Return values are not stored; in a notebook only the value of the last
# expression in the cell (the dtr call) is displayed.
r_forest(x,y)
l_r(x,y)
kNeighbors(x,y)
svr(x,y)
dtr(x,y)



({'fit_time': array([0.03886366, 0.03689981, 0.03793073, 0.06180167, 0.03889489]),
  'score_time': array([0.00099778, 0.00099897, 0.00099874, 0.00099659, 0.0009985 ]),
  'test_neg_mean_squared_error': array([-80.28205128, -40.33333333, -66.97435897, -54.47364672,
         -76.86842105]),
  'train_neg_mean_squared_error': array([-0.        , -0.        , -0.22903226, -0.65698925, -0.        ]),
  'test_neg_mean_absolute_error': array([-5.97435897, -3.87179487, -5.61538462, -4.59401709, -6.02631579]),
  'train_neg_mean_absolute_error': array([-0.        , -0.        , -0.18709677, -0.34193548, -0.        ]),
  'test_neg_median_absolute_error': array([-2., -1., -4., -1., -3.]),
  'train_neg_median_absolute_error': array([-0., -0., -0., -0., -0.])},
 {'fit_time': array([0.03889632, 0.03789759, 0.03889346, 0.03988457, 1.1509378 ]),
  'score_time': array([0.00099897, 0.00099826, 0.00099945, 0.00099993, 0.00303078]),
  'test_neg_mean_absolute_error': array([-4.02564103, -4.34615385, -5.576923

# CPU Computer Hardware 

In [250]:
# Load the computer-hardware CSV, min-max scale it and run every model helper.
machine=pd.read_csv("machine.csv",header=None)

machine_values=machine.values
# Keep columns 2..8 only; the last of those is used as the target.
machine_values=machine_values[:,2:9]
# Target is taken BEFORE scaling so it keeps its original units.
y=machine_values[:,-1]
# Reuses the `scaler` object created in the breast-cancer cell, so that
# cell must have been run first.
# NOTE(review): fit on all rows (target column included) before any
# cross-validation split - held-out folds influence the scaling.
scaler.fit(machine_values)
machine_values=scaler.transform(machine_values)
# Features are every scaled column except the last.
x=machine_values[:,:-1]

# Return values are not stored; in a notebook only the value of the last
# expression in the cell (the dtr call) is displayed.
r_forest(x,y)
l_r(x,y)
kNeighbors(x,y)
svr(x,y)
dtr(x,y)



({'fit_time': array([0.01997137, 0.01798511, 0.01894927, 0.01791787, 0.01795435]),
  'score_time': array([0.0009644 , 0.00099993, 0.00099921, 0.00199652, 0.00099683]),
  'test_neg_mean_squared_error': array([ -7169.23611111,  -3284.11904762,  -2247.5       ,  -2975.46428571,
         -31545.34485095]),
  'train_neg_mean_squared_error': array([-281.29121756, -123.15668663, -116.24830339, -110.43692615,
         -293.81130952]),
  'test_neg_mean_absolute_error': array([-48.79761905, -26.97619048, -26.35714286, -42.07142857,
         -79.16666667]),
  'train_neg_mean_absolute_error': array([-6.70538922, -3.06786427, -2.81516966, -2.68942116, -8.66150794]),
  'test_neg_median_absolute_error': array([-21.75, -11.5 , -11.  , -30.5 , -20.  ]),
  'train_neg_median_absolute_error': array([-1.5, -0. , -0. , -0. , -2.5])},
 {'fit_time': array([0.01891327, 0.01798725, 0.01795197, 0.01994228, 0.01891804]),
  'score_time': array([0.00099707, 0.0009954 , 0.00099516, 0.00099921, 0.00099754]),
  'test_

# Crime Dataframe

In [251]:

# Load the communities/crime CSV, clean it, min-max scale it and run every
# model helper on it.
communities=pd.read_csv("communities.csv",header=None)
# Positions of the leading columns to discard.
coloana=[0,1,2,3,4]
communities.drop(communities.columns[coloana],axis=1,inplace=True)
# Boolean-mask indexing turns every '?' cell into NaN ...
communities = communities[communities != '?']
# ... so that every column containing at least one '?' is dropped here.
communities.dropna(axis=1,inplace=True)
com=communities.values
# Target is the last remaining column, taken BEFORE scaling.
y=com[:,-1]
# Reuses the `scaler` object created in the breast-cancer cell, so that
# cell must have been run first.
# NOTE(review): fit on all rows (target column included) before any
# cross-validation split - held-out folds influence the scaling.
scaler.fit(com)
com=scaler.transform(com)
# Features are every scaled column except the last.
x=com[:,:-1]
# Return values are not stored; in a notebook only the value of the last
# expression in the cell (the dtr call) is displayed.
r_forest(x,y)
l_r(x,y)
kNeighbors(x,y)
svr(x,y)
dtr(x,y)



({'fit_time': array([1.08932781, 1.04996562, 1.01183677, 1.01382446, 0.99587202]),
  'score_time': array([0.00199556, 0.00199676, 0.00099921, 0.00199294, 0.00199628]),
  'test_neg_mean_squared_error': array([-0.04312322, -0.04069436, -0.03715559, -0.0353401 , -0.04073266]),
  'train_neg_mean_squared_error': array([-4.81630094e-04, -3.02382445e-04, -7.36217346e-04, -7.42599947e-36,
         -9.86496071e-36]),
  'test_neg_mean_absolute_error': array([-0.13660401, -0.13513784, -0.12821637, -0.12328321, -0.13135678]),
  'train_neg_mean_absolute_error': array([-1.06750261e-02, -5.96865204e-03, -1.17784744e-02, -3.91536333e-19,
         -5.34764380e-19]),
  'test_neg_median_absolute_error': array([-0.07333333, -0.08      , -0.07333333, -0.07      , -0.08      ]),
  'train_neg_median_absolute_error': array([-0.005, -0.   , -0.005, -0.   , -0.   ])},
 {'fit_time': array([1.02230334, 0.9733479 , 1.0120523 , 1.04923248, 1.02987909]),
  'score_time': array([0.00099897, 0.00099683, 0.00199962, 0.0

# Boston Housing

In [252]:
# Load the whitespace-separated Boston housing file, min-max scale it and
# run every model helper on it.
# The separator is a raw string: '\s' inside a normal string literal is an
# invalid escape sequence that newer Python versions warn about.
housing=pd.read_csv("housing.csv",delimiter=r'\s+',header=None)
df=housing.values
# Target is the last column, taken BEFORE scaling so it keeps its units.
y=df[:,-1]
# Reuses the `scaler` object created in the breast-cancer cell, so that
# cell must have been run first.
# NOTE(review): fit on all rows (target column included) before any
# cross-validation split - held-out folds influence the scaling.
scaler.fit(df)
df=scaler.transform(df)
# Features are every scaled column except the last.
x=df[:,:-1]
# Return values are not stored; in a notebook only the value of the last
# expression in the cell (the dtr call) is displayed.
r_forest(x,y)
l_r(x,y)
kNeighbors(x,y)
svr(x,y)
dtr(x,y)



({'fit_time': array([0.03889656, 0.04388189, 0.05385447, 0.04085159, 0.0409236 ]),
  'score_time': array([0.00199509, 0.00199604, 0.0009985 , 0.00099874, 0.00096536]),
  'test_neg_mean_squared_error': array([-11.96352941, -53.88336634, -28.4230253 , -51.36963696,
         -75.49544554]),
  'train_neg_mean_squared_error': array([-0.        , -0.        , -0.37389712, -0.55255144, -0.        ]),
  'test_neg_mean_absolute_error': array([-2.7372549 , -4.45148515, -3.91551155, -4.24257426, -5.07722772]),
  'train_neg_mean_absolute_error': array([-0.        , -0.        , -0.37950617, -0.40329218, -0.        ]),
  'test_neg_median_absolute_error': array([-2.2       , -2.5       , -2.96666667, -2.55      , -3.2       ]),
  'train_neg_median_absolute_error': array([-0. , -0. , -0.2, -0.2, -0. ])},
 {'fit_time': array([0.0398922 , 0.04089212, 0.0458777 , 0.04107237, 0.04487848]),
  'score_time': array([0.0009985 , 0.00103426, 0.00099492, 0.00099802, 0.00099945]),
  'test_neg_mean_absolute_error