In [237]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV , RandomizedSearchCV,train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Random Forest 

In [228]:
def r_forest(X,Y):
    """Nested cross-validation of a random forest regressor.

    Tunes ``criterion`` and ``max_features`` with an inner 3-fold search
    (once exhaustively, once with 5 randomly sampled candidates) and scores
    each tuned search with an outer 5-fold ``cross_validate``.

    Parameters
    ----------
    X : feature matrix passed straight to ``cross_validate``.
    Y : target values passed straight to ``cross_validate``.

    Returns
    -------
    (GS_result, RS_result) : the ``cross_validate`` result dicts for the
        grid search and the randomized search respectively, each holding
        train and test scores for the MSE / MAE / median-AE scorers.
    """
    model = RandomForestRegressor()
    paramz = {
        'criterion': ('mse', 'mae'),
        'max_features': ('auto', 'sqrt', 'log2'),
    }

    # Inner loop: exhaustive search over every combination in `paramz`.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        iid=False,
    )
    GS_result = cross_validate(
        grid_search, X, Y,
        cv=5,
        scoring=('neg_mean_squared_error',
                 'neg_mean_absolute_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )

    # Inner loop: 5 randomly drawn combinations from the same space.
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        n_iter=5,
        return_train_score=True,
        iid=False,
    )
    # Score on the function's own arguments. The previous version read the
    # notebook-level `x`/`y` here, which fails on a fresh kernel and otherwise
    # evaluates whatever dataset happened to be loaded last.
    RS_result = cross_validate(
        random_search, X, Y,
        cv=5,
        scoring=('neg_mean_absolute_error',
                 'neg_mean_squared_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )
    return GS_result, RS_result

# KNeighbors

In [227]:
def kNeighbors(X,Y):
    """Nested cross-validation of a k-nearest-neighbours regressor.

    Tunes ``n_neighbors`` and ``algorithm`` with an inner 3-fold search
    (once exhaustively, once with 5 randomly sampled candidates) and scores
    each tuned search with an outer 5-fold ``cross_validate``.

    Parameters
    ----------
    X : feature matrix passed straight to ``cross_validate``.
    Y : target values passed straight to ``cross_validate``.

    Returns
    -------
    (GS_result, RS_result) : the ``cross_validate`` result dicts for the
        grid search and the randomized search respectively.
    """
    model = KNeighborsRegressor()
    paramz = {
        'n_neighbors': [5, 10, 15],
        'algorithm': ('ball_tree', 'kd_tree', 'brute'),
    }

    # Inner loop: exhaustive search over every combination in `paramz`.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        iid=False,
    )
    GS_result = cross_validate(
        grid_search, X, Y,
        cv=5,
        scoring=('neg_mean_squared_error',
                 'neg_mean_absolute_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )

    # Inner loop: 5 randomly drawn combinations from the same space.
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        n_iter=5,
        return_train_score=True,
        iid=False,
    )
    # Use the X, Y arguments; the earlier code picked up the notebook
    # globals `x`/`y`, ignoring whatever the caller passed in.
    RS_result = cross_validate(
        random_search, X, Y,
        cv=5,
        scoring=('neg_mean_absolute_error',
                 'neg_mean_squared_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )
    return GS_result, RS_result

# Linear Regressor

In [226]:
def l_r(X,Y):
    """Nested cross-validation of an ordinary least-squares regressor.

    Tunes ``fit_intercept``, ``normalize`` and ``copy_X`` with an inner
    3-fold search (once exhaustively, once with 5 randomly sampled
    candidates) and scores each tuned search with an outer 5-fold
    ``cross_validate``.

    Parameters
    ----------
    X : feature matrix passed straight to ``cross_validate``.
    Y : target values passed straight to ``cross_validate``.

    Returns
    -------
    (GS_result, RS_result) : the ``cross_validate`` result dicts for the
        grid search and the randomized search respectively.
    """
    model = LinearRegression()
    paramz = {
        'fit_intercept': [True, False],
        'normalize': [True, False],
        'copy_X': [True, False],
    }

    # Inner loop: exhaustive search over every combination in `paramz`.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        iid=False,
    )
    GS_result = cross_validate(
        grid_search, X, Y,
        cv=5,
        scoring=('neg_mean_squared_error',
                 'neg_mean_absolute_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )

    # Inner loop: 5 randomly drawn combinations from the same space.
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        n_iter=5,
        return_train_score=True,
        iid=False,
    )
    # Evaluate on the arguments rather than the notebook globals `x`/`y`
    # that the earlier version referenced by mistake.
    RS_result = cross_validate(
        random_search, X, Y,
        cv=5,
        scoring=('neg_mean_absolute_error',
                 'neg_mean_squared_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )
    return GS_result, RS_result

# Support Vector Regressor

In [239]:
def svr(X,Y):
    """Nested cross-validation of a support vector regressor.

    Tunes ``kernel`` with an inner 3-fold search (once exhaustively, once
    via randomized search) and scores each tuned search with an outer
    5-fold ``cross_validate``.

    Parameters
    ----------
    X : feature matrix passed straight to ``cross_validate``.
    Y : target values passed straight to ``cross_validate``.

    Returns
    -------
    (GS_result, RS_result) : the ``cross_validate`` result dicts for the
        grid search and the randomized search respectively.
    """
    model = SVR()
    paramz = {'kernel': ('linear', 'poly', 'rbf', 'sigmoid')}

    # Inner loop: exhaustive search over every kernel.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        iid=False,
    )
    GS_result = cross_validate(
        grid_search, X, Y,
        cv=5,
        scoring=('neg_mean_squared_error',
                 'neg_mean_absolute_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )

    # Only four distinct candidates exist, so never ask for more draws than
    # that. NOTE(review): with list-only spaces scikit-learn samples without
    # replacement and, depending on the release, warns or errors when n_iter
    # exceeds the space size - confirm for the pinned version.
    n_candidates = min(5, len(paramz['kernel']))
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        n_iter=n_candidates,
        return_train_score=True,
        iid=False,
    )
    # Score on the X, Y arguments; the earlier code used the notebook
    # globals `x`/`y` here instead.
    RS_result = cross_validate(
        random_search, X, Y,
        cv=5,
        scoring=('neg_mean_absolute_error',
                 'neg_mean_squared_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )
    return GS_result, RS_result

# Decision Tree Regressor

In [247]:
def dtr(X,Y):
    """Nested cross-validation of a decision tree regressor.

    Tunes ``min_samples_split`` with an inner 3-fold search (once
    exhaustively, once via randomized search) and scores each tuned search
    with an outer 5-fold ``cross_validate``.

    Parameters
    ----------
    X : feature matrix passed straight to ``cross_validate``.
    Y : target values passed straight to ``cross_validate``.

    Returns
    -------
    (GS_result, RS_result) : the ``cross_validate`` result dicts for the
        grid search and the randomized search respectively.
    """
    model = DecisionTreeRegressor()
    paramz = {'min_samples_split': [2, 3, 4]}

    # Inner loop: exhaustive search over every split threshold.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        iid=False,
    )
    GS_result = cross_validate(
        grid_search, X, Y,
        cv=5,
        scoring=('neg_mean_squared_error',
                 'neg_mean_absolute_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )

    # Only three distinct candidates exist, so never ask for more draws than
    # that. NOTE(review): with list-only spaces scikit-learn samples without
    # replacement and, depending on the release, warns or errors when n_iter
    # exceeds the space size - confirm for the pinned version.
    n_candidates = min(5, len(paramz['min_samples_split']))
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=paramz,
        scoring='neg_mean_squared_error',
        cv=3,
        n_iter=n_candidates,
        return_train_score=True,
        iid=False,
    )
    # Score on the X, Y arguments; the earlier code used the notebook
    # globals `x`/`y` here instead.
    RS_result = cross_validate(
        random_search, X, Y,
        cv=5,
        scoring=('neg_mean_absolute_error',
                 'neg_mean_squared_error',
                 'neg_median_absolute_error'),
        return_train_score=True,
    )
    return GS_result, RS_result
    

# Breast Cancer Dataframe

In [248]:
# Load the breast-cancer table (no header row) and work on the raw array.
scaler = MinMaxScaler()
breastC = pd.read_csv("breastC.csv", header=None)
df = breastC.values

# Column 0 is the regression target; take it BEFORE scaling so it keeps its
# original units.
y = df[:, 0]

# Min-max scale every column, then keep columns 1.. as the features.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so
# the held-out folds influence the scaling - confirm this is acceptable.
df = scaler.fit_transform(df)
x = df[:, 1:]

# Keep every model's (grid-search, randomized-search) result instead of
# discarding all but the last one. Calls run in the same order as before.
breast_results = {
    'r_forest': r_forest(x, y),
    'l_r': l_r(x, y),
    'kNeighbors': kNeighbors(x, y),
    'svr': svr(x, y),
    'dtr': dtr(x, y),
}
# The cell still displays the decision-tree result, as it did originally.
breast_results['dtr']



({'fit_time': array([0.03886366, 0.03689981, 0.03793073, 0.06180167, 0.03889489]),
  'score_time': array([0.00099778, 0.00099897, 0.00099874, 0.00099659, 0.0009985 ]),
  'test_neg_mean_squared_error': array([-80.28205128, -40.33333333, -66.97435897, -54.47364672,
         -76.86842105]),
  'train_neg_mean_squared_error': array([-0.        , -0.        , -0.22903226, -0.65698925, -0.        ]),
  'test_neg_mean_absolute_error': array([-5.97435897, -3.87179487, -5.61538462, -4.59401709, -6.02631579]),
  'train_neg_mean_absolute_error': array([-0.        , -0.        , -0.18709677, -0.34193548, -0.        ]),
  'test_neg_median_absolute_error': array([-2., -1., -4., -1., -3.]),
  'train_neg_median_absolute_error': array([-0., -0., -0., -0., -0.])},
 {'fit_time': array([0.03889632, 0.03789759, 0.03889346, 0.03988457, 1.1509378 ]),
  'score_time': array([0.00099897, 0.00099826, 0.00099945, 0.00099993, 0.00303078]),
  'test_neg_mean_absolute_error': array([-4.02564103, -4.34615385, -5.576923

# CPU Computer Hardware 

In [49]:
# CPU hardware table; the CSV has no header row, so columns are numbered 0..9.
machine = pd.read_csv("machine.csv", header=None)

# Keep columns 2 through 8 as a plain array (the first two columns hold the
# vendor and model strings in the preview below; column 9 is left out).
machine_values = machine.values[:, 2:9]
print(machine_values)

# Preview the untouched frame.
machine.head()

[[125 256 6000 ... 16 128 198]
 [29 8000 32000 ... 8 32 269]
 [29 8000 32000 ... 8 32 220]
 ...
 [125 2000 8000 ... 2 14 52]
 [480 512 8000 ... 0 0 67]
 [480 1000 4000 ... 0 0 45]]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,adviser,32/60,125,256,6000,256,16,128,198,199
1,amdahl,470v/7,29,8000,32000,32,8,32,269,253
2,amdahl,470v/7a,29,8000,32000,32,8,32,220,253
3,amdahl,470v/7b,29,8000,32000,32,8,32,172,253
4,amdahl,470v/7c,29,8000,16000,32,8,16,132,132


# Crime Dataframe

In [147]:

# Positions of the leading columns to discard.
coloana = [0, 1, 2, 3, 4]

communities = pd.read_csv("communities.csv", header=None)
communities = communities.drop(communities.columns[coloana], axis=1)

# Turn every '?' placeholder into NaN, then remove each column that
# contains at least one missing value.
communities = communities.where(communities != '?').dropna(axis=1)
communities.head()

Unnamed: 0,5,6,7,8,9,10,11,12,13,14,...,96,97,98,99,100,118,119,120,125,127
0,0.19,0.33,0.02,0.9,0.12,0.17,0.34,0.47,0.29,0.32,...,0.12,0.42,0.5,0.51,0.64,0.12,0.26,0.2,0.32,0.2
1,0.0,0.16,0.12,0.74,0.45,0.07,0.26,0.59,0.35,0.27,...,0.21,0.5,0.34,0.6,0.52,0.02,0.12,0.45,0.0,0.67
2,0.0,0.42,0.49,0.56,0.17,0.04,0.39,0.47,0.28,0.32,...,0.14,0.49,0.54,0.67,0.56,0.01,0.21,0.02,0.0,0.43
3,0.04,0.77,1.0,0.08,0.12,0.1,0.51,0.5,0.34,0.21,...,0.19,0.3,0.73,0.64,0.65,0.02,0.39,0.28,0.0,0.12
4,0.01,0.55,0.02,0.95,0.09,0.05,0.38,0.38,0.23,0.36,...,0.11,0.72,0.64,0.61,0.53,0.04,0.09,0.02,0.0,0.03


# Boston Housing

In [163]:
# Whitespace-delimited file with no header row. The pattern is a raw string
# so that `\s` reaches pandas as a regex rather than an invalid string escape.
housing = pd.read_csv("housing.csv", delimiter=r'\s+', header=None)
df = housing.values

# The last column is the regression target; take it before scaling so it
# keeps its original units.
y = df[:, -1]

# Use a scaler created in this cell so it does not depend on another cell
# having been run first.
scaler = MinMaxScaler()
df = scaler.fit_transform(df)
x = df[:, :-1]

# Hold out one third of the rows; the fixed seed makes the split reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=1/3, random_state=0
)

[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15.  18.9 21.7 20.4
 18.2 19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8
 18.4 21.  12.7 14.5 13.2 13.1 13.5 18.9 20.  21.  24.7 30.8 34.9 26.6
 25.3 24.7 21.2 19.3 20.  16.6 14.4 19.4 19.7 20.5 25.  23.4 18.9 35.4
 24.7 31.6 23.3 19.6 18.7 16.  22.2 25.  33.  23.5 19.4 22.  17.4 20.9
 24.2 21.7 22.8 23.4 24.1 21.4 20.  20.8 21.2 20.3 28.  23.9 24.8 22.9
 23.9 26.6 22.5 22.2 23.6 28.7 22.6 22.  22.9 25.  20.6 28.4 21.4 38.7
 43.8 33.2 27.5 26.5 18.6 19.3 20.1 19.5 19.5 20.4 19.8 19.4 21.7 22.8
 18.8 18.7 18.5 18.3 21.2 19.2 20.4 19.3 22.  20.3 20.5 17.3 18.8 21.4
 15.7 16.2 18.  14.3 19.2 19.6 23.  18.4 15.6 18.1 17.4 17.1 13.3 17.8
 14.  14.4 13.4 15.6 11.8 13.8 15.6 14.6 17.8 15.4 21.5 19.6 15.3 19.4
 17.  15.6 13.1 41.3 24.3 23.3 27.  50.  50.  50.  22.7 25.  50.  23.8
 23.8 22.3 17.4 19.1 23.1 23.6 22.6 29.4 23.2 24.6 29.9 37.2 39.8 36.2
 37.9 32.5 26.4 29.6 50.  32.  29.8 34.9 37.  30.5 36.4 31.1 29.1 50.
 33.3 3