In [1]:
# importando pandas, numpy y matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix,r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV

In [2]:
# importando los datasets de sklearn
boston = datasets.load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['TARGET'] = boston.target
#boston_df.head() # estructura de nuestro dataset.
df = boston_df
df.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,TARGET
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [3]:
HOW_SCALE = "minmax"  # standard|minmax  como re-escalar los atributos

In [4]:
X = df.drop("TARGET", axis=1).to_numpy()
y = df["TARGET"].to_numpy()

In [5]:
if HOW_SCALE == "standard":
    scaler=StandardScaler()
    X = scaler.fit_transform(X)
elif HOW_SCALE == "minmax":
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

In [6]:
x_train,x_test,y_train,y_test=train_test_split(X, y, test_size=0.20)



# Buscando parametros para los modelos

In [7]:
# Training classifiers
reg1 = GradientBoostingRegressor()
reg2 = RandomForestRegressor()
reg3 = LinearRegression()


## Parametros para GradientBoostingRegressor

In [8]:
#create a dictionary of all values we want to test for n_neighbors
params_gradient = {'n_estimators': np.arange(1, 100)}
#use gridsearch to test all values for n_neighbors
knn_gs = GridSearchCV(reg1, params_gradient, cv=5,n_jobs=-1)
#fit model to training data
knn_gs.fit(x_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='ls', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_n...
             param_grid={'n_

In [9]:
#save best model
knn_best = knn_gs.best_estimator_
#check best n_neigbors value
print(knn_gs.best_params_)

{'n_estimators': 74}


## Parametros para RandomForestRegressor

In [10]:
#create a dictionary of all values we want to test for n_neighbors
params_gradient = {'n_estimators': np.arange(1, 100)}
#use gridsearch to test all values for n_neighbors
knn_gs = GridSearchCV(reg2, params_gradient, cv=5,n_jobs=-1)
#fit model to training data
knn_gs.fit(x_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, rand...
             param_grid={'n_estimators': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
     

In [11]:
#save best model
knn_best = knn_gs.best_estimator_
#check best n_neigbors value
print(knn_gs.best_params_)

{'n_estimators': 55}


# Creando la votación

In [12]:
reg1 = GradientBoostingRegressor(n_estimators= 98)
reg2 = RandomForestRegressor(n_estimators= 49)
reg3 = LinearRegression()

In [13]:
ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
ereg = ereg.fit(x_train, y_train)

# Predicción con votación

## Score

In [14]:
prediccion=ereg.predict(x_test)
ereg.score(x_test, prediccion)

1.0

## Accuracy

In [15]:
print("Accuracy Votacion: {:.3f}".format(r2_score(y_test,prediccion,)))

Accuracy Votacion: 0.852


# Prediccion individual

In [16]:
individual1 = GradientBoostingRegressor(n_estimators= 98)
individual2 = RandomForestRegressor(n_estimators= 80)
individual3 = LinearRegression()

In [17]:
individual1.fit(x_train,y_train)
individual2.fit(x_train,y_train)
individual3.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## GradientBoostingRegressor

In [18]:
y_pred = individual1.predict(x_test)
individual1.score(x_test, y_test)

0.8017471206264777

In [19]:
print("R2: {:.3f}".format(r2_score(y_test,y_pred)))

R2: 0.802


## Random Forest

In [20]:
y_pred = individual2.predict(x_test)
individual2.score(x_test, y_test)

0.8530463124215992

In [21]:
print("R2: {:.3f}".format(r2_score(y_test,y_pred)))

R2: 0.853


## LinearRegression

In [22]:
y_pred = individual3.predict(x_test)
individual3.score(x_test, y_test)

0.7751540037844519

In [23]:
print("R2: {:.3f}".format(r2_score(y_test,y_pred)))

R2: 0.775
