In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import strftime, gmtime

In [2]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
from sklearn.model_selection import GridSearchCV
from sklearn import neighbors
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn import neural_network
from sklearn.multioutput import MultiOutputRegressor

In [5]:
import xgboost as xgb

In [7]:
from sagemaker import experiments
from sagemaker.utils import unique_name_from_base

In [None]:
#para la familia se necesitan pequeños datos y que sea fácil de usar

In [None]:
import boto3

# Create an S3 client
s3 = boto3.client('s3')

# Bucket name and object key of the dataset
bucket_name = 'datalatet01740327929864'
file_path = 'experiment_regression/cement_slump.csv'
# Download the object from S3 into a local file named cement_slump.csv
with open('cement_slump.csv', 'wb') as f:
    s3.download_fileobj(bucket_name, file_path, f)

In [None]:
# Preliminary run label stamped with the current UTC time.
# The model cells further down assign their own run_name before opening a run.
run_name = "randomforest-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))

In [None]:
# Experiment name built from the base "Experimento01" with sagemaker.utils.unique_name_from_base;
# every experiments.Run opened in this notebook is recorded under this name.
experiment_name = unique_name_from_base("Experimento01")

In [None]:
# Load the downloaded CSV. The 28-day compressive strength column is the
# regression target; every remaining column is used as a feature.
target_column = 'Compressive Strength (28-day)(Mpa)'
df = pd.read_csv('cement_slump.csv')
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [None]:
# Feature scaler shared by every model below (fitted in the next cell).
scaler = StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# Linear Regression

# Entrenamiento

In [None]:
run_name = "linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Baseline: ordinary least squares fitted on the standardized training features.
    linear_regr = linear_model.LinearRegression()
    linear_regr.fit(scaled_X_train, y_train)
    print(linear_regr.coef_.tolist())

    # Scores on the training split.
    llr_y_predict = linear_regr.predict(scaled_X_train)
    llr_mae = mean_absolute_error(y_train, llr_y_predict)
    # The square root makes this value an RMSE; the "mse" metric key is kept
    # so the run stays comparable with the other runs of the experiment.
    llr_mse = np.sqrt(mean_squared_error(y_train, llr_y_predict))
    llr_r2 = r2_score(y_train, llr_y_predict)
    run.log_metric(name="train:mae", value=llr_mae)
    run.log_metric(name="train:mse", value=llr_mse)
    run.log_metric(name="train:r2_score", value=llr_r2)

    # Scores on the held-out test split.
    llr_y_predict = linear_regr.predict(scaled_X_test)
    llr_mae = mean_absolute_error(y_test, llr_y_predict)
    llr_mse = np.sqrt(mean_squared_error(y_test, llr_y_predict))
    llr_r2 = r2_score(y_test, llr_y_predict)
    run.log_metric(name="test:mae", value=llr_mae)
    run.log_metric(name="test:mse", value=llr_mse)
    # Fix: this was logged as "train:r2_score", which added the test score to
    # the train series and left the run without a test R^2.
    run.log_metric(name="test:r2_score", value=llr_r2)

# Grid Search

In [None]:
run_name = "grid-linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Regularization strengths explored for the ridge model.
    linear_param_grid = {
        'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                  1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
                  10.0, 20, 50, 100, 500, 1000]
    }
    # Fix: `Ridge` was never imported on its own; reach it through the
    # already-imported `linear_model` module.
    ridge = linear_model.Ridge()
    folds = 5
    llr_grid = GridSearchCV(
        estimator=ridge,
        param_grid=linear_param_grid,
        scoring='neg_mean_absolute_error',
        cv=folds,
        return_train_score=True,
        verbose=1,
    )
    # Fix: fit on the standardized features. The search used to be fitted on
    # the raw X_train while every prediction below is made on scaled inputs.
    llr_grid.fit(scaled_X_train, y_train)
    print(llr_grid.best_params_)
    run.log_parameter(name="alpha", value=llr_grid.best_params_['alpha'])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    llr_grid_preds = llr_grid.predict(scaled_X_train)
    llr_mae = mean_absolute_error(y_train, llr_grid_preds)
    llr_mse = np.sqrt(mean_squared_error(y_train, llr_grid_preds))
    llr_r2 = r2_score(y_train, llr_grid_preds)

    run.log_metric(name="train:mae", value=llr_mae)
    run.log_metric(name="train:mse", value=llr_mse)
    run.log_metric(name="train:r2_score", value=llr_r2)

    # Scores on the held-out test split.
    llr_grid_preds = llr_grid.predict(scaled_X_test)
    llr_mae = mean_absolute_error(y_test, llr_grid_preds)
    llr_mse = np.sqrt(mean_squared_error(y_test, llr_grid_preds))
    llr_r2 = r2_score(y_test, llr_grid_preds)

    run.log_metric(name="test:mae", value=llr_mae)
    run.log_metric(name="test:mse", value=llr_mse)
    # Fix: previously logged under "train:r2_score".
    run.log_metric(name="test:r2_score", value=llr_r2)

# Bayesian linear regression

# Entrenamiento

In [None]:
run_name = "bayesian-linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Build the model and record the hyperparameters it will be trained with.
    bayesian_reg = linear_model.BayesianRidge(verbose=True)
    bayesian_params = bayesian_reg.get_params()
    for param_name in ('alpha_1', 'alpha_2', 'lambda_1', 'lambda_2'):
        run.log_parameter(param_name, bayesian_params[param_name])
    # Fix: the iteration cap is exposed as `max_iter` by recent scikit-learn
    # releases and as `n_iter` by older ones, so indexing ['n_iter'] directly
    # raises KeyError on current versions. Read whichever key exists and keep
    # logging it under the historical "n_iter" name. A value of None (no
    # explicit cap reported by the estimator) is not logged.
    iteration_cap = bayesian_params.get('max_iter', bayesian_params.get('n_iter'))
    if iteration_cap is not None:
        run.log_parameter('n_iter', iteration_cap)

    # Train on the standardized training features.
    bayesian_reg.fit(scaled_X_train, y_train)

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    bayesian_reg_y_predict = bayesian_reg.predict(scaled_X_train)
    bayeasian_mae = mean_absolute_error(y_train, bayesian_reg_y_predict)
    bayeasian_mse = np.sqrt(mean_squared_error(y_train, bayesian_reg_y_predict))
    bayeasian_r2 = r2_score(y_train, bayesian_reg_y_predict)

    run.log_metric(name="train:mae", value=bayeasian_mae)
    run.log_metric(name="train:mse", value=bayeasian_mse)
    run.log_metric(name="train:r2", value=bayeasian_r2)

    # Scores on the held-out test split.
    bayesian_reg_y_predict = bayesian_reg.predict(scaled_X_test)
    bayeasian_mae = mean_absolute_error(y_test, bayesian_reg_y_predict)
    bayeasian_mse = np.sqrt(mean_squared_error(y_test, bayesian_reg_y_predict))
    bayeasian_r2 = r2_score(y_test, bayesian_reg_y_predict)

    run.log_metric(name="test:mae", value=bayeasian_mae)
    run.log_metric(name="test:mse", value=bayeasian_mse)
    run.log_metric(name="test:r2", value=bayeasian_r2)

# Grid Search

In [None]:
run_name = "grid-bayesian-linear-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    bayesian_reg = linear_model.BayesianRidge()
    # The iteration cap is named `max_iter` in recent scikit-learn releases and
    # `n_iter` in older ones; search over whichever this installation exposes.
    iter_param = "max_iter" if "max_iter" in bayesian_reg.get_params() else "n_iter"
    # Fix: the former "normalize" entry is no longer a BayesianRidge parameter
    # in current scikit-learn, which makes GridSearchCV reject the whole grid.
    # The features are already standardized, so it is dropped.
    bayesian_reg_param_grid = {
        iter_param: [1, 10, 50, 150, 300, 600],
        "alpha_1": [1e-3, 1e-6, 1e-9],
        "alpha_2": [1e-3, 1e-6, 1e-9],
        "lambda_1": [1e-3, 1e-6, 1e-9],
        "lambda_2": [1e-3, 1e-6, 1e-9],
    }
    bayesian_grid = GridSearchCV(bayesian_reg, param_grid=bayesian_reg_param_grid)
    bayesian_grid.fit(scaled_X_train, y_train)
    print(bayesian_grid.best_params_)
    run.log_parameter(name="alpha_1", value=bayesian_grid.best_params_['alpha_1'])
    run.log_parameter(name="alpha_2", value=bayesian_grid.best_params_['alpha_2'])
    run.log_parameter(name="lambda_1", value=bayesian_grid.best_params_['lambda_1'])
    run.log_parameter(name="lambda_2", value=bayesian_grid.best_params_['lambda_2'])
    # Logged under the historical "n_iter" name regardless of the estimator's spelling.
    run.log_parameter(name="n_iter", value=bayesian_grid.best_params_[iter_param])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    bayesian_grid_preds = bayesian_grid.predict(scaled_X_train)
    bayeasian_mae = mean_absolute_error(y_train, bayesian_grid_preds)
    bayeasian_mse = np.sqrt(mean_squared_error(y_train, bayesian_grid_preds))
    bayeasian_r2 = r2_score(y_train, bayesian_grid_preds)

    run.log_metric(name="train:mae", value=bayeasian_mae)
    run.log_metric(name="train:mse", value=bayeasian_mse)
    run.log_metric(name="train:r2", value=bayeasian_r2)

    # Scores on the held-out test split.
    bayesian_grid_preds = bayesian_grid.predict(scaled_X_test)
    bayeasian_mae = mean_absolute_error(y_test, bayesian_grid_preds)
    bayeasian_mse = np.sqrt(mean_squared_error(y_test, bayesian_grid_preds))
    # Fix: the R^2 used to be stored in `bayeasian_mae`, so "test:mae" logged
    # the R^2 and "test:r2" logged the stale training R^2.
    bayeasian_r2 = r2_score(y_test, bayesian_grid_preds)

    run.log_metric(name="test:mae", value=bayeasian_mae)
    run.log_metric(name="test:mse", value=bayeasian_mse)
    run.log_metric(name="test:r2", value=bayeasian_r2)

# K-Nearest Neighbors

# Entrenamiento

In [None]:
run_name = "knn-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Baseline k-nearest-neighbours regressor with default hyperparameters.
    knn_regression = neighbors.KNeighborsRegressor()
    knn_regression.fit(scaled_X_train, y_train)
    print(knn_regression.get_params())

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    knn_y_predict = knn_regression.predict(scaled_X_train)
    knn_mae = mean_absolute_error(y_train, knn_y_predict)
    knn_mse = np.sqrt(mean_squared_error(y_train, knn_y_predict))
    knn_r2 = r2_score(y_train, knn_y_predict)

    # Fix: these calls used to log the `llr_*` variables left over from the
    # linear-regression cells, so the KNN run recorded another model's scores.
    run.log_metric(name="train:mae", value=knn_mae)
    run.log_metric(name="train:mse", value=knn_mse)
    run.log_metric(name="train:r2", value=knn_r2)

    # Scores on the held-out test split.
    knn_y_predict = knn_regression.predict(scaled_X_test)
    knn_mae = mean_absolute_error(y_test, knn_y_predict)
    knn_mse = np.sqrt(mean_squared_error(y_test, knn_y_predict))
    knn_r2 = r2_score(y_test, knn_y_predict)

    run.log_metric(name="test:mae", value=knn_mae)
    run.log_metric(name="test:mse", value=knn_mse)
    run.log_metric(name="test:r2", value=knn_r2)

# Grid Search

In [None]:
run_name = "grid-knn-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Search space: neighbourhood size, neighbour-search algorithm and distance metric.
    knn_reg_param_grid = {
        "n_neighbors": [1, 2, 3, 5, 7, 10, 25, 50],
        "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
        "metric": ['euclidean', 'manhattan', 'l1', 'l2'],
    }

    knn_grid = GridSearchCV(estimator=neighbors.KNeighborsRegressor(), param_grid=knn_reg_param_grid)
    knn_grid.fit(scaled_X_train, y_train)
    print(knn_grid.best_params_)
    run.log_parameter(name="n_neighbors", value=knn_grid.best_params_['n_neighbors'])
    run.log_parameter(name="metric", value=knn_grid.best_params_['metric'])
    run.log_parameter(name="algorithm", value=knn_grid.best_params_['algorithm'])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    knn_grid_preds = knn_grid.predict(scaled_X_train)
    knn_mae = mean_absolute_error(y_train, knn_grid_preds)
    knn_mse = np.sqrt(mean_squared_error(y_train, knn_grid_preds))
    knn_r2 = r2_score(y_train, knn_grid_preds)

    run.log_metric(name="train:mae", value=knn_mae)
    run.log_metric(name="train:mse", value=knn_mse)
    run.log_metric(name="train:r2", value=knn_r2)

    # Scores on the held-out test split.
    knn_grid_preds = knn_grid.predict(scaled_X_test)
    # Fix: the MAE used to be stored in `knn_mse` and the RMSE in
    # `bayeasian_mse`, so "test:mae" logged the training MAE and "test:mse"
    # logged the test MAE.
    knn_mae = mean_absolute_error(y_test, knn_grid_preds)
    knn_mse = np.sqrt(mean_squared_error(y_test, knn_grid_preds))
    knn_r2 = r2_score(y_test, knn_grid_preds)

    run.log_metric(name="test:mae", value=knn_mae)
    run.log_metric(name="test:mse", value=knn_mse)
    run.log_metric(name="test:r2", value=knn_r2)

# Decision Tree

# Entrenamiento

In [None]:
run_name = "tree-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fit a decision tree with default hyperparameters on the standardized features.
    decision_tree_reg = tree.DecisionTreeRegressor()
    decision_tree_reg.fit(scaled_X_train, y_train)
    print(decision_tree_reg.get_params())

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        decision_tree_reg_y_pred = decision_tree_reg.predict(split_X)
        tree_mae = mean_absolute_error(split_y, decision_tree_reg_y_pred)
        tree_mse = np.sqrt(mean_squared_error(split_y, decision_tree_reg_y_pred))
        tree_r2 = r2_score(split_y, decision_tree_reg_y_pred)

        run.log_metric(name=split_name + ":mae", value=tree_mae)
        run.log_metric(name=split_name + ":mse", value=tree_mse)
        run.log_metric(name=split_name + ":r2", value=tree_r2)

# Grid Search

In [None]:
run_name = "grid-tree-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fix: current scikit-learn rejects max_features="auto" and criterion="mse".
    # For a regressor "auto" meant "use every feature", which the fraction 1.0
    # expresses; "mse" is spelled "squared_error" (scikit-learn >= 1.0).
    decision_tree_reg_param_grid = {
        "max_features": [1.0, "sqrt", "log2"],
        "criterion": ["squared_error", "friedman_mse"],
    }

    decision_tree_grid = GridSearchCV(estimator=tree.DecisionTreeRegressor(), param_grid=decision_tree_reg_param_grid)
    decision_tree_grid.fit(scaled_X_train, y_train)
    print(decision_tree_grid.best_params_)
    run.log_parameter(name="max_features", value=decision_tree_grid.best_params_['max_features'])
    run.log_parameter(name="criterion", value=decision_tree_grid.best_params_['criterion'])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    tree_grid_preds = decision_tree_grid.predict(scaled_X_train)
    tree_mae = mean_absolute_error(y_train, tree_grid_preds)
    tree_mse = np.sqrt(mean_squared_error(y_train, tree_grid_preds))
    tree_r2 = r2_score(y_train, tree_grid_preds)

    run.log_metric(name="train:mae", value=tree_mae)
    run.log_metric(name="train:mse", value=tree_mse)
    run.log_metric(name="train:r2", value=tree_r2)

    # Scores on the held-out test split.
    tree_grid_preds = decision_tree_grid.predict(scaled_X_test)
    tree_mae = mean_absolute_error(y_test, tree_grid_preds)
    tree_mse = np.sqrt(mean_squared_error(y_test, tree_grid_preds))
    tree_r2 = r2_score(y_test, tree_grid_preds)

    run.log_metric(name="test:mae", value=tree_mae)
    run.log_metric(name="test:mse", value=tree_mse)
    run.log_metric(name="test:r2", value=tree_r2)

# Random Forest

# Entrenamiento

In [None]:
run_name = "randomforest-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fit a random forest with default hyperparameters on the standardized features.
    rforest_regression = RandomForestRegressor()
    rforest_regression.fit(scaled_X_train, y_train)
    print(rforest_regression.get_params())

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        rforest_regression_y_pred = rforest_regression.predict(split_X)
        rforest_mae = mean_absolute_error(split_y, rforest_regression_y_pred)
        rforest_mse = np.sqrt(mean_squared_error(split_y, rforest_regression_y_pred))
        rforest_r2 = r2_score(split_y, rforest_regression_y_pred)

        run.log_metric(name=split_name + ":mae", value=rforest_mae)
        run.log_metric(name=split_name + ":mse", value=rforest_mse)
        run.log_metric(name=split_name + ":r2", value=rforest_r2)

# Grid Search

In [None]:
run_name = "grid-randomforest-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fix: 'gini' and 'entropy' are classification criteria and are not
    # accepted by RandomForestRegressor, so every candidate fit failed. Use
    # the regression criteria instead (names valid for scikit-learn >= 1.0).
    # max_features='auto' is rejected by current releases; for a regressor it
    # meant "use every feature", which the fraction 1.0 expresses.
    rforest_param_grid = {
        'n_estimators': [200, 500],
        'max_features': [1.0, 'sqrt', 'log2'],
        'max_depth': [4, 5, 6, 7, 8],
        'criterion': ['squared_error', 'absolute_error'],
    }

    rforest_grid = GridSearchCV(estimator=RandomForestRegressor(), param_grid=rforest_param_grid, cv=5)
    rforest_grid.fit(scaled_X_train, y_train)
    print(rforest_grid.best_params_)
    run.log_parameter(name="n_estimators", value=rforest_grid.best_params_['n_estimators'])
    run.log_parameter(name="max_features", value=rforest_grid.best_params_['max_features'])
    run.log_parameter(name="max_depth", value=rforest_grid.best_params_['max_depth'])
    run.log_parameter(name="criterion", value=rforest_grid.best_params_['criterion'])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    rforest_regression_grid_pred = rforest_grid.predict(scaled_X_train)
    rforest_mae = mean_absolute_error(y_train, rforest_regression_grid_pred)
    rforest_mse = np.sqrt(mean_squared_error(y_train, rforest_regression_grid_pred))
    rforest_r2 = r2_score(y_train, rforest_regression_grid_pred)

    run.log_metric(name="train:mae", value=rforest_mae)
    run.log_metric(name="train:mse", value=rforest_mse)
    run.log_metric(name="train:r2", value=rforest_r2)

    # Scores on the held-out test split.
    rforest_regression_grid_pred = rforest_grid.predict(scaled_X_test)
    rforest_mae = mean_absolute_error(y_test, rforest_regression_grid_pred)
    rforest_mse = np.sqrt(mean_squared_error(y_test, rforest_regression_grid_pred))
    rforest_r2 = r2_score(y_test, rforest_regression_grid_pred)

    run.log_metric(name="test:mae", value=rforest_mae)
    run.log_metric(name="test:mse", value=rforest_mse)
    run.log_metric(name="test:r2", value=rforest_r2)

# XGBoost

# Entrenamiento

In [None]:
run_name = "xgboost-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Baseline gradient-boosted regressor with default hyperparameters.
    xgb_regression = xgb.XGBRegressor()
    xgb_regression.fit(scaled_X_train, y_train)
    print(xgb_regression.get_params())

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    xgb_regression_y_pred = xgb_regression.predict(scaled_X_train)
    xgb_mae = mean_absolute_error(y_train, xgb_regression_y_pred)
    xgb_mse = np.sqrt(mean_squared_error(y_train, xgb_regression_y_pred))
    xgb_r2 = r2_score(y_train, xgb_regression_y_pred)

    run.log_metric(name="train:mae", value=xgb_mae)
    run.log_metric(name="train:mse", value=xgb_mse)
    run.log_metric(name="train:r2", value=xgb_r2)

    # Scores on the held-out test split.
    xgb_regression_y_pred = xgb_regression.predict(scaled_X_test)
    xgb_mae = mean_absolute_error(y_test, xgb_regression_y_pred)
    # Fix: the RMSE used to be computed from `rforest_regression_y_pred`, i.e.
    # the random forest's predictions, and logged as XGBoost's test error.
    xgb_mse = np.sqrt(mean_squared_error(y_test, xgb_regression_y_pred))
    xgb_r2 = r2_score(y_test, xgb_regression_y_pred)

    run.log_metric(name="test:mae", value=xgb_mae)
    run.log_metric(name="test:mse", value=xgb_mse)
    run.log_metric(name="test:r2", value=xgb_r2)

# Grid Search

In [None]:
# Fix: this cell's body was indented without an enclosing `with` statement,
# which is an IndentationError, and it never opened its own experiment run.
# It now follows the same pattern as the other grid-search cells. The
# in-cell re-imports of GridSearchCV and xgboost were redundant with the
# import cells at the top of the notebook and have been removed.
run_name = "grid-xgboost-regression-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
with experiments.Run(experiment_name, run_name=run_name) as run:
    xgb_param_grid = {
        "learning_rate": [0.05, 0.10, 0.15],
        "max_depth": [3, 4, 5, 6, 8],
        "min_child_weight": [1, 3, 5, 7],
        "gamma": [0.0, 0.1, 0.2],
        "colsample_bytree": [0.3, 0.4],
    }

    # Fix: search over XGBRegressor directly. Wrapping it in
    # MultiOutputRegressor made the grid keys invalid (they would need an
    # "estimator__" prefix) and the wrapper does not accept the
    # single-column target used here.
    xgb_grid = GridSearchCV(
        estimator=xgb.XGBRegressor(),
        param_grid=xgb_param_grid,
        cv=3,
        scoring='neg_mean_squared_error',
        verbose=0,
        n_jobs=-1,
    )
    xgb_grid.fit(scaled_X_train, y_train)
    print(xgb_grid.best_params_)
    run.log_parameter(name="learning_rate", value=xgb_grid.best_params_['learning_rate'])
    run.log_parameter(name="max_depth", value=xgb_grid.best_params_['max_depth'])
    run.log_parameter(name="min_child_weight", value=xgb_grid.best_params_['min_child_weight'])
    run.log_parameter(name="gamma", value=xgb_grid.best_params_['gamma'])
    run.log_parameter(name="colsample_bytree", value=xgb_grid.best_params_['colsample_bytree'])

    # Scores on the training split (the "mse" key carries an RMSE, see np.sqrt).
    xgb_grid_pred = xgb_grid.predict(scaled_X_train)
    xgb_mae = mean_absolute_error(y_train, xgb_grid_pred)
    xgb_mse = np.sqrt(mean_squared_error(y_train, xgb_grid_pred))
    xgb_r2 = r2_score(y_train, xgb_grid_pred)

    run.log_metric(name="train:mae", value=xgb_mae)
    run.log_metric(name="train:mse", value=xgb_mse)
    run.log_metric(name="train:r2", value=xgb_r2)

    # Scores on the held-out test split.
    xgb_grid_pred = xgb_grid.predict(scaled_X_test)
    xgb_mae = mean_absolute_error(y_test, xgb_grid_pred)
    xgb_mse = np.sqrt(mean_squared_error(y_test, xgb_grid_pred))
    xgb_r2 = r2_score(y_test, xgb_grid_pred)

    run.log_metric(name="test:mae", value=xgb_mae)
    run.log_metric(name="test:mse", value=xgb_mse)
    run.log_metric(name="test:r2", value=xgb_r2)


# Linear Support Vector Machine

# Entrenamiento

In [None]:
run_name = "linearsvm-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fit a linear support vector regressor with default hyperparameters.
    svm_regression = LinearSVR()
    svm_regression.fit(scaled_X_train, y_train)
    print(svm_regression.get_params())

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        svm_regression_y_pred = svm_regression.predict(split_X)
        svm_mae = mean_absolute_error(split_y, svm_regression_y_pred)
        svm_mse = np.sqrt(mean_squared_error(split_y, svm_regression_y_pred))
        svm_r2 = r2_score(split_y, svm_regression_y_pred)

        run.log_metric(name=split_name + ":mae", value=svm_mae)
        run.log_metric(name=split_name + ":mse", value=svm_mse)
        run.log_metric(name=split_name + ":r2", value=svm_r2)

# Grid Search

In [None]:
run_name = "grid-linearsvm-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Search space: regularization strength C and epsilon.
    svm_param_grid = dict(
        C=[0.001, 0.01, 0.1, 0.5, 1],
        epsilon=[0, 0.01, 0.1, 0.5, 1, 2],
    )

    svm_grid = GridSearchCV(estimator=LinearSVR(), param_grid=svm_param_grid)
    svm_grid.fit(scaled_X_train, y_train)
    print(svm_grid.best_params_)

    # Record the winning hyperparameters.
    for param_name in ("C", "epsilon"):
        run.log_parameter(name=param_name, value=svm_grid.best_params_[param_name])

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        svm_grid_preds = svm_grid.predict(split_X)
        svm_mae = mean_absolute_error(split_y, svm_grid_preds)
        svm_mse = np.sqrt(mean_squared_error(split_y, svm_grid_preds))
        svm_r2 = r2_score(split_y, svm_grid_preds)

        run.log_metric(name=split_name + ":mae", value=svm_mae)
        run.log_metric(name=split_name + ":mse", value=svm_mse)
        run.log_metric(name=split_name + ":r2", value=svm_r2)

# Support Vector Machine

# Entrenamiento

In [None]:
run_name = "svm-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fit a support vector regressor (SVR) with default hyperparameters.
    svm_regression = SVR()
    svm_regression.fit(scaled_X_train, y_train)
    print(svm_regression.get_params())

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        svm_regression_y_pred = svm_regression.predict(split_X)
        svm_mae = mean_absolute_error(split_y, svm_regression_y_pred)
        svm_mse = np.sqrt(mean_squared_error(split_y, svm_regression_y_pred))
        svm_r2 = r2_score(split_y, svm_regression_y_pred)

        run.log_metric(name=split_name + ":mae", value=svm_mae)
        run.log_metric(name=split_name + ":mse", value=svm_mse)
        run.log_metric(name=split_name + ":r2", value=svm_r2)

# Grid Search

In [None]:
run_name = "grid-svm-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Search space for the SVR: C, kernel, gamma, polynomial degree and epsilon.
    svm_param_grid = dict(
        C=[0.001, 0.01, 0.1, 0.5, 1],
        kernel=['linear', 'rbf', 'poly'],
        gamma=['scale', 'auto'],
        degree=[2, 3, 4],
        epsilon=[0, 0.01, 0.1, 0.5, 1, 2],
    )

    svm_grid = GridSearchCV(estimator=SVR(), param_grid=svm_param_grid)
    svm_grid.fit(scaled_X_train, y_train)
    print(svm_grid.best_params_)

    # Record the winning hyperparameters.
    for param_name in ("C", "kernel", "gamma", "degree", "epsilon"):
        run.log_parameter(name=param_name, value=svm_grid.best_params_[param_name])

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        svm_grid_preds = svm_grid.predict(split_X)
        svm_mae = mean_absolute_error(split_y, svm_grid_preds)
        svm_mse = np.sqrt(mean_squared_error(split_y, svm_grid_preds))
        svm_r2 = r2_score(split_y, svm_grid_preds)

        run.log_metric(name=split_name + ":mae", value=svm_mae)
        run.log_metric(name=split_name + ":mse", value=svm_mse)
        run.log_metric(name=split_name + ":r2", value=svm_r2)

# Artificial Neural Network

# Entrenamiento

In [None]:
run_name = "ann-regression-{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
with experiments.Run(experiment_name, run_name=run_name) as run:
    # Fit a multi-layer perceptron regressor, allowing up to 5000 iterations.
    ann_regression = neural_network.MLPRegressor(max_iter=5000, verbose=False)
    ann_regression.fit(scaled_X_train, y_train)
    print(ann_regression.get_params())

    # Score the training split first and the test split second, logging MAE,
    # RMSE (under the "mse" key) and R^2 for each.
    evaluation_splits = (
        ("train", scaled_X_train, y_train),
        ("test", scaled_X_test, y_test),
    )
    for split_name, split_X, split_y in evaluation_splits:
        ann_regression_y_pred = ann_regression.predict(split_X)
        ann_mae = mean_absolute_error(split_y, ann_regression_y_pred)
        ann_mse = np.sqrt(mean_squared_error(split_y, ann_regression_y_pred))
        ann_r2 = r2_score(split_y, ann_regression_y_pred)

        run.log_metric(name=split_name + ":mae", value=ann_mae)
        run.log_metric(name=split_name + ":mse", value=ann_mse)
        run.log_metric(name=split_name + ":r2", value=ann_r2)

# Grid Search