#### Regression Benchmark Models
1. LOWESS
2. Smoothing Splines
3. Deep feed-forward Neural Networks

## LOWESS

Training Function

In [None]:
def get_LOESS(data_x,data_x_test,data_y):
    """Fit a LOWESS smoother on the training data and evaluate it on train/test inputs.

    The smoother is fitted with statsmodels' ``lowess`` using its default
    settings. The fitted points are then turned into a callable by linear
    interpolation (``interp1d`` with ``bounds_error=False``), and any NaN the
    interpolant produces is replaced through ``np.nan_to_num``.

    Parameters
    ----------
    data_x : array-like
        Training inputs; flattened to one dimension.
    data_x_test : array-like
        Test inputs; flattened to one dimension.
    data_y : array-like
        Training targets.

    Returns
    -------
    tuple
        ``(train_prediction, test_prediction)``.
    """
    from scipy.interpolate import interp1d
    import statsmodels.api as sm

    # Bring every input into NumPy form; the x inputs are flattened to 1-D
    y_train = np.array(data_y)
    x_train = np.array(data_x).reshape(-1,)
    x_test = np.array(data_x_test).reshape(-1,)

    # Fit the smoother; the result is read as rows of (x, fitted value) pairs
    smoothed = sm.nonparametric.lowess(y_train, x_train)
    columns = list(zip(*smoothed))
    knots_x = columns[0]
    knots_y = columns[1]

    # Interpolate between the fitted points so arbitrary inputs can be evaluated
    interpolant = interp1d(knots_x, knots_y, bounds_error=False)

    # NaN outputs of the interpolant are replaced by nan_to_num's default
    train_hat = np.nan_to_num(interpolant(x_train))
    test_hat = np.nan_to_num(interpolant(x_test))

    return train_hat, test_hat

Get predictions from training function

In [None]:
# Run the LOWESS benchmark and keep its train and test predictions
LOESS_prediction_train, LOESS_prediction_test = get_LOESS(
    data_x=data_x,
    data_x_test=data_x_test,
    data_y=data_y,
)

## Smoothing Splines

Training Function

In [None]:
def get_smooting_splines(data_x,data_x_test,data_y,spar=0.7):
    """Fit an R smoothing spline on the training data and predict on train/test inputs.

    The fit and the predictions are delegated to R's ``smooth.spline`` and
    ``predict`` through rpy2.

    Parameters
    ----------
    data_x : array-like
        Training inputs; flattened to one dimension.
    data_x_test : array-like
        Test inputs; flattened to one dimension.
    data_y : array-like
        Training targets.
    spar : float, optional
        Value forwarded as the ``spar`` argument of ``smooth.spline``.
        Defaults to 0.7, the value that was previously hard-coded.

    Returns
    -------
    tuple
        ``(train_prediction, test_prediction)`` as NumPy arrays.
    """
    import rpy2.robjects as robjects  # smoothing splines are fitted in R via rpy2

    # Coerce inputs to NumPy, then to R numeric vectors
    y_train = np.array(data_y)
    x_train = np.array(data_x).reshape(-1,)
    x_test = np.array(data_x_test).reshape(-1,)
    r_y = robjects.FloatVector(y_train)
    r_x = robjects.FloatVector(x_train)

    # Look up the R functions once and fit the spline
    r_smooth_spline = robjects.r['smooth.spline']
    r_predict = robjects.r['predict']
    spline_fit = r_smooth_spline(x=r_x, y=r_y, spar=spar)

    def _predict(x_values):
        # Evaluate the fitted spline and pull the 'y' component out of R's result
        r_result = r_predict(spline_fit, robjects.FloatVector(x_values))
        return np.array(r_result.rx2('y'))

    # The training prediction is computed exactly once (an unused duplicate
    # call was removed)
    f_hat_smoothing_splines_train = _predict(x_train)
    f_hat_smoothing_splines_test = _predict(x_test)

    return f_hat_smoothing_splines_train, f_hat_smoothing_splines_test

Get predictions from training function

In [None]:
# Run the smoothing-spline benchmark and keep its train and test predictions
f_hat_smoothing_splines_train, f_hat_smoothing_splines_test = get_smooting_splines(
    data_x=data_x,
    data_x_test=data_x_test,
    data_y=data_y,
)

# Kernel Regression

# Gradient-Boosted Regression Trees

In [1]:
def get_GBRF(X_train,X_test,y_train):
    """Tune a gradient-boosted regression tree model and predict on train/test data.

    A ``GradientBoostingRegressor`` is tuned with ``RandomizedSearchCV`` over
    ``Rand_Forest_Grid`` using shuffled K-fold cross-validation. The best
    estimator is used for the predictions, and the selected hyper-parameters
    are written to ``./outputs/tables/``.

    The search settings ``n_iter_trees``, ``CV_folds``, ``Rand_Forest_Grid``
    and ``n_jobs`` are read from the enclosing (module) scope.

    Parameters
    ----------
    X_train : array-like
        Training features.
    X_test : array-like
        Test features.
    y_train : array-like
        Training targets.

    Returns
    -------
    tuple
        ``(train_prediction, test_prediction, best_estimator)``.

    Notes
    -----
    Side effects: writes ``best_params_Gradient_Boosted_Tree.txt`` and
    ``Best_params_table_Gradient_Boosted_Tree.txt`` under ``./outputs/tables/``
    (the directory is created if missing).
    """
    # Create the output folder up front so a missing directory cannot discard
    # the result of the (potentially long) search below
    os.makedirs('./outputs/tables', exist_ok=True)

    # Base model
    rand_forest_model_grad_boosted = GradientBoostingRegressor()

    # Randomized-search CV over the hyper-parameter distributions
    Random_Forest_GridSearch = RandomizedSearchCV(estimator = rand_forest_model_grad_boosted,
                                                  n_iter=n_iter_trees,
                                                  cv=KFold(CV_folds, random_state=2020, shuffle=True),
                                                  param_distributions=Rand_Forest_Grid,
                                                  return_train_score=True,
                                                  random_state=2020,
                                                  verbose=10,
                                                  n_jobs=n_jobs)

    Random_Forest_GridSearch.fit(X_train,y_train)
    random_forest_trained = Random_Forest_GridSearch.best_estimator_
    best_params = Random_Forest_GridSearch.best_params_

    #--------------------------------------------------#
    # Write: Model, Results, and Best Hyper-Parameters #
    #--------------------------------------------------#

    # Saving the model itself is currently disabled:
    # pickle.dump(random_forest_trained, open('./outputs/models/Gradient_Boosted_Tree/Gradient_Boosted_Tree_Best.pkl','wb'))

    # Save the raw best-parameter dictionary
    cur_path = os.path.expanduser('./outputs/tables/best_params_Gradient_Boosted_Tree.txt')
    with open(cur_path, "w") as f:
        f.write(str(best_params))

    best_params_table_tree = pd.DataFrame({'N Estimators': [best_params['n_estimators']],
                                        'Min Samples Leaf': [best_params['min_samples_leaf']],
                                        'Learning Rate': [best_params['learning_rate']],
                                        'Max Depth': [best_params['max_depth']],
                                        })

    # Count parameters as (nodes per tree) x (number of input features), summed
    # over all boosting stages. Newer scikit-learn exposes the feature count as
    # `n_features_in_`; older releases only provide `n_features_`.
    n_features = getattr(random_forest_trained, 'n_features_in_', None)
    if n_features is None:
        n_features = random_forest_trained.n_features_
    N_tot_params_per_tree = [(x[0].tree_.node_count)*n_features for x in random_forest_trained.estimators_]
    N_tot_params_in_forest = sum(N_tot_params_per_tree)
    best_params_table_tree['N_parameters'] = [N_tot_params_in_forest]

    # Write the best-parameter table as LaTeX
    best_params_table_tree.to_latex('./outputs/tables/Best_params_table_Gradient_Boosted_Tree.txt')

    # Generate Prediction(s) #
    #------------------------#
    y_train_hat_random_forest_Gradient_boosting = random_forest_trained.predict(X_train)
    y_test_hat_random_forest_Gradient_boosting = random_forest_trained.predict(X_test)

    # Return Predictions #
    #--------------------#
    return y_train_hat_random_forest_Gradient_boosting, y_test_hat_random_forest_Gradient_boosting, random_forest_trained

In [None]:
# Run the gradient-boosted tree benchmark; keep its predictions and the fitted model
GBRF_y_hat_train, GBRF_y_hat_test, GBRF_model = get_GBRF(
    X_train=data_x,
    X_test=data_x_test,
    y_train=data_y,
)

# Feed-Forward Neural Network

In [None]:
# Run the feed-forward network benchmark: pass the data first, then the
# search/cross-validation settings, and unpack the two returned values
ffNN_y_hat_train, ffNN_y_hat_test = build_ffNN(
    X_train=data_x,
    y_train=data_y,
    X_test=data_x_test,
    param_grid_in=param_grid_Vanilla_Nets,
    n_folds=CV_folds,
    n_jobs=n_jobs,
    n_iter=n_iter,
)