In [None]:
# Import libraries
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler

from random import choice

In [None]:
# Pick a random seed in [1, 999] that is shared by the data split and the model.
# The value is printed because it changes on every run: without a record of it,
# a given split / fitted model could not be reproduced later.
SEED = choice(range(1, 1000))
print(f"Using random seed: {SEED}")

In [None]:
# Load your dataset
# NOTE(review): "[SOURCE]" looks like a placeholder — presumably it must be
# replaced with the path or URL of the CSV file before this cell can run.
source = "[SOURCE]"
# Parse the CSV into a DataFrame
df = pd.read_csv(source)
# Show the first rows as a quick sanity check of what was loaded
df.head()

In [None]:
# Clean the dataset in a single pass:
#   * trim off the first and last rows,
#   * discard the two leading columns (stock labels and dates),
#   * drop every column that still contains a NaN (axis=1 removes columns, not rows),
#   * replace positive/negative infinities with 0.
dataset = (
    df.iloc[1:-1, 2:]
    .dropna(axis=1)
    .replace([np.inf, -np.inf], 0)
)

# Print the dataset's head
dataset.head()

In [None]:
# Value mapping function for predicting the direction of price movements
def mapDirection(value):
    """Encode a numeric value as a binary direction label.

    Returns 1 when ``value`` is greater than or equal to zero, otherwise 0.
    Anything that fails the ``>= 0`` comparison (including NaN) maps to 0.
    """
    if value >= 0:
        return 1
    return 0

In [None]:
# Create input and output sets: "Return" is the target, every other column is a feature
y = dataset["Return"]
X = dataset.drop(columns=["Return"])

# Normalise the inputs with min-max scaling.
# fit_transform returns a plain array by default, so the frame is rebuilt with the
# original column names AND the original row index; without `index=` X would get
# a fresh 0..n-1 index that no longer lines up with y's row labels.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split — confirm that this is acceptable for the intended evaluation.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

In [None]:
# Hold out a random 10% of the rows for testing; the other 90% is used for training.
# The split is driven by SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=SEED,
)

# Summary statistics of the held-out target values
print(f"Test mean: {np.mean(y_test)}")
print(f"Test stdv: {np.std(y_test)}")

In [None]:
# Hyper-parameter grid explored during cross-validation
param_grid = dict(
    n_estimators=[16, 32, 64, 128],
    min_samples_split=[2, 5, 10],
)

# Model selector: 1 = random forest, any other value = GBM
model_type = 1

# Instantiate the regressor of the desired type, seeded with SEED
model_cls = RandomForestRegressor if model_type == 1 else GradientBoostingRegressor
model = model_cls(random_state=SEED)

In [None]:
# Exhaustive grid search with 5-fold cross-validation over param_grid,
# scored by negative mean squared error and run on two parallel jobs.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=2,
    verbose=2,
)
grid_search.fit(X_train, y_train)

In [None]:
# Print the best parameters
# (the parameter combination the fitted grid search reports as its best)
best_params = grid_search.best_params_
print(best_params)

In [None]:
# Print the mean cross-validated score of every parameter combination.
# The grid search is scored with 'neg_mean_squared_error', so each
# mean_test_score entry is a negated MSE (higher is better); the sign is
# flipped here so that the printed number is the mean squared error itself.
results = grid_search.cv_results_

for neg_mse, params in zip(results["mean_test_score"], results["params"]):
    print(f"MSE: {-neg_mse}, Parameters: {params}")