# Random Forest Training

---

### Import Libraries and Dependencies

In [1]:
# Import libraries and dependencies
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

# For time stamps
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

### Read in CSV as Pandas DataFrame

In [2]:
# Locate the trading-signals CSV (relative to the notebook's working directory)
csv_path = Path('trading_signals.csv')

# Load it into a DataFrame and preview the first five rows
trading_signals_df = pd.read_csv(filepath_or_buffer=csv_path)
trading_signals_df.head(5)

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,daily_return,fast_close,slow_close,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
0,2011-01-18,12.312857,11.642857,11.768571,12.166071,1880998000.0,10.495069,,12.166071,12.166071,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
1,2011-01-19,12.45,12.031429,12.441071,12.101429,1135613000.0,10.439302,-0.005313,12.131513,12.133526,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2,2011-01-20,12.082143,11.79,12.015357,11.881429,764789200.0,10.249522,-0.01818,12.036366,12.048326,...,1.0,0.0,1.0,,,,,0.0,0.0,0.0
3,2011-01-21,11.96,11.665357,11.920357,11.668571,754401200.0,10.065897,-0.017915,11.924512,11.951404,...,1.0,0.0,1.0,,,,,0.0,0.0,0.0
4,2011-01-24,12.051786,11.668571,11.673929,12.051786,574683200.0,10.396482,0.032842,11.957463,11.972041,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0


### Set Index, Infer DateTimeFormat, and Drop Extraneous Columns

In [3]:
# Parse the 'Date' column into datetimes and promote it to the index.
# - `infer_datetime_format` is deprecated (and ignored) since pandas 2.0, so it is
#   omitted; pandas infers a consistent format on its own.
# - The frame is rebuilt instead of mutated with inplace=True, and the step is
#   guarded so that re-running this cell is a no-op rather than a KeyError on 'Date'.
if 'Date' in trading_signals_df.columns:
    trading_signals_df = (
        trading_signals_df
        .assign(Date=pd.to_datetime(trading_signals_df['Date']))
        .set_index('Date')  # set_index drops the column it promotes
    )
trading_signals_df.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,daily_return,fast_close,slow_close,crossover_long,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-18,12.312857,11.642857,11.768571,12.166071,1880998000.0,10.495069,,12.166071,12.166071,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2011-01-19,12.45,12.031429,12.441071,12.101429,1135613000.0,10.439302,-0.005313,12.131513,12.133526,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2011-01-20,12.082143,11.79,12.015357,11.881429,764789200.0,10.249522,-0.01818,12.036366,12.048326,0.0,...,1.0,0.0,1.0,,,,,0.0,0.0,0.0
2011-01-21,11.96,11.665357,11.920357,11.668571,754401200.0,10.065897,-0.017915,11.924512,11.951404,0.0,...,1.0,0.0,1.0,,,,,0.0,0.0,0.0
2011-01-24,12.051786,11.668571,11.673929,12.051786,574683200.0,10.396482,0.032842,11.957463,11.972041,0.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0


### Set X-Variable List and Filter to Obtain Associated Values

In [4]:
# Names of the signal columns used as model features
x_var_list = ['crossover_signal', 'vol_trend_signal', 'bollinger_signal']

# Preview the last rows of just those feature columns
feature_preview = trading_signals_df[x_var_list]
feature_preview.tail()

Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-11,1.0,1.0,-1.0
2021-01-12,1.0,1.0,-1.0
2021-01-13,1.0,1.0,-1.0
2021-01-14,1.0,1.0,-1.0
2021-01-15,1.0,1.0,-1.0


### Shift the Feature Columns by 1

In [5]:
# Lag the feature columns by one row so that each row pairs the previous row's
# signals with the current row's return (the value we later try to predict).
# NOTE(review): this overwrites the columns in place, so running the cell a second
# time lags the signals by yet another row - restart and run all after editing.
lagged_signals = trading_signals_df[x_var_list].shift(periods=1)
trading_signals_df[x_var_list] = lagged_signals
trading_signals_df[x_var_list].tail()

Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-11,1.0,1.0,-1.0
2021-01-12,1.0,1.0,-1.0
2021-01-13,1.0,1.0,-1.0
2021-01-14,1.0,1.0,-1.0
2021-01-15,1.0,1.0,-1.0


### Drop NAs and Replace Infs (Positive/Negative Infinity) 

In [6]:
# Clean the rows the model will consume.
# Infinite values are converted to NaN *first* so that the dropna that follows also
# removes them; doing it the other way round leaves fresh NaNs in the feature and
# return columns after the drop.
# Only the feature columns and 'daily_return' are required to be present - NaNs in
# any other column do not cause a row to be dropped.
required_columns = x_var_list + ['daily_return']
trading_signals_df = (
    trading_signals_df
    .replace([np.inf, -np.inf], np.nan)
    .dropna(subset=required_columns)
)
trading_signals_df.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,daily_return,fast_close,slow_close,crossover_long,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-19,12.45,12.031429,12.441071,12.101429,1135613000.0,10.439302,-0.005313,12.131513,12.133526,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2011-01-20,12.082143,11.79,12.015357,11.881429,764789200.0,10.249522,-0.01818,12.036366,12.048326,0.0,...,1.0,0.0,0.0,,,,,0.0,0.0,0.0
2011-01-21,11.96,11.665357,11.920357,11.668571,754401200.0,10.065897,-0.017915,11.924512,11.951404,0.0,...,1.0,0.0,1.0,,,,,0.0,0.0,0.0
2011-01-24,12.051786,11.668571,11.673929,12.051786,574683200.0,10.396482,0.032842,11.957463,11.972041,0.0,...,0.0,-1.0,1.0,,,,,0.0,0.0,0.0
2011-01-25,12.194286,11.948929,12.011786,12.192857,546868000.0,10.518175,0.011705,12.011422,12.010131,1.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0


### Construct the Dependent Variable

In [7]:
# Build the binary target: 1.0 when the daily return is strictly positive, otherwise 0.0
gained = trading_signals_df['daily_return'] > 0
trading_signals_df['Positive Return'] = np.where(gained, 1.0, 0.0)
trading_signals_df.head(n=10)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,daily_return,fast_close,slow_close,crossover_long,...,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal,Positive Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-19,12.45,12.031429,12.441071,12.101429,1135613000.0,10.439302,-0.005313,12.131513,12.133526,0.0,...,0.0,0.0,,,,,0.0,0.0,0.0,0.0
2011-01-20,12.082143,11.79,12.015357,11.881429,764789200.0,10.249522,-0.01818,12.036366,12.048326,0.0,...,0.0,0.0,,,,,0.0,0.0,0.0,0.0
2011-01-21,11.96,11.665357,11.920357,11.668571,754401200.0,10.065897,-0.017915,11.924512,11.951404,0.0,...,0.0,1.0,,,,,0.0,0.0,0.0,0.0
2011-01-24,12.051786,11.668571,11.673929,12.051786,574683200.0,10.396482,0.032842,11.957463,11.972041,0.0,...,-1.0,1.0,,,,,0.0,0.0,0.0,1.0
2011-01-25,12.194286,11.948929,12.011786,12.192857,546868000.0,10.518175,0.011705,12.011422,12.010131,1.0,...,-1.0,-1.0,,,,,0.0,0.0,0.0,1.0
2011-01-26,12.342857,12.196429,12.248571,12.280357,506875600.0,10.593657,0.007176,12.067476,12.050358,1.0,...,0.0,-1.0,,,,,0.0,0.0,0.0,1.0
2011-01-27,12.310357,12.243929,12.277857,12.2575,285026000.0,10.57394,-0.001861,12.104183,12.077525,1.0,...,0.0,1.0,,,,,0.0,0.0,0.0,0.0
2011-01-28,12.3,11.911786,12.291786,12.003571,592057200.0,10.354887,-0.020716,12.085912,12.068845,1.0,...,0.0,1.0,,,,,0.0,0.0,0.0,0.0
2011-01-31,12.144286,11.939286,11.992857,12.118571,377246800.0,10.454092,0.009581,12.091549,12.074133,1.0,...,0.0,1.0,,,,,0.0,0.0,0.0,1.0
2011-02-01,12.344643,12.177857,12.189286,12.3225,426633200.0,10.630013,0.016828,12.129762,12.098309,1.0,...,0.0,1.0,,,,,0.0,0.0,0.0,1.0


### Assign Training and Testing Windows

In [53]:
# Convert the row count into whole years (252 rows per year - presumably the number
# of trading days in a year), and take 75% of the rows as the training share.
rows_per_year = 252
n_rows = len(trading_signals_df)
years = round(n_rows / rows_per_year)
training_years = round((n_rows * .75) / rows_per_year)

In [54]:
# Overall window covered by the training and testing sets.
# `end` is pinned to a fixed date; use datetime.now() instead to end the window today.
end = datetime(year=2021, month=1, day=5)

# `start` is the same calendar day, `years` years before `end`
start = datetime(year=end.year - years, month=end.month, day=end.day)

In [55]:
# Training window as 'YYYY-MM-DD' strings: it opens at `start` and closes on the
# same calendar day `training_years` years later.
date_format = '%Y-%m-%d'
training_start = start.strftime(date_format)
training_end = datetime(start.year + training_years, start.month, start.day).strftime(date_format)

In [56]:
# Construct testing start and end dates (as 'YYYY-MM-DD' strings)
from datetime import timedelta  # only this cell needs it

# Testing begins the calendar day after the training window closes. Adding a
# timedelta rolls over month and year boundaries correctly, whereas building the
# date with `day + 1` raises ValueError when `start` is the last day of a month.
training_end_date = datetime(start.year + training_years, start.month, start.day)
testing_start = (training_end_date + timedelta(days=1)).strftime('%Y-%m-%d')

# Testing runs through the overall end date
testing_end = datetime(end.year, end.month, end.day).strftime('%Y-%m-%d')

In [57]:
# Show the boundaries of both windows, one per line
print(
    f"Training Start: {training_start}",
    f"Training End: {training_end}",
    f"Testing Start: {testing_start}",
    f"Testing End: {testing_end}",
    sep="\n",
)

Training Start: 2011-01-05
Training End: 2018-01-05
Testing Start: 2018-01-06
Testing End: 2021-01-05


### Separate X and y Training Datasets

In [None]:
# Slice the training window out of the features and the target.
# Label-based date slicing includes both endpoints.
X_train = trading_signals_df.loc[training_start:training_end, x_var_list]
y_train = trading_signals_df.loc[training_start:training_end, 'Positive Return']

X_train.tail()

In [None]:
import matplotlib.pyplot as plt

# One histogram per feature column, showing how often each signal value
# (-1, 0 or 1) occurs in the training data
for column_name, column_values in X_train.items():
    column_values.hist()
    plt.show()

In [None]:
# Last five training targets
y_train.tail(5)

### Separate X and y Testing Datasets

In [None]:
# Slice the testing window out of the features and the target.
# Label-based date slicing includes both endpoints.
X_test = trading_signals_df.loc[testing_start:testing_end, x_var_list]
y_test = trading_signals_df.loc[testing_start:testing_end, 'Positive Return']

X_test.tail()

In [None]:
# Last five testing targets
y_test.tail(5)

### Import SKLearn Library and Classes

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

### Train Random Forest Model 

In [None]:
# Train a random forest classifier on the training window only (X_train, y_train).
# max_depth is 2 (3 was used previously); random_state is fixed at 0.
forest_params = dict(n_estimators=100, max_depth=2, random_state=0)
model = RandomForestClassifier(**forest_params).fit(X_train, y_train)

# Predict the target for every row of the testing window
predictions = model.predict(X_test)

# Put the true labels and the predictions side by side.
# NOTE(review): the "Actual Value" column holds the model's *predictions*; the true
# labels are in the 'Positive Return' column that comes from y_test. The name is left
# unchanged because it becomes a header in the exported results CSV - confirm with
# any consumers of that file before renaming it.
Results = y_test.to_frame()
Results["Actual Value"] = predictions
Results.head(n=15)

In [None]:
# First five rows of the full signals frame, for comparison with Results
trading_signals_df.head(n=5)

In [None]:
# First five rows of the true-label / prediction table
Results.head(n=5)

In [None]:
# Importance the fitted forest assigns to each feature; values are presumably in the
# same order as the columns of X_train (i.e. x_var_list) - confirm before labelling.
model.feature_importances_

In [None]:
# Mean accuracy on the data the model was trained on
model.score(X=X_train, y=y_train)

In [None]:
# Mean accuracy on the held-out testing window
model.score(X=X_test, y=y_test)

### Save Pre-Trained Model Using Joblib

In [None]:
# Write the fitted model to disk with joblib
from joblib import dump, load

model_filename = 'random_forest_model-AAPL.joblib'
dump(value=model, filename=model_filename)

### Save X_test and Results dataframes as CSV files

In [None]:
# Export the testing-window features (the date index is written as the first column)
x_test_csv = 'X_test.csv'
X_test.to_csv(x_test_csv)

In [None]:
# Attach each day's return to the results table; pandas aligns the values on the date index
daily_returns = trading_signals_df['daily_return']
Results['Return'] = daily_returns
Results.head(n=20)

In [None]:
# Export the results table (true label, prediction and daily return per date)
results_csv = 'results.csv'
Results.to_csv(results_csv)