### Technicals to Add
#### RSI, MACD, CCI, Bollinger Bands, SMA 9, SMA 20, SMA 50, SMA 100, SMA 200

In [2]:
import pandas as pd
from pandas_datareader import data as web
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
from pathlib import Path
import hvplot.pandas


%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [None]:
file_path = Path("Data/spy_data_pedro.csv")
spy_df = pd.read_csv(file_path)

In [None]:
# Keep only the columns the indicators need. Take an explicit copy: the cells
# below add many columns to spy_px, and writing into a slice of spy_df would
# trigger pandas' SettingWithCopy behaviour (currently hidden by the global
# warnings filter).
spy_px = spy_df[["Date", "SPY Close", "High", "Low"]].copy()
spy_px.head()

### Bollinger Bands

In [None]:
bollinger_window = 20
spy_px['bollinger_mid_band'] = spy_px["SPY Close"].rolling(window=bollinger_window).mean()
spy_px.head()

In [4]:
# Band half-width: rolling standard deviation of the close, taken over the same
# window as the mid band (bollinger_window) so the two cannot drift apart.
# NOTE(review): the bands sit at +/-1 standard deviation; conventional Bollinger
# Bands use +/-2 - confirm the narrower bands are intended.
bollinger_std = spy_px["SPY Close"].rolling(window=bollinger_window).std()
spy_px["bollinger_upper_band"] = spy_px["bollinger_mid_band"] + bollinger_std
spy_px["bollinger_lower_band"] = spy_px["bollinger_mid_band"] - bollinger_std

NameError: name 'spy_px' is not defined

In [None]:
spy_px[["SPY Close",'bollinger_mid_band','bollinger_upper_band','bollinger_lower_band']].plot(figsize=(20,10))

In [None]:

# Band-breakout signal: +1.0 when the close is under the lower band,
# -1.0 when it is over the upper band, 0.0 otherwise (including rows where the
# bands are still NaN, because comparisons against NaN are False).
spy_px['bollinger_long'] = np.where(
    spy_px['bollinger_lower_band'] > spy_px['SPY Close'], 1.0, 0.0
)
spy_px['bollinger_short'] = np.where(
    spy_px['bollinger_upper_band'] < spy_px['SPY Close'], -1.0, 0.0
)
spy_px['bollinger_signal'] = spy_px[['bollinger_long', 'bollinger_short']].sum(axis=1)

In [None]:
spy_px.iloc[20:25, :]

### MACD

In [None]:

# Spans (in rows) of the fast and slow exponential moving averages
short_sma = 12
long_sma = 26

# Construct a 'Fast' and 'Slow' Exponential Moving Average from the short and long windows.
# `span=N` is the conventional N-period EMA (alpha = 2 / (N + 1)). `halflife=N`
# decays far more slowly and does not match the "12d"/"26d" column names.
spy_px['macd 12d sma'] = spy_px['SPY Close'].ewm(span=short_sma).mean()
spy_px['macd 26d sma'] = spy_px['SPY Close'].ewm(span=long_sma).mean()

# Construct a crossover trading signal: long while the fast EMA is above the
# slow EMA, short while it is below.
spy_px['MACD Long'] = np.where(spy_px['macd 12d sma'] > spy_px['macd 26d sma'], 1.0, 0.0)
spy_px['MACD Short'] = np.where(spy_px['macd 12d sma'] < spy_px['macd 26d sma'], -1.0, 0.0)

# Construct MACD Signal (+1.0 long, -1.0 short, 0.0 when the averages are equal)
spy_px['MACD Signal'] = spy_px['MACD Long'] + spy_px['MACD Short']

In [None]:
#macd_df.tail(20)
spy_px[["SPY Close",'macd 12d sma','macd 26d sma']].plot(figsize=(20,10))

### RSI

In [None]:
#rsi_df = pd.DataFrame(spy_px[['Date', 'SPY Close']])

def computeRSI(data, time_window):
    """Compute the Relative Strength Index of a price series.

    Parameters
    ----------
    data : pandas Series of prices.
    time_window : int
        Smoothing length. Gains and losses are averaged with an exponentially
        weighted mean using ``com=time_window - 1`` (alpha = 1 / time_window)
        and ``min_periods=time_window``.

    Returns
    -------
    pandas Series of RSI values (0-100), indexed like ``data`` minus its first
    row (the one-step difference drops it). Entries before ``time_window``
    observations are NaN.
    """
    delta = data.diff(1).dropna()

    # Zero template with the same index as `delta`, used wherever a change
    # does not count as a gain (or as a loss).
    no_change = 0 * delta
    gains = delta.where(delta > 0, no_change)
    losses = delta.where(delta < 0, no_change)

    # See pandas' ewm documentation: com = time_window - 1 gives alpha = 1 / time_window.
    smoothing = dict(com=time_window - 1, min_periods=time_window)
    avg_gain = gains.ewm(**smoothing).mean()
    avg_loss = losses.ewm(**smoothing).mean()

    relative_strength = (avg_gain / avg_loss).abs()
    return 100 - 100 / (1 + relative_strength)

spy_px['RSI'] = computeRSI(spy_px['SPY Close'], 14)

In [None]:

# Construct an RSI threshold trading signal: long (+1.0) when RSI is below 30,
# short (-1.0) when RSI is above 70. Rows where RSI is NaN get 0.0 for both.
spy_px['RSI Long'] = np.where(spy_px['RSI'] < 30, 1.0, 0.0)
spy_px['RSI Short'] = np.where(spy_px['RSI'] > 70, -1.0, 0.0)

# Construct RSI Signal (+1.0 long, -1.0 short, 0.0 otherwise)
spy_px['RSI Signal'] = spy_px['RSI Long'] + spy_px['RSI Short']

In [None]:
spy_px.iloc[14:30, :]

In [None]:
spy_px['RSI'].plot(figsize=(20,10))

### SMA

In [None]:
# Simple moving averages of the close, one per look-back window (in rows).
sma_by_window = {
    window: spy_px['SPY Close'].rolling(window=window).mean()
    for window in (9, 20, 50, 100, 200)
}

# Keep the individual series available under their own names as well.
spy_sma_9 = sma_by_window[9]
spy_sma_20 = sma_by_window[20]
spy_sma_50 = sma_by_window[50]
spy_sma_100 = sma_by_window[100]
spy_sma_200 = sma_by_window[200]

# Each rolling mean shares spy_px's index, so it can be stored as a column directly.
for window, sma in sma_by_window.items():
    spy_px[f'SMA {window}'] = sma

spy_px[["SPY Close",'SMA 9','SMA 20', 'SMA 50', 'SMA 100', 'SMA 200']].plot(figsize=(20,10))

In [None]:
# Construct a price-vs-SMA trading signal: long (+1.0) when the close is below
# the 200-row SMA, short (-1.0) when the close is more than 20 above it.
# Rows where 'SMA 200' is still NaN get 0.0 for both (comparisons with NaN are False).
spy_px['SMA Long'] = np.where(spy_px['SPY Close'] < spy_px['SMA 200'], 1.0, 0.0)
spy_px['SMA Short'] = np.where(spy_px['SPY Close'] > spy_px['SMA 200'] + 20, -1.0, 0.0)

# Construct SMA Signal (+1.0 long, -1.0 short, 0.0 otherwise)
spy_px['SMA Signal'] = spy_px['SMA Long'] + spy_px['SMA Short']

In [None]:
spy_px.iloc[240:250, :]

### CCI

In [None]:
# Commodity Channel Index Python Code

# Retrieve the SPY data from Yahoo finance:
data = spy_px
#data = pdr.get_data_yahoo("SPY", start="2014-01-01", end="2020-04-06") 
#data = pd.DataFrame(data)


# Commodity Channel Index 
def computeCCI(data, time_window):
    """Compute a Commodity Channel Index column from high/low/close prices.

    Parameters
    ----------
    data : pandas DataFrame with 'High', 'Low' and 'SPY Close' columns.
    time_window : int
        Rolling window length (in rows) for the mean and dispersion of the
        typical price.

    Returns
    -------
    pandas DataFrame with a single 'CCI' column, indexed like ``data``. The
    first ``time_window - 1`` rows are NaN.

    Notes
    -----
    The result is built directly from the typical price instead of being joined
    back onto ``data``. Joining raised ``ValueError`` (overlapping columns)
    whenever ``data`` already had a 'CCI' column, e.g. when the calling cell is
    run a second time.

    NOTE(review): dispersion is the rolling standard deviation; the textbook
    CCI uses the rolling mean absolute deviation - confirm which is intended.
    """
    typical_price = (data['High'] + data['Low'] + data['SPY Close']) / 3
    rolling_tp = typical_price.rolling(time_window)
    cci = (typical_price - rolling_tp.mean()) / (0.015 * rolling_tp.std())
    return cci.rename('CCI').to_frame()


# Compute the Commodity Channel Index(CCI) for SPY based on the 14-day Moving average
#n = 14
#cci_df = CCI(data, n)


#cci_df[['CCI']].plot(figsize=(20,10))
spy_px['CCI'] = computeCCI(spy_px, 14)

In [None]:
# Construct a CCI trading signal
spy_px['CCI Long'] = np.where(spy_px['CCI'] < -175, 1.0, 0.0)
spy_px['CCI Short'] = np.where(spy_px['CCI'] > 150, -1.0, 0.0)

# Construct CCI Signal
spy_px['CCI Signal'] = spy_px['CCI Long'] + spy_px['CCI Short']

spy_px.tail(15)

### Trading Signals

In [None]:
spy_px.columns

In [None]:
spy_px[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']]

In [None]:
# Plot the row-wise sum of the five indicator signals with reference lines at +/-3.
# The Axes is kept in `ax`: binding it to `plt` would shadow the
# matplotlib.pyplot alias imported at the top of the notebook.
ax = spy_px[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']].sum(axis=1).plot(figsize=(15,10))
ax.hlines(3, xmin=0, xmax=1600)
ax.hlines(-3, xmin=0, xmax=1600)

In [None]:
# Same combined-signal plot, with reference lines at +/-1.5.
# The Axes is kept in `ax`: binding it to `plt` would shadow the
# matplotlib.pyplot alias imported at the top of the notebook.
ax = spy_px[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']].sum(axis=1).plot(figsize=(15,10))
ax.hlines(1.5, xmin=0, xmax=1600)
ax.hlines(-1.5, xmin=0, xmax=1600)

In [None]:
# Row-wise total of the five indicator signals, computed once and reused.
indicator_columns = ['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']
combined_signal = spy_px[indicator_columns].sum(axis=1)

# BUY (+1.0) when the indicators net to at least +3, SELL (-1.0) when they net to -3 or lower.
spy_px['BUY_Signal'] = np.where(combined_signal >= 3, 1.0, 0.0)
spy_px['SELL_Signal'] = np.where(combined_signal <= -3, -1.0, 0.0)

In [None]:
print(spy_px.BUY_Signal.sum(), spy_px.SELL_Signal.sum())

In [None]:
# Date-indexed copy of the indicator frame (spy_px itself is left untouched).
# `infer_datetime_format` is deprecated in pandas 2.x, where format inference is
# the default, so the dates are parsed with a plain pd.to_datetime call.
adam_spy_px = spy_px.copy()
adam_spy_px['Date'] = pd.to_datetime(adam_spy_px['Date'])
adam_spy_px = adam_spy_px.set_index('Date')

In [None]:
# Axis label and size shared by every layer of the overlay
layer_frame = dict(ylabel='Price in $', width=1000, height=400)

# Close price as a light-gray line
security_close = adam_spy_px['SPY Close'].hvplot(line_color='lightgray', **layer_frame)

# Green up-markers 5 below the close on BUY rows
buy_rows = adam_spy_px['BUY_Signal'] == 1.0
long = (adam_spy_px.loc[buy_rows, 'SPY Close'] - 5).hvplot.scatter(
    marker="^", color='green', legend=False, **layer_frame)

# Red down-markers 5 above the close on SELL rows
sell_rows = adam_spy_px['SELL_Signal'] == -1.0
short = (adam_spy_px.loc[sell_rows, 'SPY Close'] + 5).hvplot.scatter(
    marker="v", color='red', legend=False, **layer_frame)

# Overlay the three layers and display the result
entry_exit_plot = long * security_close * short
entry_exit_plot

In [None]:
cum_indicators = adam_spy_px.BUY_Signal.cumsum() + adam_spy_px.SELL_Signal.cumsum()

In [None]:
adam_spy_px.index

In [None]:

# Build the held position row by row: a BUY row sets it to 1, a SELL row sets
# it to -1, and any other row carries the previous value forward. The first
# row always starts flat (0).
# The flags are read positionally from NumPy arrays: integer lookups such as
# `Series[i]` on a date-indexed Series rely on a positional fallback that
# pandas has deprecated.
buy_flags = adam_spy_px['BUY_Signal'].to_numpy()
sell_flags = adam_spy_px['SELL_Signal'].to_numpy()

trades = [0]
for i in range(1, len(adam_spy_px)):
    if buy_flags[i] > 0:
        trades.append(1)
    elif sell_flags[i] < 0:
        trades.append(-1)
    else:
        trades.append(trades[-1])

indicators_df = pd.DataFrame(trades, index = adam_spy_px.index, columns=['trade'])

In [None]:
indicators_df['trade'].value_counts()

In [None]:
# Turn the held position into trade events: the row-to-row change, clipped to
# [-1, 1] so a direct flip (e.g. 1 -> -1, a change of -2) still counts as a single -1.
# NOTE: this overwrites indicators_df with its own diff, so the cell is not
# idempotent - re-running it differences the already-differenced values.
indicators_df = indicators_df.diff().clip(-1,1)

In [None]:
indicators_df.iloc[0,:] = 0

In [None]:
indicators_df['trade'].value_counts()

In [None]:
indicators_df

In [None]:
# Axis label and size shared by every layer of the overlay
layer_frame = dict(ylabel='Price in $', width=1000, height=400)

# Close price as a light-gray line
security_close = adam_spy_px['SPY Close'].hvplot(line_color='lightgray', **layer_frame)

# Green up-markers 5 below the close where the trade column equals 1
entry_rows = indicators_df.trade == 1
long = (adam_spy_px.loc[entry_rows, 'SPY Close'] - 5).hvplot.scatter(
    marker="^", color='green', legend=False, **layer_frame)

# Red down-markers 5 above the close where the trade column equals -1
exit_rows = indicators_df.trade == -1
short = (adam_spy_px.loc[exit_rows, 'SPY Close'] + 5).hvplot.scatter(
    marker="v", color='red', legend=False, **layer_frame)

# Overlay the three layers and display the result
entry_exit_plot = long * security_close * short
entry_exit_plot

In [None]:
# NOTE(review): no visible cell binds `position` to a pandas Series/DataFrame,
# so this cell presumably depends on a definition that has since been removed
# and will fail on Restart & Run All. TODO: confirm which series was meant
# (possibly cum_indicators or the trades column) and define it explicitly.
position.index = adam_spy_px.index
position['2015-10-23']

In [None]:
position.plot()

In [None]:
adam_spy_px.SELL_Signal.cumsum()['2015-10-23']

In [None]:
# Inspect the close, the BUY flag and each indicator signal for Oct-Nov 2018.
# The column is named 'BUY_Signal' (with an underscore); 'BUY Signal' does not
# exist and raised a KeyError.
adam_spy_px.loc['2018-10':'2018-11',['SPY Close','BUY_Signal','RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']]

## Random Forest Trading

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [None]:
adam_spy_px.tail()

In [None]:
signals_df = adam_spy_px[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']]
signals_df.tail()

### Shift the Signal Values by 1 Row (Lag the Features)

In [None]:
signals_shifted_df = signals_df.shift(1)
signals_shifted_df.tail()

## Drop NAs and Replace Infs (Positive/Negative Infinity)

In [None]:
# Replace positive/negative infinity with NaN first, then drop every row that
# contains a NaN. Doing it in the opposite order would leave the NaNs created
# from infinities in the result. Built as a new frame, so signals_shifted_df is
# not modified and the cell can be re-run safely.
signals_inf_df = signals_shifted_df.replace([np.inf, -np.inf], np.nan).dropna()
signals_inf_df.head()

### Construct the Dependent Variable

In [None]:
signals_inf_df['Trading_Signal'] = indicators_df['trade']
signals_inf_df.tail()

In [None]:
#signals_inf_df['Trading_Signal'] = indicators_df['trade']
signals_inf_df['Trading_Signal'].value_counts()

### Training and Testing

In [None]:
# Training runs from the first available date to a fixed cut-off;
# testing runs from the following day to the last available date.
first_day = signals_inf_df.index.min()
last_day = signals_inf_df.index.max()

training_start = first_day.strftime('%Y-%m-%d')
training_end = '2018-05-17'

testing_start = '2018-05-18'
testing_end = last_day.strftime('%Y-%m-%d')

# Show the four boundaries
print(f"Training Start: {training_start}")
print(f"Training End: {training_end}")
print(f"Testing Start: {testing_start}")
print(f"Testing End: {testing_end}")

### Separate X and Y Training Datasets

In [None]:

# Training window: rows from training_start through training_end (inclusive label slice)
train_rows = signals_inf_df.loc[training_start:training_end]

# Features are the five indicator signals; the target is the trading signal
x_train = train_rows[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']]
y_train = train_rows['Trading_Signal']

x_train.tail()

In [None]:
y_train.tail()

### Separate X and Y Testing Datasets

In [None]:
# Testing window: rows from testing_start through testing_end (inclusive label slice)
test_rows = signals_inf_df.loc[testing_start:testing_end]

# Construct the x test and y test datasets: indicator signals and trading signal
x_test = test_rows[['RSI Signal', 'bollinger_signal', 'MACD Signal', 'SMA Signal', 'CCI Signal']]
y_test = test_rows['Trading_Signal']

x_test.tail()

In [None]:
y_test.tail()

### Import SKLearn Library and Classes

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

### Train Random Forest Model

In [None]:
# Fit a scikit-learn random forest classifier (100 trees, max depth 3, fixed
# random_state for reproducibility) using just the training set (x_train, y_train):
model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
model.fit(x_train, y_train)

# Predict the "y" class labels for the x test dataset
predictions = model.predict(x_test)

# Assemble actual y data (y_test) with predicted y data (from just above) into two columns in a dataframe:
Results = y_test.to_frame()
Results["Predicted_Signals"] = predictions
Results.tail()

### Save Pre-Trained Model Using Joblib

In [None]:
# Save the pre-trained model
from joblib import dump, load
dump(model, 'random_forest_model.joblib')

In [None]:
x_test.head()

In [None]:
y_test.head()

In [None]:
# Close prices over the testing window. The window reuses testing_start /
# testing_end rather than repeating the dates as literals, so it stays aligned
# with the rows the model was evaluated on. The explicit copy makes it safe to
# add columns to trading_df in the cells below.
trading_df = adam_spy_px.loc[testing_start:testing_end, ["SPY Close"]].copy()
trading_df.head()

In [None]:
trading_df["Return"] = trading_df["SPY Close"].pct_change()
trading_df.fillna(0, inplace=True)
trading_df.head()

In [None]:
trading_df.columns

### Compare Predicted Results to Y-Test (Actual Results)

In [None]:
#recreate results from the exercise
trading_df["Trading_Signal"] = Results['Trading_Signal']
trading_df['Predicted_Signal'] = Results['Predicted_Signals']
trading_df.head()

### Plot Cumulative Return of Random Forest Model (In Terms of Capital)

In [None]:

# Set initial capital allocation
initial_capital = 100000

# Plot cumulative return of model in terms of capital:
# each row's strategy return is Return * Predicted_Signal; (1 + strategy return)
# is compounded with cumprod and scaled by the initial capital.
# NOTE(review): Predicted_Signal predicts the diff'd 'trade' column (entry/exit
# events), so rows between events presumably contribute zero return - confirm
# this is intended rather than multiplying by a held position.
cumulative_return_capital = initial_capital * (1 + (trading_df['Return'] * trading_df['Predicted_Signal'])).cumprod()
cumulative_return_capital.plot()

In [None]:
Results.head()

In [None]:
signals_inf_df.head()

In [None]:
signals_inf_df.tail()

In [None]:

signals_inf_df["RSI Signal"].count()

In [None]:

signals_inf_df.iloc[1100:1102, :]

In [None]:
1574 * .7