In [39]:
# Import libraries and dependencies
import numpy as np
import pandas as pd
import os
import requests
from pathlib import Path
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from MCForecastTools import MCSimulation

In [40]:
# Import API data
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

In [41]:
# Load environment variables from the project's .env file
# (the Alpaca key and secret are read from the environment further down)
load_dotenv("ProjectAPI.env")

True

In [4]:
# Ticker symbols of the ETFs in the portfolio
tickers = [
    "SPY",
    "IWM",
    "EZU",
    "EWU",
    "EWJ",
    "EWZ",
    "EWA",
    "EWC",
    "MCHI",
    "EWW",
]

# Shares DataFrame: no columns yet, one row per ticker
df_etf = pd.DataFrame(index=tickers)

In [5]:
# Read the Alpaca credentials from the environment (None when a variable is unset)
alpaca_api_key = os.environ.get("Alpaca_API_Key")
alpaca_secret_key = os.environ.get("Alpaca_Secret_Key")

# Sanity check: print only the types, never the secret values themselves
for label, value in (
    ("Alpaca Key", alpaca_api_key),
    ("Alpaca Secret Key", alpaca_secret_key),
):
    print(f"{label} type: {type(value)}")

Alpaca Key type: <class 'str'>
Alpaca Secret Key type: <class 'str'>


In [6]:
# Build the Alpaca REST client (API version v2) from the credentials read from the environment
alpaca = tradeapi.REST(key_id=alpaca_api_key, secret_key=alpaca_secret_key, api_version="v2")

In [7]:
# Fixed 18-month window, expressed as ISO 8601 strings in New York time
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2022-04-01", "2023-10-01")
)

In [8]:
# Bar size requested from the Alpaca API: one bar per day
# (the 18-month window itself is defined by start_date / end_date)
timeframe = "1Day"

In [10]:
# Request daily bars for every ticker in the ETF portfolio over the 18-month window
portfolio_bars = alpaca.get_bars(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
)

# Convert the response to a DataFrame (one row per timestamp and symbol)
df_portfolio = portfolio_bars.df

df_portfolio

Unnamed: 0_level_0,close,high,low,trade_count,open,volume,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-04-01 04:00:00+00:00,26.69,26.7000,26.475,7069,26.61,4285764,26.579325,EWA
2022-04-04 04:00:00+00:00,26.88,26.9050,26.630,7695,26.69,3288300,26.806707,EWA
2022-04-05 04:00:00+00:00,26.74,27.1600,26.650,11594,27.00,6864193,26.829930,EWA
2022-04-06 04:00:00+00:00,26.56,26.7450,26.410,10866,26.71,3946883,26.574937,EWA
2022-04-07 04:00:00+00:00,26.53,26.6100,26.290,9064,26.44,4980263,26.453717,EWA
...,...,...,...,...,...,...,...,...
2023-09-25 04:00:00+00:00,432.23,432.2700,428.720,488035,429.17,70874824,430.900652,SPY
2023-09-26 04:00:00+00:00,425.88,429.8200,425.020,633690,429.09,96172890,427.150722,SPY
2023-09-27 04:00:00+00:00,426.05,427.6700,422.290,693420,427.09,104707452,425.420576,SPY
2023-09-28 04:00:00+00:00,428.52,430.2499,424.870,616077,425.48,92258310,427.858964,SPY


In [11]:
# Keep only the columns the analysis uses, in display order.
# One column selection replaces the earlier in-place drop + reorder: it yields the
# same frame, does not mutate the fetched data in place, and makes the cell safe to
# re-run (the in-place drop raised KeyError on a second run because
# 'high', 'low', 'trade_count' and 'vwap' were already gone).
df_portfolio = df_portfolio[['symbol', 'close', 'open', 'volume']]

df_portfolio

Unnamed: 0_level_0,symbol,close,open,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-04-01 04:00:00+00:00,EWA,26.69,26.61,4285764
2022-04-04 04:00:00+00:00,EWA,26.88,26.69,3288300
2022-04-05 04:00:00+00:00,EWA,26.74,27.00,6864193
2022-04-06 04:00:00+00:00,EWA,26.56,26.71,3946883
2022-04-07 04:00:00+00:00,EWA,26.53,26.44,4980263
...,...,...,...,...
2023-09-25 04:00:00+00:00,SPY,432.23,429.17,70874824
2023-09-26 04:00:00+00:00,SPY,425.88,429.09,96172890
2023-09-27 04:00:00+00:00,SPY,426.05,427.09,104707452
2023-09-28 04:00:00+00:00,SPY,428.52,425.48,92258310


In [12]:
# Pivot the long-format bars into a wide frame with one column group per ticker.
# Column-group order: SPY, EWA, EWC, EWJ, EWU, EWZ, EWW, EZU, IWM, MCHI
ticker_order = ['SPY', 'EWA', 'EWC', 'EWJ', 'EWU', 'EWZ', 'EWW', 'EZU', 'IWM', 'MCHI']

# One frame per ticker: that symbol's rows, without the now-redundant "symbol" column
ticker_frames = [
    df_portfolio.loc[df_portfolio["symbol"] == symbol].drop(columns="symbol")
    for symbol in ticker_order
]

# Keep the individual per-ticker frames available under their ticker names
SPY, EWA, EWC, EWJ, EWU, EWZ, EWW, EZU, IWM, MCHI = ticker_frames

# Join the per-ticker frames side by side, keyed by ticker on the top column level
df_portreo = pd.concat(ticker_frames, axis=1, keys=ticker_order)

# Display sample data
df_portreo

Unnamed: 0_level_0,SPY,SPY,SPY,EWA,EWA,EWA,EWC,EWC,EWC,EWJ,...,EWW,EZU,EZU,EZU,IWM,IWM,IWM,MCHI,MCHI,MCHI
Unnamed: 0_level_1,close,open,volume,close,open,volume,close,open,volume,close,...,volume,close,open,volume,close,open,volume,close,open,volume
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-04-01 04:00:00+00:00,452.92,453.31,90526299,26.69,26.61,4285764,40.22,40.40,4957883,61.93,...,1798341,43.66,43.620,5484519,207.57,206.18,26893201,54.75,55.29,7382913
2022-04-04 04:00:00+00:00,456.80,453.13,61390954,26.88,26.69,3288300,40.62,40.46,6214440,62.43,...,3232716,43.67,43.410,3563681,207.91,207.87,21021131,56.49,56.02,7068617
2022-04-05 04:00:00+00:00,451.03,455.22,75760010,26.74,27.00,6864193,40.33,40.74,8828963,60.80,...,2301442,42.88,43.200,10249067,202.99,207.96,27050940,54.92,56.16,8027718
2022-04-06 04:00:00+00:00,446.52,446.89,107084639,26.56,26.71,3946883,39.90,40.26,5466978,59.89,...,3050678,42.04,42.030,9638908,200.11,201.27,34969069,54.02,54.42,7041040
2022-04-07 04:00:00+00:00,448.77,445.59,78235837,26.53,26.44,4980263,39.79,39.82,4362696,59.68,...,1793140,42.16,42.270,7227281,199.46,200.11,29692911,53.07,53.67,7808268
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-25 04:00:00+00:00,432.23,429.17,70874824,21.62,21.53,1974029,34.10,33.85,1229511,61.72,...,2105042,42.42,42.275,23485612,177.46,175.69,22416152,43.41,43.14,5561411
2023-09-26 04:00:00+00:00,425.88,429.09,96172890,21.34,21.49,2408206,33.56,33.79,1609404,60.77,...,2054052,41.80,41.990,4721707,174.36,175.50,30476036,42.91,42.94,3113411
2023-09-27 04:00:00+00:00,426.05,427.09,104707452,21.25,21.42,2527763,33.39,33.68,1311190,61.03,...,2409395,41.66,41.900,5250708,175.99,175.45,37295094,43.05,43.06,3678446
2023-09-28 04:00:00+00:00,428.52,425.48,92258310,21.63,21.39,1440039,33.73,33.39,1741141,61.15,...,2262146,42.10,41.740,8352776,177.58,176.09,31372443,42.96,42.70,3651899


In [13]:
# Export the reorganized (wide) DataFrame to a .csv file as a backup.
# The path is built with pathlib rather than a hard-coded backslash: on POSIX systems
# r'Desktop\UpDownFlatData.csv' is a single file name containing a backslash, not a
# file inside a "Desktop" folder. The folder is relative to the working directory.
backup_path = Path("Desktop") / "UpDownFlatData.csv"

# Make sure the target folder exists so the export does not fail on a fresh checkout
backup_path.parent.mkdir(parents=True, exist_ok=True)

df_portreo.to_csv(backup_path)

In [45]:
# Set up a Monte Carlo simulation of SPY for the next trading day.
# Double brackets keep the ticker-keyed top column level that MCSimulation receives.
MC_df = df_portreo.loc[:, ["SPY"]]

# 1000 simulated paths, each one trading day long
monte_predict = MCSimulation(portfolio_data=MC_df, num_simulation=1000, num_trading_days=1)

# Confirm the simulation object holds the expected input data
monte_predict.portfolio_data.head()

Unnamed: 0_level_0,SPY,SPY,SPY,SPY
Unnamed: 0_level_1,close,open,volume,daily_return
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2022-04-01 04:00:00+00:00,452.92,453.31,90526299,
2022-04-04 04:00:00+00:00,456.8,453.13,61390954,0.008567
2022-04-05 04:00:00+00:00,451.03,455.22,75760010,-0.012631
2022-04-06 04:00:00+00:00,446.52,446.89,107084639,-0.009999
2022-04-07 04:00:00+00:00,448.77,445.59,78235837,0.005039


In [46]:
# Run the Monte Carlo simulation to forecast the next trading day's movement.
# Seed NumPy's global random generator first so the simulated paths (and the summary
# statistics derived from them) are reproducible on Restart & Run All.
# NOTE(review): assumes MCSimulation draws from np.random — confirm against MCForecastTools.
# MCSimulation is already imported in the notebook's first cell, so the duplicate
# import that used to live in this cell has been removed.
np.random.seed(0)

monte_predict.calc_cumulative_return()

Running Monte Carlo simulation number 0.
Running Monte Carlo simulation number 10.
Running Monte Carlo simulation number 20.
Running Monte Carlo simulation number 30.
Running Monte Carlo simulation number 40.
Running Monte Carlo simulation number 50.
Running Monte Carlo simulation number 60.
Running Monte Carlo simulation number 70.
Running Monte Carlo simulation number 80.
Running Monte Carlo simulation number 90.
Running Monte Carlo simulation number 100.
Running Monte Carlo simulation number 110.
Running Monte Carlo simulation number 120.
Running Monte Carlo simulation number 130.
Running Monte Carlo simulation number 140.
Running Monte Carlo simulation number 150.
Running Monte Carlo simulation number 160.
Running Monte Carlo simulation number 170.
Running Monte Carlo simulation number 180.
Running Monte Carlo simulation number 190.
Running Monte Carlo simulation number 200.
Running Monte Carlo simulation number 210.
Running Monte Carlo simulation number 220.
Running Monte Carlo si

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.993322,1.010689,0.996585,1.008786,0.985032,0.993917,1.008216,1.005497,0.995297,0.991919,...,1.009048,1.01346,0.991915,0.99859,1.018973,0.995116,0.983042,1.021073,1.010622,0.996856


In [47]:
# Summarize the simulated cumulative returns (count, mean, std, quartiles, 95% CI)
# to give an idea of tomorrow's likelihood of closing up or down
MC_tbl = monte_predict.summarize_cumulative_return()
MC_tbl

count           1000.000000
mean               0.999360
std                0.012192
min                0.953158
25%                0.991153
50%                0.999179
75%                1.007804
max                1.050256
95% CI Lower       0.975739
95% CI Upper       1.022123
Name: 1, dtype: float64

In [None]:
# TODO: Based on the summary table above, display the mean and the levels 1.5 standard deviations above and below it as the target outcomes.


In [None]:
# Define the variables for the regression.
# df_portfolio is long-format (columns: symbol, close, open, volume), so it has no
# per-ticker columns and indexing it by ticker name raises KeyError. Take the closing
# prices from the wide, ticker-keyed df_portreo instead: selecting "close" on the
# second column level leaves one column per ticker.
# Rows where any ticker has no close are dropped so X and y stay aligned and free of NaN.
close_prices = df_portreo.xs("close", axis=1, level=1).dropna()

# Features: closing prices of the nine ETFs other than SPY
X = close_prices[["IWM", "EZU", "EWU", "EWJ", "EWZ", "EWA", "EWC", "MCHI", "EWW"]]

# Target: SPY closing price
y = close_prices["SPY"]

In [None]:
# Split the data into training and test sets: 10% of the rows are held out for
# testing, and random_state pins the split so it is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [None]:
# Fit a linear regression of y on X using the training rows.
# fit() returns the estimator itself, so `reg` and `reg_model` refer to the same
# fitted model; previously a second, never-used LinearRegression was instantiated.
reg = LinearRegression()
reg_model = reg.fit(X_train, y_train)

# Print the fitted coefficient (beta) of each feature to show its influence on y.
# `ticks` must list the tickers in the same order as the columns of X.
coeffs = reg_model.coef_
ticks = ["IWM", "EZU", "EWU", "EWJ", "EWZ", "EWA", "EWC", "MCHI", "EWW"]
for tick, beta in zip(ticks, coeffs):
    print(f"{tick} beta is {round(beta, 4)}")

In [None]:
# Predict with the fitted model on both the held-out test rows and the training rows
y_pred = reg_model.predict(X_test)
x_pred = reg_model.predict(X_train)

# Put actual and predicted test values side by side and plot them
# as a visual reference for the model's predictive accuracy
reg_model_diff = pd.DataFrame(
    {
        'Actual value': y_test,
        'Predicted value': y_pred,
    }
)
reg_model_diff.plot(figsize=(20, 10))

In [None]:
# TODO: Display the regression output in the GUI for analysis.
