In [126]:
from polygon import RESTClient
import pandas as pd
from dotenv import load_dotenv
import os
import requests
import json
import datetime
import numpy as np
# Load environment variables from a .env file; the Polygon API key is read from os.environ below.
load_dotenv()

# Start with an empty frame; the loading code below accumulates every ticker's rows into it.
portfolio_data = pd.DataFrame()


## Importing Polygon Stock Data

In [127]:
def load_portfolio_data(
    ticker: str,
    name: str,
    type: str,
    portfolio_data: pd.DataFrame,
    start: str = "2023-01-01",
    end=None,
    limit: int = 5000,
):
    """Fetch daily aggregate bars for one ticker and append them to ``portfolio_data``.

    Parameters
    ----------
    ticker : str
        Polygon ticker symbol passed to ``get_aggs`` (e.g. ``"SPY"``, ``"C:USDJPY"``).
    name : str
        Human-readable instrument name, stored in a ``name`` column on every row.
    type : str
        Asset class label (e.g. ``"stock"``), stored in a ``type`` column on every row.
        The parameter shadows the ``type`` builtin; the name is kept because
        callers pass it by keyword.
    portfolio_data : pd.DataFrame
        Rows loaded so far. It is not modified; a new frame is returned.
    start : str, optional
        First day of the requested range, ``YYYY-MM-DD``. Defaults to ``"2023-01-01"``.
    end : str, optional
        Last day of the requested range, ``YYYY-MM-DD``. Defaults to today's date.
    limit : int, optional
        ``limit`` argument forwarded to ``get_aggs``. Defaults to 5000.

    Returns
    -------
    pd.DataFrame
        ``portfolio_data`` followed by the newly fetched rows. The index is not
        reset, so index labels repeat across tickers until the caller resets it.

    Raises
    ------
    KeyError
        If the ``polygon_api_key`` environment variable is not set.
    """
    if end is None:
        # Resolve "today" at call time rather than at definition time.
        end = str(datetime.date.today())

    client = RESTClient(os.environ['polygon_api_key'])
    aggs = client.get_aggs(
        ticker,
        1,
        "day",
        start,
        end,
        limit=limit
    )
    # Tag every bar with its instrument so that rows from several tickers can share one frame.
    df = pd.DataFrame(aggs).assign(ticker=ticker, name=name, type=type)
    return pd.concat([portfolio_data, df])

In [128]:
# Every holding in the portfolio as (ticker, display name, asset type), in load order.
HOLDINGS = [
    ("TSM", "Taiwan Semiconductor Mfg. Co. Ltd.", "stock"),
    ("AMZN", "Amazon.com Inc", "stock"),
    ("NVDA", "NVIDIA Corp", "stock"),
    ("AXP", "American Express Company", "stock"),
    ("CELH", "Celsius Holdings, Inc.", "stock"),
    ("C:USDJPY", "USD/JPY Foreign Exchange", "forex"),
    ("SPY", "SPDR S&P 500 ETF Trust", "etf"),
]

# Rebuild from an empty frame so that re-running this cell cannot append a
# ticker twice; duplicated rows would break the later per-date row-count filter.
# NOTE(review): this issues one API request per holding back to back, the same
# as "Run All" did with separate cells; confirm the API plan's rate limit allows it.
portfolio_data = pd.DataFrame()
for holding_ticker, holding_name, holding_type in HOLDINGS:
    portfolio_data = load_portfolio_data(
        ticker=holding_ticker,
        name=holding_name,
        type=holding_type,
        portfolio_data=portfolio_data
    )

In [244]:
# Fetch daily bars for the index ticker 'I:NDX' (presumably the Nasdaq-100; confirm)
# over the same date range that is used for the portfolio holdings.
client = RESTClient(os.environ['polygon_api_key'])
ndx_bars = client.get_aggs('I:NDX', 1, "day", "2023-01-01", str(datetime.date.today()), limit=5000)
NDX_data = pd.DataFrame(ndx_bars)

In [135]:
# Sanity check: list the distinct tickers present to confirm every holding was loaded
pd.unique(portfolio_data["ticker"])

array(['TSM', 'AMZN', 'NVDA', 'AXP', 'CELH', 'C:USDJPY', 'SPY'],
      dtype=object)

## Data Cleaning

In [136]:
# Replace the per-ticker index labels (which repeat after concatenation) with a fresh 0..n-1 index
portfolio_data = portfolio_data.reset_index(drop=True)

In [137]:
# dropping the otc column
# errors="ignore" keeps this cell re-runnable: once the column is gone, a second
# run is a no-op instead of raising KeyError.
portfolio_data = portfolio_data.drop(columns=["otc"], errors="ignore")

In [138]:
# Convert the millisecond epoch timestamps to calendar days (time of day discarded),
# stored as a datetime column named 'date' in place of 'timestamp'.
bar_days = pd.to_datetime(portfolio_data["timestamp"], unit="ms").dt.date
portfolio_data = (
    portfolio_data
    .assign(timestamp=pd.to_datetime(bar_days))
    .rename(columns={'timestamp': 'date'})
)


In [139]:
# Round every numeric column to 2 decimals, then store volume as whole numbers
portfolio_data = portfolio_data.round(2).astype({"volume": int})

In [245]:
# Clean the index data: keep only the price columns and the bar date.
ndx_unused_columns = ["volume", "vwap", "transactions", "otc"]
NDX_data = NDX_data.drop(columns=ndx_unused_columns).rename(columns={'timestamp': 'date'})

# Millisecond epoch -> calendar day (time of day discarded) -> datetime column
ndx_days = pd.to_datetime(NDX_data['date'], unit='ms').dt.date
NDX_data = NDX_data.assign(date=pd.to_datetime(ndx_days)).round(2)

NDX_data

Unnamed: 0,open,high,low,close,date
0,12085.67,12156.21,12006.04,12066.27,2023-02-22
1,11979.80,12018.32,11900.84,11969.65,2023-02-24
2,12106.79,12159.64,12034.61,12057.79,2023-02-27
3,12041.75,12146.52,12021.32,12042.12,2023-02-28
4,12026.72,12054.48,11906.58,11937.48,2023-03-01
...,...,...,...,...,...
344,20224.13,20406.99,20201.50,20391.97,2024-07-05
345,20393.89,20455.38,20363.37,20439.54,2024-07-08
346,20504.17,20543.90,20395.57,20453.02,2024-07-09
347,20533.27,20690.97,20479.94,20675.38,2024-07-10


In [246]:
# Summary statistics of the cleaned index data, as a quick range and row-count check
NDX_data.describe()

Unnamed: 0,open,high,low,close,date
count,349.0,349.0,349.0,349.0,349
mean,15924.988911,16023.05298,15831.376476,15935.867822,2023-10-31 21:47:57.936962816
min,11752.1,11908.39,11695.41,11830.28,2023-02-22 00:00:00
25%,14642.08,14774.16,14557.83,14694.24,2023-06-29 00:00:00
50%,15561.15,15618.85,15429.36,15508.24,2023-10-30 00:00:00
75%,17815.32,17864.16,17676.35,17783.17,2024-03-06 00:00:00
max,20661.41,20690.97,20479.94,20675.38,2024-07-11 00:00:00
std,2102.702704,2101.548954,2096.389267,2102.681396,


## Outlier Cleaning

In [140]:
# Summary statistics for the USD/JPY rows, to spot outliers in transactions and volume
is_usdjpy = portfolio_data["ticker"] == "C:USDJPY"
portfolio_data[is_usdjpy].describe()

Unnamed: 0,open,high,low,close,volume,vwap,date,transactions
count,499.0,499.0,499.0,499.0,499.0,499.0,499,499.0
mean,144.369739,144.882766,143.845511,144.437735,152266.703407,144.411463,2023-09-30 19:51:49.418837504,152266.703407
min,127.35,127.88,127.35,127.87,1.0,127.88,2023-01-01 00:00:00,1.0
25%,136.595,137.505,136.285,136.97,109867.0,136.805,2023-05-13 12:00:00,109867.0
50%,145.71,146.22,144.94,145.73,180092.0,145.57,2023-09-27 00:00:00,180092.0
75%,150.095,150.56,149.8,150.135,213513.0,150.105,2024-02-17 00:00:00,213513.0
max,161.58,162.0,161.3,161.59,357136.0,161.59,2024-07-11 00:00:00,357136.0
std,8.498044,8.437833,8.542451,8.499978,86947.436292,8.487131,,86947.436292


In [141]:
# Removing all weekend days (day_of_week: Monday=0 ... Sunday=6).
# A boolean mask is used instead of dropping by index label, so the filter stays
# correct even if index labels are not unique.
is_weekend = portfolio_data["date"].dt.day_of_week > 4
portfolio_data = portfolio_data[~is_weekend]

# Removing all days the stock market is not open: keep only the dates on which
# every ticker has a row. The required count is derived from the data instead of
# hard-coding the number of holdings.
n_tickers = portfolio_data["ticker"].nunique()
no_holidays = portfolio_data['date'].value_counts() == n_tickers
# .copy() gives later cells an independent frame rather than a slice of the original.
portfolio_data = portfolio_data[portfolio_data['date'].isin(no_holidays[no_holidays].index)].copy()


## Aggregations

Calculating how many shares of each asset to buy with a $100,000 starting balance (each purchase is priced at the asset's first available VWAP, and fractional shares are allowed)
- 40% SPY
- 10% USDJPY
- 10% AMZN
- 10% TSM
- 10% CELH
- 10% AXP
- 10% NVDA

In [142]:
def _first_vwap(symbol):
    """Return the vwap of the first row in portfolio_data for the given ticker."""
    return portfolio_data[portfolio_data["ticker"] == symbol]["vwap"].iat[0]


# Dollars allocated to each holding divided by its entry price gives the (fractional) share count.
SPY_shares = 40000/_first_vwap("SPY")
USDJPY_shares = 10000/_first_vwap("C:USDJPY")
AMZN_shares = 10000/_first_vwap("AMZN")
TSM_shares = 10000/_first_vwap("TSM")
CELH_shares = 10000/_first_vwap("CELH")
AXP_shares = 10000/_first_vwap("AXP")
NVDA_shares = 10000/_first_vwap("NVDA")

Creating dataframes for each investment

In [143]:
cols = ['date','open','high','low','close','volume','vwap','transactions']


def _ticker_frame(symbol):
    """Return the rows of portfolio_data for one ticker, limited to `cols`, with a fresh 0-based index."""
    return portfolio_data.loc[portfolio_data["ticker"] == symbol, cols].reset_index(drop=True)


# One frame per holding; the shared 0-based index lets later cells combine them row by row.
SPY_etf_data = _ticker_frame("SPY")
USDJPY_forex_data = _ticker_frame("C:USDJPY")
AMZN_stock_data = _ticker_frame("AMZN")
TSM_stock_data = _ticker_frame("TSM")
AXP_stock_data = _ticker_frame("AXP")
CELH_stock_data = _ticker_frame("CELH")
NVDA_stock_data = _ticker_frame("NVDA")

In [144]:

# One descriptive row (ticker, name, type) per holding, listed in display order.
portfolio_order = ["SPY", "C:USDJPY", "AMZN", "TSM", "AXP", "CELH", "NVDA"]
portfolio = pd.concat([
    portfolio_data.loc[portfolio_data["ticker"] == symbol, ['ticker','name','type']].head(1)
    for symbol in portfolio_order
]).reset_index(drop=True)

portfolio

Unnamed: 0,ticker,name,type
0,SPY,SPDR S&P 500 ETF Trust,etf
1,C:USDJPY,USD/JPY Foreign Exchange,forex
2,AMZN,Amazon.com Inc,stock
3,TSM,Taiwan Semiconductor Mfg. Co. Ltd.,stock
4,AXP,American Express Company,stock
5,CELH,"Celsius Holdings, Inc.",stock
6,NVDA,NVIDIA Corp,stock


Performing Stock/ETF/Forex aggregations

In [249]:
# Rate subtracted from the cumulative return in the 'sharpe' column below.
rf = 0.0546


def _add_position_value(frame, shares):
    """Add 'value' (vwap * shares, rounded to cents) and 'cumulative return' relative to the first row."""
    frame['value'] = (frame['vwap'] * shares).round(2)
    opening_value = frame['value'].iat[0]
    frame['cumulative return'] = (frame['value'] - opening_value)/opening_value


def _add_risk_metrics(frame):
    """Add 'volatility' and 'sharpe', both based on a window covering all rows so far.

    NOTE(review): 'volatility' is the running standard deviation of the vwap
    price level, and 'sharpe' divides (cumulative return - rf) by the running
    standard deviation of the cumulative return. Confirm that these are the
    intended definitions.
    """
    window = len(frame)
    frame['volatility'] = frame['vwap'].rolling(window,min_periods=2).std()
    return_spread = frame['cumulative return'].rolling(window,min_periods=2).std()
    frame['sharpe'] = (frame['cumulative return'] - rf)/return_spread


# The ETF and the stocks get the full set of metrics.
equity_positions = [
    (SPY_etf_data, SPY_shares),
    (AMZN_stock_data, AMZN_shares),
    (TSM_stock_data, TSM_shares),
    (AXP_stock_data, AXP_shares),
    (CELH_stock_data, CELH_shares),
    (NVDA_stock_data, NVDA_shares),
]
for equity_frame, equity_shares in equity_positions:
    _add_position_value(equity_frame, equity_shares)
    _add_risk_metrics(equity_frame)

# The forex position gets value and return plus a day-over-day percent change instead.
_add_position_value(USDJPY_forex_data, USDJPY_shares)
USDJPY_forex_data['percent change'] = USDJPY_forex_data['value'].pct_change()
# pct_change leaves the first row undefined; record it as no change.
USDJPY_forex_data.loc[0, 'percent change'] = 0

SPY_etf_data

Unnamed: 0,date,open,high,low,close,volume,vwap,transactions,value,cumulative return,volatility,sharpe
0,2023-01-03,384.37,386.43,377.83,380.82,74850731,380.96,590240,40000.00,0.000000,,
1,2023-01-04,383.18,385.88,380.00,383.76,85934098,383.15,632808,40229.95,0.005749,1.548564,-12.017586
2,2023-01-05,381.72,381.84,378.76,379.38,76275354,380.26,530896,39926.50,-0.001837,1.507658,-14.260526
3,2023-01-06,382.61,389.25,379.41,388.08,104052662,385.25,687390,40450.44,0.011261,2.261128,-7.301836
4,2023-01-09,390.37,393.70,387.67,387.86,73978071,390.36,549428,40986.98,0.024675,4.060903,-2.807361
...,...,...,...,...,...,...,...,...,...,...,...,...
376,2024-07-03,548.69,551.83,548.65,551.46,32745296,550.30,253834,57780.34,0.444508,45.661683,3.253046
377,2024-07-05,551.77,555.05,551.12,554.64,40482332,553.57,339615,58123.69,0.453092,45.878605,3.308941
378,2024-07-08,555.44,556.25,554.19,555.28,35042786,555.17,315477,58291.68,0.457292,46.100938,3.327688
379,2024-07-09,556.26,557.18,555.52,555.82,27267633,556.10,298706,58389.33,0.459733,46.324822,3.331682


Performing portfolio aggregations

In [227]:
# Total portfolio value per row: add up the position values, aligned on the shared 0-based index.
position_frames = [
    SPY_etf_data,
    AMZN_stock_data,
    TSM_stock_data,
    AXP_stock_data,
    CELH_stock_data,
    NVDA_stock_data,
    USDJPY_forex_data,
]
total_value = position_frames[0]['value']
for position_frame in position_frames[1:]:
    total_value = total_value + position_frame['value']

portfolio_aggs = pd.DataFrame(SPY_etf_data['date'])
portfolio_aggs['value'] = total_value

# Return relative to the first row's total value
opening_value = portfolio_aggs['value'].iat[0]
portfolio_aggs['cumulative return'] = (portfolio_aggs['value'] - opening_value)/opening_value

# Calendar days elapsed since the first row, used as the annualization exponent
days_invested = (portfolio_aggs['date'] - portfolio_aggs['date'].iat[0]).dt.days
portfolio_aggs['annualized return'] = (1 + portfolio_aggs['cumulative return']).pow(365/days_invested) - 1

# Global Investment Performance Standards dictate that returns of portfolios or composites for periods of less than one year may not be annualized
less_than_1_year = days_invested < 365
portfolio_aggs.loc[less_than_1_year, 'annualized return'] = 0

# Running standard deviation of the total value over all rows so far
portfolio_aggs['volatility'] = portfolio_aggs['value'].rolling(len(portfolio_aggs),min_periods=2).std()

portfolio_aggs

Unnamed: 0,date,value,cumulative return,annualized return,volatility
0,2023-01-03,100000.00,0.000000,0.000000,
1,2023-01-04,100821.29,0.008213,0.000000,580.739728
2,2023-01-05,100104.03,0.001040,0.000000,447.176589
3,2023-01-06,101043.70,0.010437,0.000000,518.134235
4,2023-01-09,103385.28,0.033853,0.000000,1369.403536
...,...,...,...,...,...
376,2024-07-03,237825.46,1.378255,0.782659,38122.953031
377,2024-07-05,240030.02,1.400300,0.789854,38317.841564
378,2024-07-08,241367.70,1.413677,0.790769,38517.195700
379,2024-07-09,242538.41,1.425384,0.794606,38719.994541
