<a href="https://colab.research.google.com/github/JiahuaZhang/machine-learning-trading/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
from dateutil import relativedelta

%matplotlib inline

In [14]:
# Notebook-wide default figure size, (width, height) in inches, for the plots below.
matplotlib.rcParams['figure.figsize'] = (15, 13)

In [25]:
def get_time_preset(time_preset):
  """Get time range for preset

  time_preset -- whole number followed by a unit suffix:
    'd' for days, 'mo' for months, 'yr' for years
    e.g. 1d for 1 day, 2mo for 2 months, 3yr for 3 years

  Returns a (start, end) tuple of timestamps in seconds, as produced by
  datetime.timestamp(): end is the current time and start is the requested
  span before it.

  Raises TypeError if time_preset is not a string, and ValueError if the
  unit suffix is unsupported or the count is not an integer.
  """
  if not isinstance(time_preset, str):
    raise TypeError('time_preset must be a string such as "3mo", got {!r}'.format(time_preset))

  preset = time_preset.strip()

  # Match on the suffix (not a substring) so that only well-formed presets
  # are accepted; anything else is rejected explicitly instead of reaching
  # the return statement with `start` unassigned.
  for suffix, unit in (('d', 'days'), ('mo', 'months'), ('yr', 'years')):
    if preset.endswith(suffix):
      count_text = preset[:-len(suffix)]
      break
  else:
    raise ValueError(
        'unsupported time preset {!r}: expected <number> followed by d, mo or yr'.format(time_preset))

  try:
    count = int(count_text)
  except ValueError:
    raise ValueError(
        'invalid time preset {!r}: {!r} is not a whole number'.format(time_preset, count_text)) from None

  now = datetime.today()
  # relativedelta applies calendar-aware offsets (months and years vary in length).
  start = now + relativedelta.relativedelta(**{unit: -count})

  return start.timestamp(), now.timestamp()

In [26]:
def _to_epoch_seconds(moment):
  """Convert a 'YYYY-MM-DD' string or a datetime to epoch seconds; pass numbers through."""
  if isinstance(moment, str):
    return datetime.strptime(moment, '%Y-%m-%d').timestamp()
  if isinstance(moment, datetime):
    return moment.timestamp()
  return moment


def build_yahoo_finance_url(symbol, time_preset='3mo', interval='daily', events='history', start=None, end=None, includeAdjustedClose=True):
  """build yahoo finance url
  Keyword arguments:
  symbol: stock symbol, e.g. AAPL, SPY
  time_preset: time ranges by preset, e.g. 1d for 1 day, 2mo for 2 months, 3yr for 3 years.
    it is only used when start and end are not BOTH given
  interval: get stock frequency, one of daily, weekly, monthly
    (any other value raises KeyError)
  events: history | div, historic stock price or dividend
  start, end: range boundaries, each either a timestamp in seconds (int or
    float), a 'YYYY-MM-DD' string, or a datetime. When both are given they
    take precedence over time_preset.
  includeAdjustedClose: boolean, formatted into the query string as-is

  Returns the download url as a string; period1/period2 are truncated to
  whole seconds.
  """
  api = 'https://query1.finance.yahoo.com/v7/finance/download'

  # Compare against None rather than truthiness: 0 is a legitimate timestamp.
  if start is not None and end is not None:
    # Convert each boundary on its own so that mixed inputs
    # (e.g. numeric start, string end) work.
    period1, period2 = _to_epoch_seconds(start), _to_epoch_seconds(end)
  else:
    period1, period2 = get_time_preset(time_preset=time_preset)

  interval = {'daily': '1d', 'weekly': '1wk', 'monthly': '1mo'}[interval]

  return '{}/{}?period1={}&period2={}&interval={}&events={}&includeAdjustedClose={}'.format(api, symbol, int(period1), int(period2), interval, events, includeAdjustedClose)

In [17]:
def rolling_analysis(series, window=20, nstd=2, plot=True):
  """Compute a rolling mean with upper/lower bands around it

  series: pandas Series to analyse
  window: number of observations in each rolling window
  nstd: band half-width, in multiples of the rolling standard deviation
  plot: when True, plot the series together with the mean and both bands

  Returns (mean, lower_bound, upper_bound) -- note that lower comes before
  upper. The results are named '<series name> rolling mean',
  '<series name> rolling mean lower bound' and
  '<series name> rolling mean upper bound'.
  """
  rolling = series.rolling(window=window)
  mean = rolling.mean()
  # Compute the rolling standard deviation once and reuse it for both bands.
  band = nstd * rolling.std()
  upper_bound = mean + band
  lower_bound = mean - band

  # The band names are derived from the already-renamed mean, so they carry
  # the 'rolling mean' prefix as well.
  mean.name = '{} rolling mean'.format(series.name)
  upper_bound.name = '{} upper bound'.format(mean.name)
  lower_bound.name = '{} lower bound'.format(mean.name)

  if plot:
    pd.concat([series, mean, upper_bound, lower_bound], axis=1).plot()
    plt.show()

  return mean, lower_bound, upper_bound

In [18]:
def rolling_trade(series, lower, upper):
  """Pair up buy and sell signals from a band strategy

  series: pandas Series of prices
  lower, upper: Series aligned with `series`; a point is a buy signal when
    it is below `lower` and a sell signal when it is above `upper`

  Starting from the earliest buy signal, each buy is matched with the first
  later sell signal whose price is strictly higher than the buy price; the
  search then resumes with the first buy signal after that sell.

  Returns a list of dicts. Every dict has a 'buy' entry ({'time', 'price'});
  completed trades also have 'sell' ({'time', 'price'}) and 'change'
  (sell minus buy, for both time and price). If a buy finds no matching
  sell, it is appended without 'sell'/'change' and the search stops.
  """
  buy = series[lower > series]
  sell = series[series > upper]

  trades = []
  while buy.size > 0:
    current_buy_time = buy.index[0]
    # Positional access: label lookup would be ambiguous on a duplicated index.
    current_buy_price = buy.iloc[0]
    current_trade = {
        'buy': {
            'time': current_buy_time,
            'price': current_buy_price
        }
    }

    # Buy times only move forward, so discarding earlier sells for good is safe.
    sell = sell[sell.index > current_buy_time]
    # The price filter depends on THIS buy, so keep it in a separate variable:
    # narrowing `sell` itself would hide exits that are profitable for a
    # later, cheaper buy.
    profitable = sell[sell > current_buy_price]
    if profitable.size == 0:
      trades.append(current_trade)
      break

    current_sell_time = profitable.index[0]
    current_sell_price = profitable.iloc[0]
    current_trade['sell'] = {
        'time': current_sell_time,
        'price': current_sell_price
    }
    current_trade['change'] = {
        'time': current_sell_time - current_buy_time,
        'price': current_sell_price - current_buy_price
    }
    trades.append(current_trade)

    buy = buy[buy.index > current_sell_time]
  return trades

In [19]:
# AAPL
# symbol = 'SPY'
# url = build_yahoo_finance_url(symbol=symbol)
# stock = pd.read_csv(url, index_col='Date', parse_dates=True)

In [20]:
# adjust_close = stock['Adj Close']

In [21]:
# mean, lower, upper = rolling_analysis(adjust_close)

In [22]:
# rolling_trade(adjust_close, lower, upper)

In [23]:
# for window in [7, 14, 21, 28]:
#   print('For {} window size:'.format(window))
#   mean, lower, upper = rolling_analysis(adjust_close, window=window)
#   for trade in rolling_trade(adjust_close, lower, upper):
#     for key in ['buy', 'sell', 'change']:
#       if (key in trade):
#         print('{} - {}'.format(key, trade[key]))
#   print()

In [34]:
# Parameters for the dividend analysis; later cells read these names.
symbol = 'AAPL'
time_preset = '5yr'
# Same symbol and time range for both downloads; only the `events` kind differs
# (default 'history' for prices, 'div' for dividends).
price_url = build_yahoo_finance_url(symbol=symbol, time_preset=time_preset)
dividend_url = build_yahoo_finance_url(symbol=symbol, time_preset=time_preset, events='div')

In [35]:
# Read both CSVs straight from the URLs built above (requires network access).
# The 'Date' column becomes the index and is parsed as dates.
price = pd.read_csv(price_url, index_col='Date', parse_dates=True)
div = pd.read_csv(dividend_url, index_col='Date', parse_dates=True)

In [48]:
div

Unnamed: 0_level_0,Dividends
Date,Unnamed: 1_level_1
2016-11-03,0.1425
2017-05-11,0.1575
2018-11-08,0.1825
2019-11-07,0.1925
2017-11-10,0.1575
2018-05-11,0.1825
2019-05-10,0.1925
2020-05-08,0.205
2020-11-06,0.205
2021-05-07,0.22


In [49]:
# Closing price on each date that appears in the dividend table's index.
# NOTE(review): presumably every dividend date is also present in `price` -- confirm.
price.Close[div.index]

Date
2016-11-03     27.457500
2017-05-11     38.487499
2018-11-08     52.122501
2019-11-07     64.857498
2017-11-10     43.667500
2018-05-11     47.147499
2019-05-10     49.294998
2020-05-08     77.532501
2020-11-06    118.690002
2021-05-07    130.210007
2017-02-09     33.105000
2017-08-10     38.830002
2018-02-09     39.102501
2018-08-10     51.882500
2019-02-08     42.602501
2019-08-09     50.247501
2020-02-07     80.007500
2020-08-07    111.112503
2021-02-05    136.759995
2016-08-04     26.467501
Name: Close, dtype: float64

In [53]:
# Put each dividend next to the closing price on the same date (joined on the Date index).
analysis = div.join(price.Close[div.index])

In [54]:
# Adds an 'Earn' column to `analysis` in place: Dividends multiplied by Close.
# NOTE(review): if a dividend yield was intended, this should presumably be
# Dividends / Close -- confirm the intended meaning of 'Earn'.
analysis['Earn'] = analysis.Dividends * analysis.Close

In [55]:
analysis

Unnamed: 0_level_0,Dividends,Close,Earn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-11-03,0.1425,27.4575,3.912694
2017-05-11,0.1575,38.487499,6.061781
2018-11-08,0.1825,52.122501,9.512356
2019-11-07,0.1925,64.857498,12.485068
2017-11-10,0.1575,43.6675,6.877631
2018-05-11,0.1825,47.147499,8.604419
2019-05-10,0.1925,49.294998,9.489287
2020-05-08,0.205,77.532501,15.894163
2020-11-06,0.205,118.690002,24.33145
2021-05-07,0.22,130.210007,28.646202


In [56]:
# Show the rows in chronological order; the result is not assigned, so
# `analysis` itself keeps its current row order.
analysis.sort_index()

Unnamed: 0_level_0,Dividends,Close,Earn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-08-04,0.1425,26.467501,3.771619
2016-11-03,0.1425,27.4575,3.912694
2017-02-09,0.1425,33.105,4.717462
2017-05-11,0.1575,38.487499,6.061781
2017-08-10,0.1575,38.830002,6.115725
2017-11-10,0.1575,43.6675,6.877631
2018-02-09,0.1575,39.102501,6.158644
2018-05-11,0.1825,47.147499,8.604419
2018-08-10,0.1825,51.8825,9.468556
2018-11-08,0.1825,52.122501,9.512356
