<a href="https://colab.research.google.com/github/JiahuaZhang/machine-learning-trading/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
from dateutil import relativedelta

%matplotlib inline

In [2]:
# Notebook-wide plotting default: every figure created after this cell uses this (width, height) size.
matplotlib.rcParams['figure.figsize'] = (15, 13)

In [3]:
def get_time_preset(time_preset):
  """Translate a relative time preset into a (start, end) pair of timestamps.

  The range ends at the moment of the call and starts the requested amount
  of time earlier.

  time_preset -- number + unit suffix, where the suffix is
    'd' for days, 'mo' for months or 'yr' for years,
    e.g. '1d' for 1 day, '2mo' for 2 months, '3yr' for 3 years

  Returns a tuple (start, end) of POSIX timestamps in seconds (floats).

  Raises ValueError when the preset carries no recognised unit suffix or
  when the part left after removing the suffix is not an integer.
  """
  now = datetime.today()

  # Suffixes are probed in this fixed order ('d', then 'mo', then 'yr') so
  # every preset resolves to the same unit it always did.
  units = (('d', 'days'), ('mo', 'months'), ('yr', 'years'))
  for suffix, unit in units:
    if suffix in time_preset:
      amount = int(time_preset.replace(suffix, ''))
      start = now + relativedelta.relativedelta(**{unit: -amount})
      return start.timestamp(), now.timestamp()

  # Without this, an unknown suffix would surface as an UnboundLocalError on
  # `start`, which says nothing about what the caller did wrong.
  raise ValueError(
      "Unsupported time preset {!r}: expected a number followed by "
      "'d', 'mo' or 'yr' (e.g. '3mo')".format(time_preset))

In [4]:
def build_yahoo_finance_url(symbol, time_preset='3mo', interval='daily', events='history', start=None, end=None, includeAdjustedClose=True):
  """Build a Yahoo Finance CSV download URL.

  Keyword arguments:
  symbol: stock symbol, e.g. AAPL, SPY
  time_preset: relative time range, e.g. 1d for 1 day, 2mo for 2 months, 3yr for 3 years.
    It is used only when start and end are not BOTH supplied.
  interval: sampling frequency, one of daily, weekly, monthly
  events: history | div, historic stock price or dividend
  start, end: range boundaries. Each one is either a timestamp in seconds
    (int or float) or a 'YYYY-MM-DD' date string; the two may be of
    different kinds. Date strings go through datetime.strptime(...).timestamp(),
    i.e. they are interpreted as naive local time.
  includeAdjustedClose: boolean, written into the query string as-is

  Returns the URL as a string, with both period bounds truncated to whole seconds.

  Raises KeyError for an unknown interval and ValueError for a date string
  that does not match 'YYYY-MM-DD'.
  """
  api = 'https://query1.finance.yahoo.com/v7/finance/download'

  def to_epoch(value):
    # Each bound is converted on its own, so a numeric bound can be combined
    # with a date-string bound.
    if isinstance(value, (int, float)):
      return value
    return datetime.strptime(value, '%Y-%m-%d').timestamp()

  # Only None and '' mean "not given": a plain truthiness test would also
  # discard a perfectly valid timestamp of 0.
  if start not in (None, '') and end not in (None, ''):
    period1, period2 = to_epoch(start), to_epoch(end)
  else:
    period1, period2 = get_time_preset(time_preset=time_preset)

  interval = {'daily': '1d', 'weekly': '1wk', 'monthly': '1mo'}[interval]

  return '{}/{}?period1={}&period2={}&interval={}&events={}&includeAdjustedClose={}'.format(api, symbol, int(period1), int(period2), interval, events, includeAdjustedClose)

In [5]:
def get_stock_dataframe(symbol, **kwargs):
  """Download the CSV for a symbol and load it into a DataFrame.

  symbol: stock symbol, passed to build_yahoo_finance_url
  kwargs: any further keyword arguments of build_yahoo_finance_url
    (time_preset, interval, events, start, end, includeAdjustedClose)

  Returns the frame read by pandas, indexed by the parsed 'Date' column.
  """
  return pd.read_csv(
      build_yahoo_finance_url(symbol=symbol, **kwargs),
      index_col='Date',
      parse_dates=True,
  )

In [6]:
def rolling_analysis(series, window=20, nstd=2, plot=True):
  """Compute a rolling mean with symmetric standard-deviation bands.

  series: named pandas Series to analyse
  window: rolling window size, forwarded to Series.rolling
  nstd: how many rolling standard deviations the bands sit away from the mean
  plot: when True, plot the series together with the mean and both bands

  Returns a tuple (mean, lower_bound, upper_bound) -- note that the lower
  band comes before the upper band. The three results are named
  '<name> rolling mean', '<name> lower bound' and '<name> upper bound',
  where <name> is the name of the input series.
  """
  rolling = series.rolling(window=window)
  mean = rolling.mean()
  # The rolling std is needed for both bands; compute it once.
  band = nstd * rolling.std()
  upper_bound = mean + band
  lower_bound = mean - band

  # Take the label from the input series. Reading mean.name after it has been
  # relabelled would leak 'rolling mean' into the band labels
  # ('<name> rolling mean upper bound').
  base_name = series.name
  mean.name = '{} rolling mean'.format(base_name)
  upper_bound.name = '{} upper bound'.format(base_name)
  lower_bound.name = '{} lower bound'.format(base_name)

  if plot:
    pd.concat([series, mean, upper_bound, lower_bound], axis=1).plot()
    plt.show()

  return mean, lower_bound, upper_bound

In [7]:
def rolling_trade(series, lower, upper):
  """Pair band-crossing signals into sequential buy/sell trades.

  series: pandas Series of prices
  lower, upper: Series aligned with `series`; a point is a buy signal when
    it lies below `lower` and a sell signal when it lies above `upper`

  Trades never overlap. Each one buys at the earliest remaining buy signal
  and sells at the first later sell signal whose price exceeds the buy
  price; the next buy is then searched strictly after that sell.

  Returns a list of dicts. Every dict has a 'buy' entry
  ({'time', 'price'}); completed trades also have 'sell' ({'time', 'price'})
  and 'change' (sell minus buy, for both 'time' and 'price'). If the last
  buy finds no matching sell, it is returned as an open trade with only the
  'buy' entry, and the search stops there.
  """
  buy = series[lower > series]
  sell = series[series > upper]

  trades = []
  while buy.size > 0:
    buy_time = buy.index[0]
    # Positional access: independent of the index dtype and of duplicate labels.
    buy_price = buy.iloc[0]
    trade = {
        'buy': {
            'time': buy_time,
            'price': buy_price
        }
    }

    # Dropping past sell signals for good is safe: every later buy happens
    # even later in time.
    sell = sell[sell.index > buy_time]
    # The price filter depends on THIS buy price, so it must not overwrite
    # `sell`; otherwise a later, cheaper buy could never use a sell signal
    # that was only too low for the current trade.
    profitable = sell[sell > buy_price]
    if profitable.size == 0:
      trades.append(trade)
      break

    sell_time = profitable.index[0]
    sell_price = profitable.iloc[0]
    trade['sell'] = {
        'time': sell_time,
        'price': sell_price
    }
    trade['change'] = {
        'time': sell_time - buy_time,
        'price': sell_price - buy_price
    }
    trades.append(trade)

    buy = buy[buy.index > sell_time]
  return trades

In [8]:
def get_price_earning_ratio(symbol='', time_preset='7yr', **kwargs):
  """Download price and dividend history and relate the two per dividend date.

  symbol: stock symbol, passed to build_yahoo_finance_url
  time_preset: relative time range, passed to build_yahoo_finance_url
  kwargs: further keyword arguments for build_yahoo_finance_url, applied to
    both the price and the dividend request. 'events' must not be among
    them, because it is already set for the dividend request.

  Returns a DataFrame with one row per dividend date, newest first, holding
  the dividend columns plus:
    Close            -- closing price on that date (NaN when the price data
                        has no row for the date)
    PER              -- Close / Dividends / 4
    Divident Yield % -- 1 / PER * 100 (column name spelled as callers know it)

  NOTE(review): despite the function name, 'PER' is derived from dividends,
  not earnings; the division by 4 presumably annualises a quarterly payout --
  confirm before using it for other payout schedules.
  """
  price_url = build_yahoo_finance_url(symbol=symbol, time_preset=time_preset, **kwargs)
  dividend_url = build_yahoo_finance_url(symbol=symbol, events='div', time_preset=time_preset, **kwargs)

  price = pd.read_csv(price_url, index_col='Date', parse_dates=True)
  div = pd.read_csv(dividend_url, index_col='Date', parse_dates=True)

  # Left join on the date index. Selecting price.Close by the dividend dates
  # raises KeyError as soon as one of them is missing from the price data
  # (e.g. with weekly or monthly prices); the join leaves NaN there instead.
  analysis = div.join(price.Close)
  analysis['PER'] = analysis.Close / analysis.Dividends / 4
  analysis['Divident Yield %'] = 1 / analysis.PER * 100
  return analysis.sort_index(ascending=False)

In [9]:
# AAPL
# symbol = 'SPY'
# url = build_yahoo_finance_url(symbol=symbol)
# stock = pd.read_csv(url, index_col='Date', parse_dates=True)

In [10]:
# adjust_close = stock['Adj Close']

In [11]:
# mean, lower, upper = rolling_analysis(adjust_close)

In [12]:
# rolling_trade(adjust_close, lower, upper)

In [13]:
# for window in [7, 14, 21, 28]:
#   print('For {} window size:'.format(window))
#   mean, lower, upper = rolling_analysis(adjust_close, window=window)
#   for trade in rolling_trade(adjust_close, lower, upper):
#     for key in ['buy', 'sell', 'change']:
#       if (key in trade):
#         print('{} - {}'.format(key, trade[key]))
#   print()

In [19]:
# get_price_earning_ratio('mo')

In [20]:
# mo = get_stock_dataframe('mo', time_preset='7yr', interval='monthly')
# mo['Adj Close'].plot()