# The Stock: General

### Installing

In [None]:
% pip install pandas==1.3.5 numpy==1.21.4 matplotlib==3.5.1 mplfinance==0.12.8b6 yfinance==0.1.63 scikit-learn==1.0.1 mlxtend==0.19.0 statsmodels==0.11.1 pmdarima==1.8.0 prophet==1.0.1 tensorflow==2.7.0 torch==1.9.0 bs4==0.0.1 

### Connect Drive

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

### Settings

In [None]:
# Prefix prepended to every dataset path below; '' makes the paths relative to
# the working directory.
current_directory = ''

### Importing

In [None]:
import pandas as pd
from pandas.plotting import autocorrelation_plot 
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
import mplfinance as mpf
import yfinance as yf
from sklearn import preprocessing, ensemble, model_selection
from mlxtend.regressor import StackingCVRegressor
from statsmodels.tsa.arima_model import ARIMA
import pmdarima as pm 
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import tensorflow as tf 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader 
from bs4 import BeautifulSoup
import time, math, itertools, os, random, re, json, requests, copy
import warnings; warnings.filterwarnings('ignore')

### Data

In [None]:
# ticker_strings = ['MSFT', 'AAPL', 'GOOGL', 'FB', 'NVDA', 'TSLA', 'AMZN']

# temp_df = list()
# for ticker in ticker_strings:
#     data = yf.download(ticker, group_by='Ticker', start='2000-07-07', end="2021-07-08")
#     data['Ticker'] = ticker  
#     temp_df.append(data)

# stock_df = pd.concat(temp_df)
# stock_df.to_csv(current_directory + 'datasets/stock.csv')

### Financial Data

In [None]:
def _flatten_statement(statement):
    """Return ``{field: value['longFmt']}`` for every field of one statement that has a 'longFmt' entry."""
    flat = {}
    for key, value in statement.items():
        try:
            flat[key] = value['longFmt']
        except (TypeError, KeyError):
            # Field is not a dict-like value or carries no 'longFmt' entry: skip it.
            continue
    return flat


def ticker_financials(ticker:str):
    """Scrape the financial statements of *ticker* from its Yahoo Finance page.

    The page embeds a JSON document inside a ``<script>`` tag; the statements
    are read from its ``QuoteSummaryStore`` section.

    Each statement is flattened independently of the others. (Walking the six
    statements in lockstep would truncate every statement to the shortest one
    and let a single missing 'longFmt' entry drop values from unrelated
    statements.)

    Parameters
    ----------
    ticker : str
        Ticker symbol; it is upper-cased before being placed in the URL.

    Returns
    -------
    tuple of six pandas.DataFrame
        Annual income statement, quarterly income statement, annual cash-flow
        statement, quarterly cash-flow statement, annual balance sheet and
        quarterly balance sheet -- one row per reported period, one column per
        field that exposes a 'longFmt' value.
    """
    input_ticker = ticker.upper()
    url = 'https://finance.yahoo.com/quote/{}/financials?p={}'.format(input_ticker, input_ticker)
    headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36' }
    response = requests.get(url, headers=headers, timeout=5)
    soup = BeautifulSoup(response.text, 'html.parser')
    pattern = re.compile(r'\s--\sData\s--\s')
    script_data = soup.find('script', text=pattern).contents[0]
    # The JSON object opens two characters before the first "context" and the
    # script carries 12 trailing characters after it.
    start = script_data.find("context")-2
    json_data = json.loads(script_data[start:-12])

    store = json_data['context']['dispatcher']['stores']['QuoteSummaryStore']

    # (outer key, inner key) of each statement list, in the order they are returned.
    sections = (
        ('incomeStatementHistory', 'incomeStatementHistory'),
        ('incomeStatementHistoryQuarterly', 'incomeStatementHistory'),
        ('cashflowStatementHistory', 'cashflowStatements'),
        ('cashflowStatementHistoryQuarterly', 'cashflowStatements'),
        ('balanceSheetHistory', 'balanceSheetStatements'),
        ('balanceSheetHistoryQuarterly', 'balanceSheetStatements'),
    )

    a_income_statement, q_income_statement, a_cashflow_statement, \
    q_cashflow_statement, a_balance_sheet, q_balance_sheet = (
        pd.DataFrame([_flatten_statement(period) for period in store[outer][inner]])
        for outer, inner in sections
    )

    return a_income_statement, q_income_statement, a_cashflow_statement, q_cashflow_statement, a_balance_sheet, q_balance_sheet

# Fetch the six GOOGL statements and render them in this order:
# annual/quarterly income statement, annual/quarterly cash-flow statement,
# annual/quarterly balance sheet.
googl_statements = ticker_financials(ticker='GOOGL')
a_is, q_is, a_cs, q_cs, a_bs, q_bs = googl_statements

for statement_frame in (a_is, q_is, a_cs, q_cs, a_bs, q_bs):
    display(statement_frame)

In [None]:
import yfinance as yf
import pandas as pd

def ticker_everything(ticker:str):
    """Display every table yfinance exposes for *ticker*.

    The info dictionary is shown first as a one-column frame ('Value', indexed
    by 'Info'), followed by actions, annual/quarterly financials, balance
    sheets and cash flows, earnings, sustainability, recommendations and the
    calendar. Nothing is returned.
    """
    handle = yf.Ticker(ticker)

    info_frame = pd.Series(handle.info).to_frame()
    info_frame = info_frame.rename(columns={0: 'Value'}).rename_axis('Info')

    # Attributes are read in display order.
    table_attributes = (
        'actions',
        'financials',
        'quarterly_financials',
        'balancesheet',
        'quarterly_balancesheet',
        'cashflow',
        'quarterly_cashflow',
        'earnings',
        'sustainability',
        'recommendations',
        'calendar',
    )
    tables = [getattr(handle, attribute) for attribute in table_attributes]

    display(info_frame, *tables)
    
# Show every available yfinance table for GOOGL in the cell output.
ticker_everything(ticker='GOOGL')

### Statistic Data

In [None]:
def ticker_statistics(ticker:str):
    """Scrape the key statistics of *ticker* from its Yahoo Finance page.

    Returns the ``defaultKeyStatistics`` section of the JSON embedded in the
    page as a transposed DataFrame (one row per statistic).
    """
    symbol = ticker.upper()
    page = requests.get(
        f'https://finance.yahoo.com/quote/{symbol}/key-statistics?p={symbol}',
        headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36' },
        timeout=5,
    )
    parsed = BeautifulSoup(page.text, 'html.parser')

    # The data lives in the <script> tag whose text contains " -- Data -- ".
    payload = parsed.find('script', text=re.compile(r'\s--\sData\s--\s')).contents[0]
    json_start = payload.find("context") - 2
    document = json.loads(payload[json_start:-12])

    summary_store = document['context']['dispatcher']['stores']['QuoteSummaryStore']
    return pd.DataFrame(summary_store['defaultKeyStatistics']).T
    
# Fetch the GOOGL key statistics; the bare name on the last line makes the
# notebook render the frame as the cell output.
ticker_stats = ticker_statistics(ticker='GOOGL')
ticker_stats

### Intrinsic Value 

To buy a stock at the right price, investors estimate its intrinsic (underlying) value: a measure, based on the company's fundamentals, of whether the current market price is undervalued or overvalued. Many formulas exist for computing intrinsic value; the one used here is a simple discounted-cash-flow version. It requires the following inputs:

|      Params     |                  Name                 |   Unit   | Timeframe |         From        |
|:---------------:|:-------------------------------------:|:--------:|:---------:|:-------------------:|
|      T_OCF      |          Operating Cash Flow          | Millions |  Last 4Q  | Cash Flow Statement |
|   T_Total_Debt  |               Total Debt              | Millions |   Last Q  |    Balance Sheet    |
|      T_CSTI     |      Cash & Short Term Investment     | Millions |   Last Q  |    Balance Sheet    |
|     T_CFGR1     |  Cash flow Growth rate next 1-5 years |  Percent |    ANY    |     EPS Estimate    |
|     T_CFGR2     | Cash flow Growth rate next 6-10 years |  Percent |    ANY    |     EPS Estimate    |
|      T_NSO      |         No. Shares Outstanding        |    No.   |   Recent  |          -          |
|   T_Last_Close  |               Last Close              |   Price  | Yesterday |          -          |
| T_Discount_Rate |             Discount Rate             |  Percent |    ANY    |         Beta        |
|  T_next_n_year  |            Projected Years            |   Year   |    ANY    |         Self        |

In [None]:
def intrinsic_value(T_OCF, T_Total_Debt, T_CSTI, T_CFGR1, T_CFGR2, T_NSO, T_Last_Close, T_Discount_Rate, T_next_n_year):
    """Print a discounted-cash-flow intrinsic value per share and its gap to the last close.

    The operating cash flow is grown year by year, each projected value is
    discounted back to today, and the discounted values are summed. The sum is
    divided by the share count, then debt per share is subtracted and cash per
    share is added.

    Growth uses ``T_CFGR1`` for projected years 1-5 and ``T_CFGR2`` for every
    later year. (Previously ``T_CFGR2`` was accepted but never applied.)

    Parameters
    ----------
    T_OCF : number
        Operating cash flow used as the starting point of the projection.
    T_Total_Debt : number
        Total debt, in the same monetary unit as ``T_OCF``.
    T_CSTI : number
        Cash and short-term investments, in the same monetary unit as ``T_OCF``.
    T_CFGR1 : number
        Cash-flow growth rate for years 1-5, in percent.
    T_CFGR2 : number
        Cash-flow growth rate for years 6 onward, in percent.
    T_NSO : number
        Number of shares outstanding (must be non-zero).
    T_Last_Close : number
        Last closing price, compared against the computed intrinsic value.
    T_Discount_Rate : number
        Yearly discount rate, in percent.
    T_next_n_year : int
        Number of years to project; at least one year is always projected.

    Returns
    -------
    None
        The intrinsic value and the discount/premium (in percent, positive
        when the last close is above the intrinsic value) are printed.
    """
    first_stage_years = 5  # years that grow at T_CFGR1 before T_CFGR2 takes over

    CFGR1 = T_CFGR1/100
    CFGR2 = T_CFGR2/100
    Discount_Rate = T_Discount_Rate/100

    projected_cash_flow = T_OCF
    discount_factor = 1.0
    total_n_year_CF = 0

    # max(..., 1) keeps the historical behaviour of always projecting year 1.
    for year in range(1, max(T_next_n_year, 1) + 1):
        growth = CFGR1 if year <= first_stage_years else CFGR2
        projected_cash_flow = projected_cash_flow * ( 1 + growth )
        discount_factor = discount_factor * ( 1 / ( 1 + Discount_Rate ) )
        total_n_year_CF += projected_cash_flow * discount_factor

    INTRINSIC_VALUE_before_CashDebt = total_n_year_CF / T_NSO
    Debt_per_share = T_Total_Debt / T_NSO
    Cash_per_share = T_CSTI / T_NSO

    INTRINSIC_VALUE = INTRINSIC_VALUE_before_CashDebt - Debt_per_share + Cash_per_share
    Discount_OR_Premium = ( (T_Last_Close - INTRINSIC_VALUE) / INTRINSIC_VALUE ) * 100

    print('Current Instrinsic Value: {}'.format(round(INTRINSIC_VALUE, 4)))
    print('Discount/Premium: {}'.format(round(Discount_OR_Premium, 4)))
    
# Template call: fill in every value, then uncomment the block.
# It is kept commented out because empty keyword arguments (`T_OCF = ,`) are a
# SyntaxError that stops the whole cell from running, including the
# intrinsic_value definition that shares it.
#
#                                   NAME                                      UNIT       Timeframe    FROM
# intrinsic_value(T_OCF = ,           # Operating Cash Flow                     Millions   Last 4Q      Cash Flow Statement
#                 T_Total_Debt = ,    # Total Debt                              Millions   Last Q       Balance Sheet
#                 T_CSTI = ,          # Cash & Short Term Investment            Millions   Last Q       Balance Sheet
#                 T_CFGR1 = ,         # Cash flow Growth rate next 1-5 years    Percent    ANY          EPS Estimate
#                 T_CFGR2 = ,         # Cash flow Growth rate next 6-10 years   Percent    ANY          EPS Estimate
#                 T_NSO = ,           # No. Shares Outstanding                  No.        Recent       -
#                 T_Last_Close = ,    # Last Close                              Price      Yesterday    -
#                 T_Discount_Rate = , # Discount Rate                           Percent    ANY          Beta
#                 T_next_n_year = ,   # Projected Years                         Year       ANY          Self
#                 )

# T for temporary (ignore it)

# Beta -> Discount Rate Conversion
# <.8 = 4.6%
# 1 ~ 1.5 = 5.6% ~ 8.1% | +0.5% per 0.1Beta
# > 1.6 = 8.6% 

### Reading

In [None]:
def read_data():
    """Load ``datasets/stock.csv`` and return it indexed by its parsed 'Date' column.

    The first row is displayed as a quick preview, and the function asserts
    that the resulting index is a DatetimeIndex.
    """
    frame = pd.read_csv(current_directory + 'datasets/stock.csv')
    frame['Date'] = pd.to_datetime(frame['Date'], dayfirst=True)
    frame = frame.set_index(['Date'])

    display(frame.head(1))
    assert type(frame.index) is pd.DatetimeIndex

    return frame

# Load the stored price history, then drop every row that contains a missing value.
data = read_data()
data.dropna(inplace=True)

In [None]:
# Count the missing values per column (rendered as the cell output).
data.isnull().sum()

### Plotting Style

In [None]:
# Matplotlib rcParams applied to every figure in this notebook.
custom_style = {
    'figure.autolayout': True,      # Adjust the figure layout automatically
    'figure.titlesize': 20,         # Suptitle font size
    'figure.figsize': (10, 5),      # Default figure size
    'figure.dpi': 100,              # Dots per inch
    'axes.spines.top': False,       # Hide the top spine
    'axes.spines.left': False,      # Hide the left spine
    'axes.titlesize': 10,           # Axes title font size
    'axes.titlelocation': 'left',   # Axes title alignment
    'axes.labelsize': 14,           # Axes label font size
    'axes.grid': True,              # Show the grid
    'grid.color': '#969696',        # Grid colour
    'xtick.direction': 'inout',     # X tick direction
    'ytick.direction': 'inout',     # Y tick direction
    'xtick.minor.visible': True,    # Show minor X ticks
    'ytick.minor.visible': True,    # Show minor Y ticks
    'ytick.right': True,            # Draw Y ticks on the right
    'ytick.left': False,            # No Y ticks on the left
    'ytick.labelright': True,       # Draw Y tick labels on the right
    'ytick.labelleft': False,       # No Y tick labels on the left
    'xaxis.labellocation': 'right', # X axis label alignment
    'yaxis.labellocation': 'top',   # Y axis label alignment
    'font.family': 'serif',         # Font family
    'legend.fontsize': 10,          # Legend font size
    'legend.loc': 'best',           # Legend location
}

# Print the applied settings as two aligned columns (keys padded to 30
# characters), followed by two snippets that have to be applied by hand.
separator = '========================================'
print(separator)
print('Auto Configured:')
print(separator)
for setting, value in custom_style.items():
    print(f'{setting:<30}{value}')
print(separator)
print('Further Configure: ')
print(separator)
print('ax.yaxis.set_label_position("right")')
print('mpl.rcParams.update(mpl.rcParamsDefault)')

# %matplotlib inline
# %config InlineBackend.figure_format='svg'

plt.style.use(custom_style)

### History

In [None]:
def history_performance(df):
    """Plot the min-max scaled 'Close' history of every ticker on one chart.

    Every float64 column is rescaled per ticker with a MinMaxScaler so that
    tickers with very different price levels share the same axis.
    """
    float_columns = df.select_dtypes(include=['float64']).columns.values
    scaler = preprocessing.MinMaxScaler()

    def _rescale(symbol):
        # Scale one ticker's rows on their own so each ticker spans the full range.
        part = df[df['Ticker'] == symbol].copy()
        part[float_columns] = scaler.fit_transform(part[float_columns])
        return part

    scaled_df = pd.concat([_rescale(symbol) for symbol in df['Ticker'].unique()])

    palette = ['#00a4ef', '#a2aaad', '#fbbc05', '#4267b2', '#76b900', '#e82127', '#ff9900', '#00a270']
    fig, ax = plt.subplots()
    for symbol, shade in zip(scaled_df['Ticker'].unique(), palette):
        ticker_rows = scaled_df[scaled_df['Ticker'] == symbol]
        plt.plot(ticker_rows.index, ticker_rows['Close'], c=shade, label=symbol)

    plt.suptitle('History', ha='left', x=.015, y=1)
    plt.title('Established')
    plt.legend()
    plt.ylabel('Scaled Values')
    plt.xlabel('Date')
    ax.yaxis.set_label_position("right")
    plt.show()
    
# Compare the scaled closing prices of all tickers in the loaded data.
history_performance(data)

In [None]:
def plot_ohlc(df, ticker):
    """Draw an OHLC bar chart with a volume panel for the last 30 rows of *ticker*.

    Each bar is a vertical low-high line with a left tick at the open and a
    right tick at the close; it is red when the open is above the close and
    green otherwise.
    """
    df = df[df['Ticker'] == ticker].tail(30).reset_index()
    x = np.arange(0, len(df))
    fig, (ax, ax2) = plt.subplots(2, figsize=(10,5), gridspec_kw={'height_ratios': [4, 1]}, dpi=100)

    rising, falling = '#2CA453', '#F04730'
    for idx, bar in df.iterrows():
        shade = falling if bar['Open'] > bar['Close'] else rising
        centre = x[idx]
        ax.plot([centre, centre], [bar['Low'], bar['High']], color=shade)
        ax.plot([centre, centre-0.1], [bar['Open'], bar['Open']], color=shade)
        ax.plot([centre, centre+0.1], [bar['Close'], bar['Close']], color=shade)

    # Volume panel: leave 10% headroom and label the inner ticks in millions.
    ax2.bar(x, df['Volume'], color='lightgrey')
    max_ = df['Volume'].max()*1.1
    volume_ticks = np.arange(0, max_+1, max_/4)
    volume_labels = ['{:.2f} M'.format(tick/1000000) for tick in volume_ticks]
    plt.yticks(volume_ticks[1:-1], volume_labels[1:-1])
    plt.ylim(0,max_)

    ax.set_xticks(x, minor=True)
    ax.set_ylabel('Price')
    ax2.set_xticks(x[::3])
    ax2.set_xticklabels(df.Date.dt.date[::3])
    ax2.set_ylabel('Volume')
    ax2.yaxis.tick_right()

    ax.grid(axis='y')
    ax2.grid(False)

    plt.suptitle('Candlestick: {}'.format(ticker), ha='left', x=.015, y=1)
    plt.subplots_adjust(wspace=0, hspace=0)
    fig.autofmt_xdate()
               
def plot_candlestick(df, ticker):
    """Plot an mplfinance candlestick chart of the last 180 rows of *ticker*.

    The chart includes 7- and 21-period moving averages and a volume panel.
    Rows are selected with the ``ticker`` argument. (Previously the function
    filtered on a global loop variable ``t``, so it only worked when the caller
    happened to define ``t`` with the same value.)
    """
    df = df[df['Ticker'] == ticker].tail(180)
    mpf.plot(df, figratio=(10,5), type='candle', mav=(7, 21), volume=True, title=ticker, style='classic')
    
# Draw one candlestick chart per ticker (the hand-made OHLC variant is disabled).
for t in data['Ticker'].unique():
    # plot_ohlc(data, ticker=t)
    plot_candlestick(data, ticker=t)

In [None]:
# Reset matplotlib to its default rcParams, then re-apply the notebook style.
# NOTE(review): presumably needed because the mplfinance plots above change the
# global style -- confirm.
mpl.rcParams.update(mpl.rcParamsDefault)
plt.style.use(custom_style)

### Technical Indicators

In [None]:
def some_indicators(df, o, c, h, l, v):
    """Compute technical indicators per ticker and return them as extra columns.

    The frame is split on its 'Ticker' column, the indicators are computed on
    each ticker's rows separately, and the pieces are concatenated again.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a 'Ticker' column plus the columns named below.
    o : str
        Name of the open-price column (currently unused by the body).
    c : str
        Name of the close-price column.
    h : str
        Name of the high-price column.
    l : str
        Name of the low-price column.
    v : str
        Name of the volume column.

    Returns
    -------
    pandas.DataFrame
        The input rows with these columns added: SMA20, SMA50, SMA150, SMA200,
        EMA20, EMA40, WMA5, EMA26, EMA12, MACD, 20SD, UB, LB, ATR, STOCH, RSI,
        ADI and OBV.
    """
    result = list()
    for t in df['Ticker'].unique():
        temp = df[df['Ticker'] == t].copy()
    
        # ---- Trend ----
    
        # Simple Moving Average
        temp['SMA20'] = temp[c].rolling(window = 20).mean()
        temp['SMA50'] = temp[c].rolling(window = 50).mean()
        temp['SMA150'] = temp[c].rolling(window = 150).mean()
        temp['SMA200'] = temp[c].rolling(window = 200).mean()
        
        # Exponential Moving Average
        temp['EMA20'] = temp[c].ewm(span = 20).mean()
        temp['EMA40'] = temp[c].ewm(span = 40).mean()
        
        # Weighted Moving Average over 5 rows
        # NOTE(review): the weights are multiplied with the window values in
        # window order, so 0.4 appears to land on the oldest close of each
        # window -- confirm this is intended.
        weights = np.array([0.4, 0.2, 0.2, 0.1, 0.1])
        sum_weights = np.sum(weights)
        temp['WMA5'] = (temp[c].rolling(window = 5).apply(lambda x: np.sum(weights * x) / sum_weights, raw = False))
        
        # Moving Average Convergence Divergence: EMA12 minus EMA26 of the close
        temp['EMA26'] = temp[[c]].ewm(span = 26).mean()
        temp['EMA12'] = temp[[c]].ewm(span = 12).mean()
        temp['MACD'] = temp['EMA12'] - temp['EMA26']
        
        # ---- Volatility ----
    
        # Bollinger Band: SMA20 plus/minus two rolling standard deviations
        temp['20SD'] = temp[c].rolling(window=20).std() 
        temp['UB'] = temp['SMA20'] + (temp['20SD'] * 2)
        temp['LB'] = temp['SMA20'] - (temp['20SD'] * 2)
        
        # Average True Range
        # True range = largest of (high - low), |high - previous close| and
        # |low - previous close|.
        atr_window = 14
        Previous_close = temp[c].shift(1)
        true_range = pd.DataFrame(data={'tr1': temp[h] - temp[l],
                                        'tr2': (temp[h] - Previous_close).abs(),
                                        'tr3': (temp[l] - Previous_close).abs()}).max(axis=1)
        # The first atr_window - 1 entries stay 0; entry atr_window - 1 is seeded
        # with the mean of the first atr_window true ranges. Indexing that entry
        # requires at least atr_window rows per ticker.
        ATR = np.zeros(len(temp[c]))
        ATR[atr_window - 1] = true_range[0: atr_window].mean()
        # Recursive smoothing: (previous ATR * 13 + current true range) / 14.
        for i in range(atr_window, len(ATR)):
            ATR[i] = (ATR[i - 1] * (atr_window - 1) + true_range.iloc[i]) / float(atr_window)
        ATR = pd.Series(data = ATR, index=true_range.index)
        temp.insert(temp.shape[1], 'ATR', ATR)
        
        # ---- Momentum ----
    
        # Stochastic Oscillator: position of the close inside the 14-row
        # low/high range, in percent (stoch_smooth_window is not used below).
        stoch_window = 14; stoch_smooth_window = 3; stoch_periods = 14
        S_min = temp[l].rolling(stoch_window, min_periods = stoch_periods).min()
        S_max = temp[h].rolling(stoch_window, min_periods = stoch_periods).max()
        STOCH = 100 * (temp[c] - S_min) / (S_max - S_min)
        temp.insert(temp.shape[1], 'STOCH', STOCH)
        
        # Relative Strength Index from exponentially smoothed gains and losses
        rsi_periods = 14; diff = temp[c].diff(1)
        UP_direction = diff.where(diff > 0, 0.0); 
        DOWN_direction = -diff.where(diff < 0, 0.0)
        # NOTE(review): `adjust` is given the integer rsi_periods (14) where
        # pandas documents a boolean -- confirm whether adjust=False was meant.
        EMA_up = UP_direction.ewm(alpha = 1 / rsi_periods, min_periods = rsi_periods, adjust = rsi_periods).mean()
        EMA_down = DOWN_direction.ewm(alpha = 1 / rsi_periods, min_periods = rsi_periods, adjust = rsi_periods).mean()
        # RSI is set to 100 where the smoothed loss is exactly zero.
        RSI = pd.Series(np.where(EMA_down == 0, 100, 100 - (100 / (1 + (EMA_up / EMA_down)))), index = temp[c].index)
        temp.insert(temp.shape[1], 'RSI', RSI)
        
        # ---- Volume ----
    
        # Accumulation/Distribution Index: cumulative sum of CLV * volume,
        # where CLV is replaced by 0 when it is NaN (e.g. high == low).
        CLV = (((temp[c] - temp[l]) - (temp[h] - temp[c]))/(temp[h] - temp[l])).fillna(0.0)
        ADI = CLV * temp[v]
        temp['ADI'] = ADI.cumsum()
        
        # On-Balance Volume: volume is subtracted when the close is below the
        # previous close and added in every other case (including the first row).
        OBV = np.where(temp[c] < temp[c].shift(1), -temp[v], temp[v])
        OBV = pd.Series(OBV, index = temp[c].index).cumsum()
        temp.insert(temp.shape[1], 'OBV', OBV)
           
            
        result.append(temp)

    result = pd.concat(result)
    return result

# Replace `data` with a copy that carries the indicator columns for every ticker.
data = some_indicators(df=data, o='Open', c='Close', h='High', l='Low', v='Volume')

In [None]:
def technical_chart(ticker_name, ticker_df, last_n, plot=True):
    """Plot close, SMA50/150/200 and the Bollinger band for the last *last_n* rows.

    An inset axes shows the close with EMA20 and EMA40 for the most recent
    ``last_n - ceil(0.9 * last_n)`` rows. Nothing is drawn when *plot* is false.
    """
    assert type(ticker_name) is str

    if not plot:
        return

    window = ticker_df.tail(last_n)
    fig, ax = plt.subplots(figsize=(10,5), dpi=150)
    main_lines = (('Close', 'black'), ('SMA50', 'b'), ('SMA150', 'g'), ('SMA200', 'r'))
    for column, shade in main_lines:
        ax.plot(window.index, window[column], c=shade, label=column)
    ax.fill_between(window.index, window['LB'], window['UB'], alpha=0.35, label='Bollinger Band')
    ax.legend(loc='lower right')

    zoom_df = window.tail(last_n-math.ceil(last_n*0.9))
    inset = fig.add_axes([0.01, 0.55, 0.25, 0.30]) # left, bottom, width, height
    for column in ('Close', 'EMA20', 'EMA40'):
        inset.plot(zoom_df.index, zoom_df[column], label=column)
    inset.legend(prop={'size': 5})
    inset.xaxis.set_major_locator(mdates.MonthLocator(interval=12))

    plt.suptitle('{}'.format(ticker_name), ha='left', x=.015, y=1)
    plt.title('Short Term Performance')

# Call the overview chart once per ticker; plot=False means nothing is drawn.
for t in data['Ticker'].unique():
    technical_chart(t, data[data['Ticker'] == t], last_n=999, plot=False)

In [None]:
def technical_chart2(ticker_name, ticker_df, last_n, plot=True):
    """Plot MACD, ATR, STOCH, RSI, ADI and OBV for the last *last_n* rows in a 3x2 grid.

    Nothing is drawn when *plot* is false.
    """
    assert type(ticker_name) is str

    if not plot:
        return

    window = ticker_df.tail(last_n)
    fig, axs = plt.subplots(3, 2, figsize=(10,7), dpi=100)

    # (grid row, grid column, indicator column, panel title)
    panels = (
        (0, 0, 'MACD', 'Moving Average Convergence Divergence'),
        (0, 1, 'ATR', 'Average True Range'),
        (1, 0, 'STOCH', 'Stochastic Oscillator'),
        (1, 1, 'RSI', 'Relative Strength Index'),
        (2, 0, 'ADI', 'Accumulation/Distribution Index'),
        (2, 1, 'OBV', 'On-Balance Volume'),
    )
    for row, col, column, heading in panels:
        axs[row, col].plot(window.index, window[column])
        axs[row, col].set_title(heading)

    plt.suptitle('Technical Indicator: {}'.format(ticker_name), ha='left', x=.015, y=1)

# Call the indicator grid once per ticker; plot=False means nothing is drawn.
for t in data['Ticker'].unique():
    technical_chart2(t, data[data['Ticker'] == str(t)], last_n=100, plot=False)

### Separation

In [None]:
def ticker_separation(data):
    """Split *data* into one frame per ticker, with the 'Ticker' column removed.

    Returns a dict keyed, in this order, by MSFT, AAPL, GOOGL, FB, NVDA, TSLA
    and AMZN. A ticker with no rows in *data* maps to an empty frame.
    """
    symbols = ('MSFT', 'AAPL', 'GOOGL', 'FB', 'NVDA', 'TSLA', 'AMZN')
    return {
        symbol: data[data['Ticker'] == symbol].drop(columns='Ticker')
        for symbol in symbols
    }

# Per-ticker frames (indicator columns included, 'Ticker' column removed) used by the models below.
ticker_dict = ticker_separation(data=data)

### Fourier Transform

In [None]:
def fourier_transformation(df, tn, plot=True):
    """Plot low-frequency Fourier reconstructions of the 'Close' series.

    The FFT of ``df['Close']`` is computed; for n in 3, 6, 9 and 78 only the
    first and last n coefficients are kept and the inverse transform is plotted
    next to the original series. Nothing is drawn when *plot* is false.
    """
    assert type(tn) is str

    spectrum = np.fft.fft(np.asarray(df['Close']))
    fft_df = pd.DataFrame({'fft': spectrum})
    fft_df['Absolute'] = np.abs(spectrum)
    fft_df['Angle'] = np.angle(spectrum)

    if not plot:
        return

    fig, ax = plt.subplots(figsize=(10,5), dpi=100)
    coefficients = np.asarray(fft_df['fft'])
    for n in [3, 6, 9, 78]:
        truncated = np.copy(coefficients)
        truncated[n:-n] = 0  # zero everything except the n lowest frequencies on each side
        ax.plot(np.fft.ifft(truncated), label='FT Component {}'.format(n))
    ax.plot(df['Close'],  label='Close')
    ax.yaxis.set_label_position("right")
    plt.legend()
    plt.xlabel('Index')
    plt.ylabel('Close')
    plt.suptitle('Fourier Tranform: {}'.format(tn), ha='left', x=.015, y=1)
    plt.show()
    
# Run the Fourier step for every ticker; plot=False means nothing is drawn.
for tn, tdf in ticker_dict.items():
    fourier_transformation(df=tdf.reset_index()[['Date', 'Close']], tn=tn, plot=False)

### Hyperparameter Tuning for ARIMA

In [None]:
def optimal_arima(df, tn, cn):
    """Run pmdarima's auto_arima search on column *cn* and print its trace.

    The search is non-seasonal, uses the ADF test to pick d, and caps p, q and
    d at 5. The fitted model is not returned (the return is disabled below).
    """
    assert type(tn) is str

    print(f'Ticker: {tn}')
    search_options = dict(start_p=1, start_q=1, test='adf', max_p=5, max_q=5, max_d=5,
                          seasonal=False, trace=True, random_state=7)
    oarima_model = pm.auto_arima(df[cn], **search_options)
    # print('')
    # display(oarima_model.summary())
    # return oarima_model.order
    
# Run the order search per ticker. Storing the result is disabled, so
# arima_params stays empty.
arima_params = {}
for tn, tdf in ticker_dict.items():
    optimal_arima(df=tdf, tn=tn, cn='Close')
    # arima_params[tn] = optimal_arima(df=tdf, tn=tn, cn='Close')

### ARIMA Model Forecast

In [None]:
def arima_model(df, tn, cn, params, n_days, forecast_days, plot=True):
    """Validate an ARIMA model walk-forward and forecast *forecast_days* ahead.

    The last *n_days* rows of column *cn* are split 70/30. For every point of
    the 30% test part a model of order *params* is refitted on all earlier
    observations and asked for a one-step forecast; the mean squared error of
    those forecasts is reported. The out-of-sample forecast comes from a model
    fitted on the whole *n_days* window.

    Fixes compared with the previous version:
    * the walk-forward models use *params* instead of a hard-coded (5, 1, 0);
    * the one-step forecasts are flattened before the error is computed, so
      the MSE is no longer taken over an (n, n) broadcast matrix;
    * the forecast comes from the full-window fit rather than from the last
      validation model, which had not seen the final observation;
    * forecast dates start the day after the last observation;
    * the title labels the order as (p,d,q).

    Parameters
    ----------
    df : pandas.DataFrame
        Data of one ticker, indexed by date.
    tn : str
        Ticker name, used in the figure title.
    cn : str
        Name of the column to model.
    params : tuple
        ARIMA order (p, d, q).
    n_days : int
        Number of most recent rows to use.
    forecast_days : int
        Number of steps to forecast beyond the data.
    plot : bool
        Draw the train/test/prediction/forecast figure when true.

    Raises
    ------
    ValueError
        If *n_days* exceeds the number of rows in *df*.
    """
    assert type(tn) == str
    assert type(cn) == str

    if n_days > len(df):
        # ValueError is an Exception subclass, so existing handlers keep working.
        raise ValueError('n_days > length of data')

    warnings.filterwarnings('ignore')
    start = time.time()

    recent = df.tail(n_days).copy()
    series = recent[cn]

    # Fit on the full window; this model produces the out-of-sample forecast.
    full_fit = ARIMA(series, order=params).fit(disp=0)
    # print(full_fit.summary())
    # autocorrelation_plot(series)

    X = series.values
    split = int(len(X)*0.70)
    train, test = X[:split], X[split:]

    # Walk-forward validation: refit on everything seen so far, predict one step.
    history = list(train)
    predictions = list()
    for obs in test:
        step_fit = ARIMA(history, order=params).fit(disp=0)
        predictions.append(step_fit.forecast()[0])
        history.append(obs)
    # forecast()[0] is a length-1 array; flatten so it lines up with `test`.
    predictions = np.asarray(predictions, dtype=float).ravel()

    error = np.round(np.mean((test-predictions)**2),4)

    forecast, se, ci = full_fit.forecast(forecast_days, alpha=0.05)
    forecast_start = recent.index[-1] + pd.Timedelta(days=1)
    forecast_date = pd.date_range(forecast_start, periods=forecast_days).tolist()

    end = time.time()

    if plot:
        fig, ax = plt.subplots(figsize=(10,5), dpi=100)
        ax.plot(recent.index[:len(train)], train, label='Training')
        ax.plot(recent.index[-len(test):], test, label='Testing')
        ax.plot(recent.index[-len(test):], predictions, color='r', label='Predicted')
        ax.plot(forecast_date, forecast, c='black', label='Forecast')
        ax.fill_between(forecast_date, ci[:, 0], ci[:, 1], alpha=.35)
        ax.yaxis.set_label_position("right")
        plt.legend(loc='upper left'); plt.xlabel('Days'); plt.ylabel(cn)
        plt.suptitle('ARIMA Model: {}'.format(tn), ha='left', x=.015, y=1)
        plt.title('MSE: {} | Run Time: {} | Params(p,d,q): {} | Forecast days: {}'\
                  .format(error, round(end-start,6), params, forecast_days), c='grey')
        plt.show()

# Fit and forecast per ticker with the order stored in arima_params.
# zip() stops at its shortest input, so the body only runs for tickers that
# have an entry in arima_params (matched by position, not by key).
for tn,tdf,params in zip(list(ticker_dict.keys()), list(ticker_dict.values()), list(arima_params.values())):
    arima_model(df=tdf, tn=tn, cn='Close', params=params, n_days=200, forecast_days=30, plot=False)

### Prophet Model Forecast

In [None]:
def prophet_model(df, tn, cn, n_days, forecast_days, plot=True):
    """Fit Prophet on the last *n_days* rows of column *cn* and forecast ahead.

    The fitted values are drawn in black and the *forecast_days* future values
    in blue, with their uncertainty band. Nothing is drawn when *plot* is
    false. Nothing is returned.
    """
    assert type(tn) is str
    assert type(cn) is str

    short = df.tail(n_days).copy()

    # Disabled hyper-parameter search, kept for reference:
    # param_grid = {
    #     'changepoint_prior_scale': [0.001, 0.01, 0.1],
    #     'seasonality_prior_scale': [0.01, 0.1, 1.0]}
    # all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    # errors = list()
    # cutoffs = pd.to_datetime(['2018-07-08', '2019-01-01', '2019-07-08'])

    # for params in all_params:
    #     fbp = Prophet(**params, daily_seasonality=True)\
    #     .fit(short.reset_index().rename(columns={'Date': 'ds', cn: 'y'})[['ds', 'y']])
    #     cv = cross_validation(fbp, cutoffs=cutoffs, horizon='365 days')
    #     pm = performance_metrics(cv, rolling_window=1)
    #     errors.append(pm['rmse'].values[0])

    # tuning_results = pd.DataFrame(all_params)
    # tuning_results['rmse'] = errors
    # display(tuning_results)

    start = time.time()

    fbp = Prophet(changepoint_prior_scale=0.001, seasonality_prior_scale=0.01, daily_seasonality=True)
    # Prophet expects the date column as 'ds' and the target as 'y'.
    training_frame = short.reset_index().rename(columns={'Date': 'ds', cn: 'y'})[['ds', 'y']]
    fbp.fit(training_frame)
    forecast = fbp.predict(fbp.make_future_dataframe(periods=forecast_days))

    end = time.time()

    if not plot:
        return

    fitted = forecast[:-forecast_days]
    ahead = forecast[-forecast_days:]

    fig, ax = plt.subplots()
    ax.plot(fitted['ds'], fitted['yhat'], label='Training', c='black')
    ax.plot(ahead['ds'], ahead['yhat'], label='Forecast', c='#0072b2')
    ax.fill_between(ahead['ds'], ahead['yhat_lower'], ahead['yhat_upper'], label='CI', color='#bedaea')
    ax.yaxis.set_label_position("right")
    plt.xlabel('Date')
    plt.ylabel(cn)
    plt.legend()
    plt.suptitle('Prophet: {}'.format(tn), ha='left', x=.015, y=1)
    first_day = str(short.index.min())[:10]
    last_day = str(short.index.max())[:10]
    plt.title('Commence: {} | End: {} | Run Time: {} | Forecast days: {}'\
              .format(first_day, last_day, round(end-start,6), forecast_days),
              c='grey')
    plt.show()

# Fit Prophet for every ticker; plot=False means nothing is drawn.
for tn, tdf in ticker_dict.items():
    prophet_model(df=tdf, tn=tn, cn='Close', n_days=1000, forecast_days=300, plot=False)

### Feature Selection for Autoencoder

In [None]:
def feature_selection(df, tn, cn, ntf, plot=True):
    """Rank the columns of *df* by how well they explain column *cn*.

    An ExtraTreesRegressor is fitted on every other column (NaN filled with 0)
    against *cn*; the names of the *ntf* columns with the highest feature
    importance are returned, most important first. With *plot* true the
    importances are also drawn as a bar chart.
    """
    assert type(tn) is str
    assert type(cn) is str

    features = df.drop(columns=[cn]).fillna(0)
    target = df[cn].values

    forest = ensemble.ExtraTreesRegressor(random_state=7)
    forest.fit(features, target)
    importances = forest.feature_importances_

    feature_table = pd.DataFrame({
        'Count': list(range(len(importances))),
        'FI': importances.tolist(),
        'Label': [column for column in df.columns if column != str(cn)],
    })

    if plot:
        fig, ax = plt.subplots(figsize=(10,5), dpi=100)
        ax.bar(feature_table['Count'], feature_table['FI'], tick_label=feature_table['Label'])
        plt.suptitle('Feature Importances: {}'.format(tn), ha='left', x=.015, y=1)
        plt.title('With Extra Tree Regressor', c='grey')

    ranked = feature_table.sort_values(by=['FI'], ascending=False)
    return ranked['Label'].tolist()[:ntf]
    
# Per ticker: the 8 most important feature columns followed by the 'Close' target.
selected_feature_dict = {}
for tn, tdf in ticker_dict.items():
    selected_feature = feature_selection(df=tdf, tn=tn, cn='Close', ntf=8, plot=False)
    selected_feature.append('Close')
    selected_feature_dict[tn] = selected_feature

### Autoencoder Dimensionality Reduction and Anomaly Detection

In [None]:
def autoencoder_model(df, tn, cn, plot=True):
    """Train a dense autoencoder on ``df`` and label poorly reconstructed rows.

    The rows of ``df`` are split 80/20 at random (fixed seed), min-max scaled
    with a scaler fitted on the training part, and used to train an
    autoencoder that reproduces its own input. Every row is then scored by the
    mean squared logarithmic error between its scaled values and their
    reconstruction; rows scoring above ``mean + 1 * std`` of all scores are
    labelled ``'Anomaly'``, the others ``'Normal'``.

    Parameters
    ----------
    df : all-numeric DataFrame (it is passed straight to ``MinMaxScaler``).
    tn : str, ticker name; only used in the plot title.
    cn : str; only type-checked, not otherwise used in the body.
    plot : when True, plot the loss history, print the label counts and draw
        them as a bar chart.

    Returns
    -------
    None. The model and the labelled reconstruction stay local to the call.
    """
    assert type(tn) == str
    assert type(cn) == str
    
    # Random 80/20 row split with a fixed seed; the rows not sampled form the test part
    trainX = df.sample(frac=.8, random_state=7); testX = df.drop(trainX.index)   
    
    # The scaler is fitted on the training rows only and then applied to the test rows
    scaler = preprocessing.MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX.copy())
    testX_scaled = scaler.transform(testX.copy())

    tf.random.set_seed(7) # set seed for reproducibility
    
    class AutoEncoder(tf.keras.Model):
        """Dense 64-32-16-``code_size`` encoder with a mirrored decoder (dropout 0.1 between layers)."""
        def __init__(self, output_units, code_size=8):
            super().__init__()
            self.encoder = tf.keras.Sequential([
                tf.keras.layers.Dense(64, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(32, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(16, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(code_size, activation='relu')
            ])
            # Sigmoid output keeps reconstructions in [0, 1]; the training inputs are min-max scaled
            self.decoder = tf.keras.Sequential([
                tf.keras.layers.Dense(16, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(32, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(64, activation='relu'),
                tf.keras.layers.Dropout(0.1),
                tf.keras.layers.Dense(output_units, activation='sigmoid')
            ])
        def call(self, inputs):
            encoded = self.encoder(inputs)
            decoded = self.decoder(encoded)
            return decoded
        
    
    # Input and target are the same array: the network learns to reproduce its input
    model = AutoEncoder(output_units=trainX_scaled.shape[1])
    model.compile(loss='msle', metrics=['mse'], optimizer='adam') # Mean Squared Logarithmic loss robust to outliers
    history = model.fit(trainX_scaled, trainX_scaled, verbose=0, epochs=20, batch_size=512, 
                        validation_data=(testX_scaled, testX_scaled))

    if plot:
      fig, ax = plt.subplots(figsize=(10,5), dpi=100)
      ax.plot(history.history['loss'], label='Training Loss')
      ax.plot(history.history['val_loss'], label='Validation Loss')
      ax.yaxis.set_label_position("right")
      plt.legend() 
      plt.xlabel('Epochs')
      plt.ylabel('MSLE Loss') 
      plt.title('History Loss')
      plt.suptitle('AutoEncoder Model: {}'.format(tn), ha='left', x=.015, y=1)

    # NOTE: rows are stacked as train-then-test, so the row order of `whole` (and of
    # `reconstructed_df` below, which gets a fresh default index) is NOT the row order of `df`.
    whole = np.concatenate((trainX_scaled, testX_scaled), axis=0) # Concat train & test
    reconstructed = model.predict(whole)
    # One error value per row; a row is an outlier when its error exceeds mean + 1 std of all errors
    reconstructed_errors = tf.keras.losses.msle(whole, reconstructed)
    outliers_threshold = np.mean(reconstructed_errors.numpy()) + np.std(reconstructed_errors.numpy())
    anomaly_mask = pd.Series(reconstructed_errors) > outliers_threshold
    reconstructed_df = pd.DataFrame(reconstructed, columns=df.columns)
    reconstructed_df['Outliers'] = anomaly_mask.map(lambda x: 'Anomaly' if x == True else 'Normal')
    
    if plot:
      print(reconstructed_df['Outliers'].value_counts())
    
    if plot:
      fig, ax = plt.subplots(figsize=(10,5), dpi=100)
      bar = reconstructed_df['Outliers'].value_counts().plot(kind='bar', title='Outliers Counts', ylabel='Counts')
      ax.yaxis.set_label_position("right");
      # Write each bar's height just above it
      for b in bar.patches:
          bar.annotate(format(b.get_height(),'.2f'), (b.get_x()+b.get_width()/2, b.get_height()), 
                      ha='center', va='center', size=10, xytext=(0, 8), textcoords='offset points')
    
# Train one autoencoder per ticker on that ticker's own selected columns.
# The per-ticker list is looked up in selected_feature_dict; the bare `selected_feature`
# name only holds the list of the last ticker processed by the feature-selection loop.
for tn, tdf in ticker_dict.items():
    autoencoder_model(df=tdf.reset_index(drop=True)[selected_feature_dict[tn]].fillna(0),
                      tn=tn, cn='Close', plot=False)

### Time Series Generative Adversarial Networks

In [None]:
# Keep the rows labelled 2017-01-01 or later and split them into one frame per 'Ticker' value
filtered = data.loc['2017-01-01':]
filtered_ticker_dict = {ticker: frame for ticker, frame in filtered.groupby('Ticker')}

In [None]:
# Index-aligned sum of the first six columns of every ticker frame;
# a value missing on one side of an addition is treated as 0
r = pd.DataFrame()
for tn, tdf in filtered_ticker_dict.items():
    r = r.add(tdf.iloc[:, :6], fill_value=0)

In [None]:
# List the CUDA devices PyTorch can see
ngpu = torch.cuda.device_count()
for i in range(ngpu):
    print("GPU {}: {}".format(i+1, torch.cuda.get_device_name(i)))

# Train on the first GPU when one is usable, otherwise on the CPU
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Training schedule: total epochs, and how often (in epochs) the model is evaluated
num_epochs, evaluation_epoch_num = 100000, 10000

# Mini-batch size and Adam settings
batch_size = 64
learning_rate = 5.125e-4
optimizer_betas = (0.9, 0.999)

In [None]:
class TimeseriesDataset(Dataset):
    """Sliding-window dataset over the rows of a numeric DataFrame.

    Item ``i`` is the block of ``sequence_length`` consecutive rows starting
    at row ``i``, returned as a float32 tensor. Negative indices count from
    the end, as with a list.

    Parameters
    ----------
    data_frame : DataFrame whose ``.values`` can be converted by ``torch.tensor``.
    sequence_length : number of consecutive rows in every item.
    """

    def __init__(self, data_frame, sequence_length=2):
        self.data = torch.tensor(data_frame.values)
        self.sequence_length = sequence_length

    def __len__(self):
        # With fewer rows than one window the raw count is negative, which would make
        # len() raise ValueError; such a dataset is simply empty.
        return max(self.data.shape[0] - self.sequence_length + 1, 0)

    def __getitem__(self, index):
        n_windows = len(self)
        if index < 0:
            index += n_windows
        # Slicing never fails by itself, so reject bad indices explicitly; this also lets
        # plain iteration over the dataset terminate instead of yielding short windows forever.
        if not 0 <= index < n_windows:
            raise IndexError('window index out of range')
        return self.data[index: index + self.sequence_length].float()

In [None]:
# Columns the GAN works on; every sample is a window of `sequence_length` consecutive rows
training_columns_list = ['Close', 'Open', 'High', 'Low']
sequence_length = 7
data_dimension = len(training_columns_list)

# Ordered split (no shuffling): the first 80% of the rows train, the last 20% are held out
train_data, evaluation_data = model_selection.train_test_split(
    r[training_columns_list], shuffle=False, test_size=0.2)

# The min-max scaler is fitted on the training rows only and then applied to both parts
scaler = preprocessing.MinMaxScaler().fit(train_data)
train_data[train_data.columns] = scaler.transform(train_data)
evaluation_data[evaluation_data.columns] = scaler.transform(evaluation_data)

# The held-out rows are halved in order: first half validation, second half test
validation_data, test_data = model_selection.train_test_split(
    evaluation_data, shuffle=False, test_size=0.5)

train_dataset, validation_dataset, test_dataset = (
    TimeseriesDataset(part, sequence_length)
    for part in (train_data, validation_data, test_data)
)

# Only the training loader shuffles its windows
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
class Generator(nn.Module):
    """LSTM that reads a window of rows and emits one generated row.

    Parameters
    ----------
    hidden_size : width of the LSTM hidden state.
    data_dim : number of features per time step. When omitted, the
        module-level ``data_dimension`` is used.
    """

    def __init__(self, hidden_size, data_dim=None):
        super(Generator, self).__init__()
        if data_dim is None:
            data_dim = data_dimension
        self.hidden_size = hidden_size
        # nn.LSTM applies its `dropout` only between stacked layers; with a single layer a
        # non-zero value changes nothing and only raises a UserWarning, so it is not passed.
        self.lstm = nn.LSTM(input_size=data_dim, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.linear = nn.Linear(hidden_size, data_dim)
        self.drop = nn.Dropout(0.2)

    def forward(self, input_sequences):
        """Map a ``(batch, steps, data_dim)`` input to a ``(batch, 1, data_dim)`` output."""
        input_sequences = self.drop(input_sequences)
        _, (hidden_state, _) = self.lstm(input_sequences)
        # hidden_state[-1] is the final hidden state of the last LSTM layer: (batch, hidden_size)
        res = self.linear(hidden_state[-1])
        return res.view(res.shape[0], 1, -1)

In [None]:
class Discriminator(nn.Module):
    """LSTM that scores a window of rows with one value in ``[0, 1]``.

    Parameters
    ----------
    hidden_size : width of the LSTM hidden state.
    data_dim : number of features per time step. When omitted, the
        module-level ``data_dimension`` is used.
    """

    def __init__(self, hidden_size, data_dim=None):
        super(Discriminator, self).__init__()
        if data_dim is None:
            data_dim = data_dimension
        self.hidden_size = hidden_size
        # nn.LSTM applies its `dropout` only between stacked layers; with a single layer a
        # non-zero value changes nothing and only raises a UserWarning, so it is not passed.
        self.lstm = nn.LSTM(input_size=data_dim, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.linear = nn.Sequential(
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),
        )
        self.drop = nn.Dropout(0.2)

    def forward(self, input_sequences):
        """Map a ``(batch, steps, data_dim)`` input to a ``(batch, 1, 1)`` score."""
        input_sequences = self.drop(input_sequences)
        _, (hidden_state, _) = self.lstm(input_sequences)
        # hidden_state[-1] is the final hidden state of the last LSTM layer: (batch, hidden_size)
        res = self.linear(hidden_state[-1])
        return res.view(res.shape[0], 1, -1)

In [None]:
def model_rmse(model, dataloader, epoch, plot_graph=True, plot_title='TSGAN Prediction', show_preds=True):
    """Compute the per-column RMSE of ``model``'s next-row predictions.

    For every window in ``dataloader`` all rows but the last are fed to
    ``model`` and the output is compared with the held-back last row. Both are
    mapped back to original units with the module-level ``scaler`` before the
    error is computed.

    Parameters
    ----------
    model : network to evaluate. It is put in eval mode (dropout disabled)
        for the duration of the call and its previous mode is restored.
    dataloader : yields batches indexed ``[batch, step, feature]`` with the
        features ordered like ``training_columns_list``.
    epoch : epoch number; only referenced by the disabled ``savefig`` line.
    plot_graph : when True, build one actual-vs-predicted figure per column.
    plot_title : only used to derive the file name in the disabled ``savefig`` line.
    show_preds : when plotting, display the figure of the 'Close' column.

    Returns
    -------
    dict mapping every name in ``training_columns_list`` to its RMSE.
    """
    actual_data_list = []
    predicted_data_list = []

    # Evaluate the model that was passed in, deterministically, then hand it back in
    # whatever mode (train/eval) the caller had it in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for sequence_batch in dataloader:
                generator_input_sequence = sequence_batch[:, :-1].to(device)
                actual_data_list.append(sequence_batch[:, -1:])
                predicted_data_list.append(model(generator_input_sequence).cpu())
    finally:
        model.train(was_training)

    n_columns = len(training_columns_list)
    real_data = torch.cat(actual_data_list, 0).view(-1, n_columns).numpy()
    predicted_data = torch.cat(predicted_data_list, 0).view(-1, n_columns).numpy()

    # Undo the min-max scaling so the errors are expressed in the original units
    df_pred = pd.DataFrame(predicted_data, columns=training_columns_list)
    df_pred_unscaled = pd.DataFrame(scaler.inverse_transform(df_pred), columns=training_columns_list)
    df_real = pd.DataFrame(real_data, columns=training_columns_list)
    df_real_unscaled = pd.DataFrame(scaler.inverse_transform(df_real), columns=training_columns_list)

    if plot_graph:
        os.makedirs('./plots_gan/', exist_ok=True)
        file_title = plot_title.lower().replace(" ", "_")

        for column in training_columns_list:
            fig, ax = plt.subplots(figsize=(10,5), dpi=100)
            plt.xlabel('Date'); plt.ylabel(column)
            plt.title('Time Series Generative Adversarial Network' + f" -{column}-")
            plt.plot(df_real_unscaled[column], label='Actual')
            plt.plot(df_pred_unscaled[column], label='Predicted')
            plt.legend(); ax.yaxis.set_label_position("right")

            if show_preds and column == 'Close':
                plt.show()
            # fig.savefig(f'./plots_gan/{file_title}_plt_{column}_e{epoch}.png')
            plt.close(fig)

    rmse_results = {}
    for column in training_columns_list:
        squared_error = (df_real_unscaled[column] - df_pred_unscaled[column]) ** 2
        rmse_results[column] = np.sqrt(squared_error.mean())
    return rmse_results

In [None]:
# Both networks get a hidden state twice as wide as the feature vector and live on `device`
generator = Generator(hidden_size=2 * data_dimension).to(device)
discriminator = Discriminator(hidden_size=2 * data_dimension).to(device)

In [None]:
# Target values for the binary cross-entropy loss
real_label, fake_label = 1., 0.

criterion = nn.BCELoss()

# One Adam optimiser per network, sharing learning rate and betas
optimizer_discriminator = optim.Adam(params=discriminator.parameters(), betas=optimizer_betas, lr=learning_rate)
optimizer_generator = optim.Adam(params=generator.parameters(), betas=optimizer_betas, lr=learning_rate)

In [None]:
# Directory that receives the training checkpoints; exist_ok avoids the check-then-create race
os.makedirs('./models_gan/', exist_ok=True)

In [None]:
# Lowest 'Close' RMSE seen so far and the epoch at which it was reached
min_close_rmse = math.inf
best_predictor = None

# History recorded at every evaluation: both losses and one RMSE series per training column
evaluation_metrics = {
    'Generator_loss': [],
    'Discriminator_loss': [],
    'rmse_values': {column: [] for column in training_columns_list},
}

print('Training started !')

for epoch in range(num_epochs):
    for sequence_batch in train_dataloader:
        real_sequence = sequence_batch.to(device)
        # Number of windows in this batch. It has its own name so that the module-level
        # `batch_size` setting used to build the data loaders is not overwritten.
        current_batch_size = real_sequence.size(0)
        real_labels = torch.full((current_batch_size,), real_label, dtype=torch.float, device=device)
        fake_labels = torch.full((current_batch_size,), fake_label, dtype=torch.float, device=device)

        # Discriminator step, part 1: real windows should be scored as real
        discriminator.zero_grad()
        discriminator_output_real = discriminator(real_sequence).view(-1)
        discriminator_error_real = criterion(discriminator_output_real, real_labels)
        discriminator_error_real.backward()

        # Discriminator step, part 2: the same window with its last row replaced by the
        # generator's output should be scored as fake. detach() keeps this backward pass
        # from reaching the generator's parameters.
        generator_input_sequence = real_sequence[:, :-1]
        generated_values = generator(generator_input_sequence)
        generator_result_concat = torch.cat((generator_input_sequence, generated_values.detach()), 1)
        discriminator_output_fake = discriminator(generator_result_concat).view(-1)
        discriminator_error_fake = criterion(discriminator_output_fake, fake_labels)
        discriminator_error_fake.backward()
        discriminator_error = discriminator_error_real + discriminator_error_fake
        optimizer_discriminator.step()

        # Generator step: the updated discriminator should score the generated window as real
        generator.zero_grad()
        generator_result_concat_grad = torch.cat((generator_input_sequence, generated_values), 1)
        discriminator_output_fake = discriminator(generator_result_concat_grad).view(-1)
        generator_error = criterion(discriminator_output_fake, real_labels)
        generator_error.backward()
        optimizer_generator.step()

    # Evaluate and checkpoint after the first epoch and then every `evaluation_epoch_num` epochs
    if (epoch+1) % evaluation_epoch_num == 0 or epoch+1 == 1:
        rmse_values = model_rmse(generator, validation_dataloader, epoch = (epoch+1), plot_graph=False, show_preds=False)
        if rmse_values['Close'] < min_close_rmse:
            min_close_rmse = rmse_values['Close']
            best_predictor = epoch+1

        for column in training_columns_list:
            evaluation_metrics['rmse_values'][column].append(rmse_values[column])

        # The recorded losses are those of the last batch of the epoch
        evaluation_metrics['Generator_loss'].append(generator_error.item())
        evaluation_metrics['Discriminator_loss'].append(discriminator_error.item())

        print('\n[{}/{}]\tDiscriminator Loss: {:.4f}\tGenerator Loss: {:.4f}'
                  .format(epoch+1, num_epochs, discriminator_error.item(), generator_error.item()))

        for col_name, rmse in rmse_values.items():
            print(f"{col_name} RMSE: {rmse:.4f}")
        save_path = os.path.join("./models_gan/","model_epoch_{}.pt".format(epoch+1))

        torch.save({
            'epoch': epoch+1,
            'generator_model_state_dict': generator.state_dict(),
            'discriminator_model_state_dict': discriminator.state_dict(),
            'optimizer_generator_state_dict': optimizer_generator.state_dict(),
            'optimizer_discriminator_state_dict': optimizer_discriminator.state_dict(),
            'discriminator_loss': discriminator_error,
            'generator_loss': generator_error,
            }, save_path)