<a href="https://colab.research.google.com/gist/EvanMarie/0e3e401c7d3f0333d6d37c316586eabd/stock_helpers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Render floats in DataFrame output with thousands separators and three decimals.
pd.options.display.float_format = '{:,.3f}'.format

# Colour palette shared by the plotting helpers below.
OUTTER_BACK = '#222222'  # figure background (passed as figure facecolor)
INNER_BACK = '#333333'   # axes background (passed to set_facecolor / legend facecolor)
FOREGROUND = 'white'     # grid lines, axis labels and tick labels

In [None]:
# dataframe display formatting

def color_negative_red(val):
    """Return the CSS colour rule for a single table cell.

    Parameters
    ----------
    val : object
        The cell value handed over by the pandas Styler.

    Returns
    -------
    str
        ``'color: red'`` when ``val`` is a number below zero, otherwise
        ``'color: black'``.
    """
    try:
        is_negative = bool(val < 0)
    except TypeError:
        # Strings, timestamps, None, pd.NA ... cannot be ordered against 0;
        # treat them as "not negative" instead of aborting the whole render.
        is_negative = False
    color = 'red' if is_negative else 'black'
    return 'color: %s' % color

def highlight_max(s):
    """Build the per-cell CSS list that colours the maximum of ``s`` blue.

    Every entry equal to ``s.max()`` gets ``'color: blue'``; all other
    entries get an empty string. The list has one item per element of ``s``.
    """
    top_value = s.max()
    styles = []
    for is_top in s == top_value:
        styles.append('color: blue' if is_top else '')
    return styles

def style_data(data):
    """Return a pandas Styler for ``data`` with the notebook's table look.

    Applies ``color_negative_red`` to every cell and ``highlight_max`` to
    every column, sets a 14px font size on the table, and formats numbers
    with a thousands separator and two decimals.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to style.

    Returns
    -------
    pandas.io.formats.style.Styler
    """
    styler = data.style
    # Styler.applymap is deprecated in favour of Styler.map on recent pandas;
    # use the new name when it exists and fall back for older installations.
    elementwise = getattr(styler, 'map', None) or styler.applymap
    return (
        elementwise(color_negative_red)
        .apply(highlight_max)
        .set_table_attributes('style="font-size: 14px"')
        # precision is documented as an int; the original passed the string '2'.
        .format(thousands=',', precision=2)
    )

In [None]:
def display_me(df, num, title=None):
    """Show the first and last ``num`` rows of ``df`` as styled tables.

    Parameters
    ----------
    df : pandas.DataFrame or object with ``to_frame()``
        Data to preview; anything that is not a DataFrame is converted with
        ``to_frame()``.
    num : int
        Number of rows to show from each end.
    title : str, optional
        Heading shown above the tables (upper-cased). Omitted when ``None``.
    """
    # isinstance also accepts DataFrame subclasses, which the exact type
    # comparison used previously would have sent down the to_frame() path.
    if not isinstance(df, pd.DataFrame):
        df = df.to_frame()
    print("")
    # The default title is None; calling .upper() on it used to raise.
    if title is not None:
        highlight(str(title).upper(), 'cyan', 'black', 3)
    highlight(f'(FIRST {num} ROWS of DATA)', 'cyan', 'black', 3)
    display(style_data(df.head(num)))
    print("")
    highlight(f'(LAST {num} ROWS of DATA)', 'cyan', 'black', 3)
    display(style_data(df.tail(num)))
    print("")

In [None]:
def head2(data, num_rows):
    """Display the first ``num_rows`` rows of ``data`` styled by ``style_data``."""
    leading_rows = data.head(num_rows)
    styled = style_data(leading_rows)
    return display(styled)

def tail2(data, num_rows):
    """Display the last ``num_rows`` rows of ``data`` styled by ``style_data``."""
    trailing_rows = data.tail(num_rows)
    styled = style_data(trailing_rows)
    return display(styled)

def sample2(data, num_rows):
    """Display ``num_rows`` rows drawn by ``data.sample`` styled by ``style_data``."""
    sampled_rows = data.sample(num_rows)
    styled = style_data(sampled_rows)
    return display(styled)

In [None]:
def colorprint(text, color='#0038ff', fontsize = 3):
    """Display ``text`` as bold, coloured HTML in the notebook output.

    ``color`` is inserted into the inline style and ``fontsize`` into the
    ``<font size=...>`` attribute. Returns whatever ``display`` returns.
    """
    from IPython.display import HTML
    markup = f"<b><font size={fontsize}><text style=color:{color}>{text}</text></font></b>"
    return display(HTML(markup))

def redprint(text, fontsize = 3):
    """Display ``text`` as bold red (``#ff0000``) HTML.

    Delegates to ``colorprint`` so the HTML template lives in one place.
    """
    return colorprint(text, color='#ff0000', fontsize=fontsize)

def blueprint(text, fontsize = 3):
    """Display ``text`` as bold blue HTML.

    Delegates to ``colorprint`` so the HTML template lives in one place.
    """
    return colorprint(text, color='blue', fontsize=fontsize)

def purpleprint(text, fontsize = 3):
    """Display ``text`` as bold purple (``#8d00ff``) HTML.

    Delegates to ``colorprint`` so the HTML template lives in one place.
    """
    return colorprint(text, color='#8d00ff', fontsize=fontsize)

def greenprint(text, fontsize = 3):
    """Display ``text`` as bold green (``#00b648``) HTML.

    Delegates to ``colorprint`` so the HTML template lives in one place.
    """
    return colorprint(text, color='#00b648', fontsize=fontsize)

In [None]:
def highlight(text, highlight_color='yellow',
                text_color='black', fontsize=2):
    """Display ``text`` in bold on a coloured background in the notebook.

    ``highlight_color`` is the span background, ``text_color`` the text
    colour and ``fontsize`` the ``<font size=...>`` value. Returns whatever
    ``display`` returns.
    """
    from IPython.display import HTML
    inner = f"<b><font size={fontsize}><text style=color:{text_color}>{text}</text></font></b>"
    markup = f"<span style='background-color:{highlight_color}; padding: 0.5em 0.5em;'>{inner}</span>"
    return display(HTML(markup))

In [None]:
def display_rows(display_list, num_cols, title):
    """Print the items of ``display_list`` in tab-separated rows.

    Parameters
    ----------
    display_list : iterable
        Items to print; each one is converted with ``str``.
    num_cols : int
        Number of items per printed row; must be at least 1.
    title : str
        Heading shown (upper-cased) above the rows.

    Raises
    ------
    ValueError
        If ``num_cols`` is smaller than 1.
    """
    if num_cols < 1:
        raise ValueError("num_cols must be a positive integer")

    items = [str(item) for item in display_list]
    border = "........" * num_cols

    print(border)
    highlight(f"{title.upper()}:", 'cyan', 'black', 3)
    # Step through the list in chunks of num_cols. The previous
    # round(len / num_cols) + 1 row count printed an extra empty row for
    # some lengths (e.g. 4 items in 2 columns) but not for others.
    for first in range(0, len(items), num_cols):
        print('\t'.join(items[first:first + num_cols]))
    print(border)
    print('')

In [None]:
def rows_cols(df, df_name):
    """Highlight a one-line summary of the row and column counts of ``df``."""
    n_rows, n_cols = df.shape[0], df.shape[1]
    message = f'📊 "{df_name}" has {n_rows:,} rows and {n_cols} columns.\n'
    highlight(message, 'yellow', 'black', 3)

In [None]:
def print_date_range(df, data_name = None):
    """Highlight the first and last date found in the index of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Object whose index minimum and maximum expose ``year``,
        ``month_name()`` and ``day`` (assumes a datetime-like index; TODO
        confirm that all callers pass one).
    data_name : str, optional
        Label quoted in front of the range when given.
    """
    # Scan the index once per bound instead of three times each.
    first, last = df.index.min(), df.index.max()
    span = (f"{first.month_name()} {first.day}, {first.year} to "
            f"{last.month_name()} {last.day}, {last.year}")

    if data_name is not None:
        message = f'⏰ "{data_name}" time range: {span}'
    else:
        message = f"⏰ Time range: {span}"
    highlight(message, 'yellow', 'black', 3)

In [None]:
def get_df_range(df, start, end):
    """Return the rows of ``df`` whose index labels lie between ``start`` and ``end``.

    The selection is a label slice through ``.loc``.
    """
    window = slice(start, end)
    return df.loc[window]

In [None]:
def annualized_data(returns, title = None, periods_per_year = 250):
    """Compute, display and return annualized return and risk per column.

    Parameters
    ----------
    returns : pandas.DataFrame
        Periodic returns, one column per asset or strategy.
    title : str, optional
        Text appended to the highlighted heading.
    periods_per_year : int, default 250
        Number of return periods in a year. The mean is multiplied by this
        value and the standard deviation by its square root.

    Returns
    -------
    pandas.DataFrame
        One row per input column with the columns ``return`` and ``risk``.
    """
    annualized = returns.agg(['mean', 'std']).T
    annualized['return'] = annualized['mean'] * periods_per_year
    annualized['risk'] = annualized['std'] * np.sqrt(periods_per_year)
    # Rebind instead of inplace=True so the intermediate frame is never
    # mutated behind a reference.
    annualized = annualized.drop(columns = ['mean', 'std'])

    if title is not None:
        heading = f"💰 Annualized Return and Risk: {title}"
    else:
        heading = "💰 Annualized Return and Risk"
    highlight(heading, "lime", "black", fontsize=3)

    display(annualized)
    return annualized

In [None]:
def line_graph(data, start=None, end=None, close_column="close",
               title=None, xlabel=None, ylabel=None):
    """Plot ``data`` as a line chart on the dark theme and print its date range.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Data to plot. Without a range it is plotted as given.
    start, end : label, optional
        Bounds of a ``.loc`` label slice. When at least one is given, only
        ``close_column`` within that slice is plotted; a missing bound leaves
        that side open.
    close_column : str, default "close"
        Column selected when a range is requested.
    title, xlabel, ylabel : str, optional
        Texts for the figure.

    Notes
    -----
    Sets ``mpl.rcParams['font.family']`` and the ``ggplot`` style globally,
    and picks the line colour at random from a fixed list.
    """
    import random

    colors = ['tomato', 'yellow', 'peachpuff', 'lime', 'turquoise',
              'aqua', 'deepskyblue', 'dodgerblue', 'cornflowerblue', 'lavender',
              'violet', 'fuchsia', 'deeppink']

    # Previously the range was applied only when BOTH bounds were given, so a
    # lone start (or end) was silently ignored. A None bound is a valid open
    # end for a label slice.
    if start is not None or end is not None:
        data = data.loc[start : end, close_column].to_frame()

    mpl.rcParams['font.family'] = 'monospace'
    fig, ax = plt.subplots(facecolor = OUTTER_BACK, figsize = (13, 7))
    plt.style.use("ggplot")
    ax.plot(data, color = random.choice(colors))
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.65, alpha = 1)

    plt.tick_params(labelrotation = 40)
    plt.title(title, fontsize = 23, pad = 20, color="white")
    plt.ylabel(ylabel, fontsize = 18, color=FOREGROUND)
    plt.xlabel(xlabel, fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=12, color="white")
    plt.yticks(fontsize=12, color="white")

    print_date_range(data)
    print("")

In [None]:
def plot_threshold_strategies(df, start_threshold=0.00, return_column='dj_return', 
                  close_column='dj_close', increase_increment=0.01, 
                  iterations=10, start = None, end = None):
    """Back-test a family of sell-threshold strategies and plot each one.

    For every threshold the position is -1 when the return exceeds the
    threshold and 1 otherwise; the strategy return is the previous row's
    position times the current return, compounded and scaled by the first
    close so it can be drawn next to the close price.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``return_column`` and ``close_column``.
    start_threshold : float, default 0.0
        Threshold used for the first panel.
    return_column, close_column : str
        Names of the return and close columns.
    increase_increment : float, default 0.01
        Amount added to the threshold after every iteration.
    iterations : int, default 10
        Number of thresholds to try (one subplot each).
    start, end : label, optional
        Bounds of a ``.loc`` label slice applied before the back-test; a
        missing bound leaves that side open.

    Returns
    -------
    list of (float, pandas.DataFrame)
        One ``(threshold, frame)`` pair per iteration, where ``frame`` holds
        ``close_column`` and ``strategy``.

    Notes
    -----
    Without a range the helper columns ``position``, ``strategy_return`` and
    ``strategy`` are written into the caller's frame, as before. With a range
    they are written into a private copy of the slice.
    """
    import warnings

    if start is not None or end is not None:
        # Keep every column: the slice used to retain only close_column, so
        # the return column lookup below raised KeyError.
        df = df.loc[start : end].copy()

    results_log = []
    if iterations < 1:
        return results_log

    # Silence warnings only while this function runs rather than installing a
    # process-wide "ignore" filter for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # Scale by the close column explicitly instead of whatever happens to
        # sit in the first column of the frame.
        first_close = df[close_column].iloc[0]
        return_trigger = start_threshold

        for _ in range(iterations):
            df["position"] = np.where(df[return_column] > return_trigger, -1, 1)
            df["strategy_return"] = df["position"].shift() * df[return_column]
            df["strategy"] = (df["strategy_return"].add(1, fill_value = 0).cumprod()
                              * first_close)
            # Honour close_column here too (was hard-coded to 'dj_close').
            results_log.append((return_trigger, df[[close_column, 'strategy']].copy()))
            return_trigger += increase_increment

        # Ceiling division: round(n / 2) gave too few rows for odd counts and
        # zero rows for a single iteration.
        rows = (len(results_log) + 1) // 2
        plt.style.use("ggplot")
        fig, axs = plt.subplots(nrows=rows, ncols=2,
                                figsize = (13, rows*4), facecolor=OUTTER_BACK,
                                constrained_layout=True, squeeze=False)
        panels = axs.ravel()

        print_date_range(df)
        print("")
        fig.suptitle("Simple Strategy Selling Threshold", color = "white", size = 24)

        for ax, (trigger, frame) in zip(panels, results_log):
            ax.set_facecolor(INNER_BACK)
            frame[close_column].plot(ax = ax, color = 'cyan')
            frame['strategy'].plot(ax = ax, color = 'yellow')
            ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.65, alpha = 1)
            ax.legend(loc = 2, facecolor = "#555555", labelcolor = "white")
            ax.tick_params(axis = 'both', labelcolor = FOREGROUND)
            ax.set_xlabel("years", color="white", size = 10)
            ax.set_ylabel('')
            ax.set_title(f"Strategy: {(trigger*100):.0f}% Sell Threshold",
                         color = "white", size = 15, pad = 5)

        # Hide the unused panel left over when the number of plots is odd.
        for spare in panels[len(results_log):]:
            spare.set_visible(False)

    return results_log

In [None]:
def try_strategy(df, return_column="dj_return", close_column="dj_close", 
                 strategy = 'momentum', threshold = None, start = None,
                 end = None):
    """Back-test one trading rule, plot it against the close and annualize it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``return_column`` and ``close_column``.
    return_column, close_column : str
        Names of the return and close columns.
    strategy : str, default 'momentum'
        ``'momentum'`` uses the sign of the return as position,
        ``'contrarian'`` the negated sign. Any other value selects the
        threshold rule: -1 when the return exceeds ``threshold``, else 1.
    threshold : float, optional
        Threshold for the threshold rule; when given it is also shown in the
        plot title.
    start, end : label, optional
        Bounds of a ``.loc`` label slice applied before the back-test; a
        missing bound leaves that side open.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The frame with ``position``, ``strategy_return`` and ``strategy``
        added, and the result of ``annualized_data`` for the raw and strategy
        returns.

    Notes
    -----
    Without a range the helper columns are written into the caller's frame,
    as before. With a range they are written into a private copy of the slice.
    """
    if start is not None or end is not None:
        # Keep every column: the slice used to retain only close_column, so
        # the return column lookup below raised KeyError.
        df = df.loc[start : end].copy()

    if strategy == 'momentum':
        df['position'] = np.sign(df[return_column])
    elif strategy == 'contrarian':
        df['position'] = -np.sign(df[return_column])
    else:
        df['position'] = np.where(df[return_column] > threshold, -1, 1)

    # Use return_column (was hard-coded to 'dj_return') and scale by the close
    # column explicitly instead of the first column of the frame.
    df['strategy_return'] = df['position'].shift() * df[return_column]
    df['strategy'] = (df['strategy_return'].add(1, fill_value = 0).cumprod()
                      * df[close_column].iloc[0])

    plt.figure(figsize = (13, 7), facecolor = OUTTER_BACK)
    ax = plt.axes()
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)

    df[close_column].plot(ax = ax, color = 'deeppink')
    df['strategy'].plot(ax = ax, color = 'yellow')
    plt.legend(loc = 2, facecolor = "white", labelcolor = "black")
    plt.yticks(color = FOREGROUND, size = 10)
    plt.xticks(color = FOREGROUND, size = 10)
    plt.xlabel("years", color="white", size = 14)
    plt.ylabel('')
    if threshold is not None:
        plt.title(f"Strategy: {(threshold*100):.0f}% Sell Threshold",
                  color = "white", size = 22, pad = 20)
    else:
        plt.title(f"Strategy: {strategy.capitalize()}",
                  color = "white", size = 22, pad = 20)

    print_date_range(df)
    print("")

    annualized = annualized_data(df[[return_column, 'strategy_return']])

    return df, annualized


In [None]:
def rolling_average(df, close_column='dj_close', window=50,
                   start=None, end=None, legend_loc = 2):
    """Plot the close price together with its simple moving average.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``close_column``.
    close_column : str, default 'dj_close'
        Column to average.
    window : int, default 50
        Rolling window length; the average is stored as ``SMA<window>``.
    start, end : label, optional
        Bounds of a ``.loc`` label slice; a missing bound leaves that side
        open. When a range is used the work happens on a new single-column
        frame.
    legend_loc : int or str, default 2
        Legend location passed to matplotlib.

    Notes
    -----
    Without a range the ``SMA<window>`` column is added to the caller's
    frame, as before.
    """
    # Previously the range was applied only when BOTH bounds were given, so a
    # lone start (or end) was silently ignored.
    if start is not None or end is not None:
        df = df.loc[start : end, close_column].to_frame()

    rolling_col = f"SMA{window}"
    df[rolling_col] = df[close_column].rolling(window = window).mean()
    close = df[close_column]
    rolling = df[rolling_col]

    fig, ax = plt.subplots(facecolor = OUTTER_BACK, figsize = (13, 7))
    plt.style.use("ggplot")
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)

    plt.tick_params(labelrotation = 40)
    # Title spelling corrected ("Averge" -> "Average").
    plt.title(f"Rolling Average: {window} day window", fontsize = 21, pad = 20, color="white")
    plt.ylabel('', fontsize = 18, color=FOREGROUND)
    plt.xlabel('', fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=12, color=FOREGROUND)
    plt.yticks(fontsize=12, color=FOREGROUND)
    # Draw on the axes created above rather than on the implicit current axes.
    close.plot(ax = ax, color='deeppink', linewidth=5, alpha = 0.7)
    rolling.plot(ax = ax, color='cyan')
    plt.legend(fontsize = 13, facecolor = INNER_BACK,
               labelcolor = FOREGROUND, loc = legend_loc)

    print_date_range(df)
    print("")

In [None]:
def multiple_rolling_averages(df, close_column='dj_close', 
                              window_list=[50], start=None,
                              end=None, legend_loc = 2):
    """Plot several simple moving averages of the close price on one chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``close_column``.
    close_column : str, default 'dj_close'
        Column to average.
    window_list : sequence of int, default [50]
        Rolling window lengths; each becomes a line labelled ``SMA<window>``.
        The default list is only read, never modified.
    start, end : label, optional
        Bounds of a ``.loc`` label slice; a missing bound leaves that side
        open.
    legend_loc : int or str, default 2
        Legend location passed to matplotlib.
    """
    import random

    colors = ['tomato', 'yellow', 'peachpuff', 'lime', 'turquoise',
              'aqua', 'deepskyblue', 'dodgerblue', 'cornflowerblue', 'lavender',
              'violet', 'fuchsia', 'deeppink']

    if len(window_list) <= len(colors):
        color_list = random.sample(colors, len(window_list))
    else:
        # random.sample raises when asked for more colours than exist; shuffle
        # once and repeat the palette instead.
        shuffled = random.sample(colors, len(colors))
        color_list = [shuffled[i % len(shuffled)] for i in range(len(window_list))]

    # Previously the range was applied only when BOTH bounds were given.
    if start is not None or end is not None:
        df = df.loc[start : end, close_column].to_frame()

    window_log = [
        ("SMA" + str(window), df[close_column].rolling(window = window).mean())
        for window in window_list
    ]

    fig, ax = plt.subplots(facecolor = OUTTER_BACK, figsize = (13, 7))
    plt.style.use("ggplot")
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)
    plt.tick_params(labelrotation = 40)

    # Title spelling corrected ("Averges" -> "Averages").
    if len(window_list) < 5:
        title_string = ', '.join(str(i) for i in window_list)
        plt.title(f"Rolling Averages Compared: {title_string} days", fontsize = 20, pad = 20, color="white")
    else:
        plt.title("Rolling Averages Compared", fontsize = 22, pad = 20, color="white")

    plt.ylabel('', fontsize = 18, color=FOREGROUND)
    plt.xlabel('', fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=12, color=FOREGROUND)
    plt.yticks(fontsize=12, color=FOREGROUND)

    for (label, series), line_color in zip(window_log, color_list):
        ax.plot(series, color = line_color, label = label)

    plt.legend(loc = legend_loc, fontsize = 13,
               facecolor = INNER_BACK, labelcolor = FOREGROUND)

    print_date_range(df)
    print("")

In [None]:
def fancy_plot(data, kind = "line", title = None, legend_loc = 'upper right', 
               start=None, end=None, xlabel=None, ylabel=None, logy=False,
               cmap = 'viridis', label_rot = None):
    """Plot ``data`` with pandas on the notebook's dark theme.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Data to plot.
    kind : str, default "line"
        Plot kind forwarded to ``data.plot``.
    title, xlabel, ylabel : str, optional
        Texts for the figure.
    legend_loc : str or int, default 'upper right'
        Legend location passed to matplotlib.
    start, end : label, optional
        Bounds of a ``.loc`` label slice; a missing bound leaves that side
        open.
    logy : bool, default False
        Accepted for compatibility; not used by this function.
    cmap : str, default 'viridis'
        Colormap forwarded to ``data.plot``.
    label_rot : int, optional
        Rotation forwarded to ``data.plot`` as ``rot``; tick labels are
        afterwards rotated by 40 degrees through ``tick_params``.

    Notes
    -----
    Writes ``xtick.color``, ``ytick.color`` and ``font.family`` into
    ``mpl.rcParams``, which affects figures created later in the session.
    """
    # Previously the range was applied only when BOTH bounds were given.
    if start is not None or end is not None:
        data = data.loc[start : end]

    mpl.rcParams['xtick.color'] = OUTTER_BACK
    mpl.rcParams['ytick.color'] = OUTTER_BACK
    mpl.rcParams['font.family'] = 'monospace'
    # plt.subplots already creates the axes; calling plt.axes() afterwards
    # stacked a second axes on top of it.
    fig, ax = plt.subplots(facecolor = OUTTER_BACK, figsize = (13, 7))
    # The former if/else on kind ran the same call in both branches.
    data.plot(kind = kind, ax = ax, rot = label_rot, cmap = cmap)
    plt.style.use("ggplot")
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)
    plt.tick_params(labelrotation = 40)
    plt.title(title, fontsize = 23, pad = 20, color="white")
    plt.ylabel(ylabel, fontsize = 18, color=FOREGROUND)
    plt.xlabel(xlabel, fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=10, color=FOREGROUND)
    plt.yticks(fontsize=10, color=FOREGROUND)
    # A Series has no .columns; fall back to its name for the legend entry.
    legend_labels = data.columns if hasattr(data, 'columns') else [data.name]
    plt.legend(labels = legend_labels, fontsize = 10, loc = legend_loc,
               facecolor = INNER_BACK, labelcolor = FOREGROUND)

In [None]:
def correlation_heatmap(df, cmap, annot = True, fmt = '.1g',
                        vmin = 0.2, vmax = 0.8,
                       title = None, ):
    """Draw a seaborn heatmap of ``df.corr()`` on the dark figure background.

    ``cmap``, ``annot``, ``fmt``, ``vmin`` and ``vmax`` are forwarded to
    ``sns.heatmap``; ``title`` is placed above the plot in white.
    """
    import seaborn as sns

    correlations = df.corr()
    plt.figure(figsize = (13,9), facecolor = OUTTER_BACK)

    heatmap_options = dict(
        cmap = cmap,
        annot = annot,
        vmin = vmin,
        vmax = vmax,
        linewidth = 0.25,
        linecolor = "white",
        fmt = fmt,
    )
    sns.heatmap(correlations, **heatmap_options)

    plt.title(title, fontsize = 23, pad = 20, color="white")
    # Same white 11pt tick labels on both axes.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize=11, color="white")

In [None]:
def portfolio_returns(weights, portfolio):
    """Return the weighted combination ``portfolio.dot(weights)``."""
    weighted = portfolio.dot(weights)
    return weighted


def tracking_error(weights, portfolio, index_data):
    """Print and return the annualized tracking error against ``index_data``.

    The value is the standard deviation of the weighted portfolio returns
    minus ``index_data``, multiplied by ``sqrt(250)``.
    """
    active_returns = portfolio_returns(weights, portfolio).sub(index_data)
    result = active_returns.std() * np.sqrt(250)
    message = f'The annualized tracking error for the portfolio is {(result*100):.2f}%'
    colorprint(message, fontsize = 3)
    return result
 
    
def tracking_error_general(data, portfolio_list, weights, 
                           index, start, end, title = None,
                           printout=True):
    """Compute the annualized tracking error of a weighted portfolio.

    Parameters
    ----------
    data : pandas.DataFrame
        Returns for the portfolio members and the index.
    portfolio_list : list of str
        Columns that make up the portfolio.
    weights : array-like
        One weight per entry of ``portfolio_list``.
    index : str
        Column holding the benchmark returns.
    start, end : label
        Bounds of the ``.loc`` label slice to evaluate.
    title : str, optional
        Heading shown above the printed summary; skipped when ``None``.
    printout : bool, default True
        When truthy, the period and the tracking error are highlighted.

    Returns
    -------
    float
        Standard deviation of portfolio minus index returns, times
        ``sqrt(250)``. It is now returned in both modes; previously the
        printing mode returned ``None``.
    """
    window = data.loc[start:end]
    active_returns = window[portfolio_list].dot(weights).sub(window[index])
    results = active_returns.std() * np.sqrt(250)

    # Truthiness instead of "is True" so values such as 1 also print.
    if printout:
        if title is not None:
            highlight(title, 'yellow', 'black', 2)
        highlight(f'Period: {start} to {end}', 'yellow', 'black', 2)
        highlight(f'Overall tracking error: {(results *100): .2f}%', 'yellow', 'black', 2)
    return results

In [None]:
def visualize_tracking(data, portfolio_list, weights, index, start, end, 
              title=None, xlabel=None, ylabel=None, legend_loc=1, 
              cmap = 'rainbow', legend_labels = None, color1='red',
              color2 = 'cyan'):
    """Plot the growth of a weighted portfolio against its benchmark.

    Both return series are compounded from a base of 100 over the
    ``start``..``end`` label slice.

    Parameters
    ----------
    data : pandas.DataFrame
        Returns for the portfolio members and the index.
    portfolio_list : list of str
        Columns that make up the portfolio.
    weights : array-like
        One weight per entry of ``portfolio_list``.
    index : str
        Column holding the benchmark returns.
    start, end : label
        Bounds of the ``.loc`` label slice.
    title, xlabel, ylabel : str, optional
        Texts for the figure.
    legend_loc : int or str, default 1
        Legend location passed to matplotlib.
    cmap : str, default 'rainbow'
        Accepted for compatibility; not used by this function.
    legend_labels : list of str, optional
        Labels passed to the legend.
    color1, color2 : str
        Line colours of the portfolio and the benchmark.

    Notes
    -----
    Writes ``xtick.color``, ``ytick.color`` and ``font.family`` into
    ``mpl.rcParams``, which affects figures created later in the session.
    """
    window = data.loc[start:end]
    track = window[portfolio_list].dot(weights).add(1).cumprod().mul(100)
    track.name = 'Portfolio'
    # Separate name so the ``index`` argument (a column label) is not rebound.
    benchmark = window[index].add(1).cumprod().mul(100)

    mpl.rcParams['xtick.color'] = OUTTER_BACK
    mpl.rcParams['ytick.color'] = OUTTER_BACK
    mpl.rcParams['font.family'] = 'monospace'
    # plt.subplots already creates the axes; calling plt.axes() afterwards
    # stacked a second axes on top of it.
    fig, ax = plt.subplots(facecolor = OUTTER_BACK, figsize = (13, 7))
    track.plot(ax = ax, color = color1)
    benchmark.plot(ax = ax, color = color2)
    plt.style.use("ggplot")
    ax.set_facecolor(INNER_BACK)
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)
    plt.tick_params(labelrotation = 40)
    # The legend and title used to be set twice; only the final calls, which
    # overrode the first ones, are kept.
    plt.title(title, fontsize = 23, pad = 20, color="white")
    plt.ylabel(ylabel, fontsize = 18, color=FOREGROUND)
    plt.xlabel(xlabel, fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=10, color=FOREGROUND)
    plt.yticks(fontsize=10, color=FOREGROUND)
    plt.legend(labels = legend_labels, fontsize = 11, loc = legend_loc,
               facecolor = INNER_BACK, labelcolor = FOREGROUND)
    plt.show()

In [None]:
def random_portfolios(num_portfolios, total_assets,
                      returns_df, stocks_list, seed = 123,
                      index = 'DJI', start = '2020', end = '2022'):
    """Simulate random portfolios and return their annualized tracking errors.

    Each portfolio draws ``total_assets`` distinct stocks from
    ``stocks_list`` and random weights that sum to 1.

    Parameters
    ----------
    num_portfolios : int
        Number of random portfolios to simulate.
    total_assets : int
        Number of stocks in each portfolio.
    returns_df : pandas.DataFrame
        Returns for the stocks and the benchmark column.
    stocks_list : sequence of str
        Candidate stock columns.
    seed : int, default 123
        Seed for NumPy's global random generator.
    index : str, default 'DJI'
        Benchmark column.
    start, end : label, default '2020' and '2022'
        Bounds of the ``.loc`` label slice to evaluate.

    Returns
    -------
    numpy.ndarray
        ``num_portfolios`` tracking errors: standard deviation of portfolio
        minus benchmark returns, times ``sqrt(250)`` (the same formula as
        ``tracking_error_general``).
    """
    np.random.seed(seed)

    # Slice once; the window and benchmark are identical for every portfolio.
    window = returns_df.loc[start:end]
    benchmark = window[index]

    # Honour num_portfolios (the count used to be hard-coded to 25000).
    errors = np.empty(num_portfolios)
    for i in range(num_portfolios):
        draws = np.random.random(total_assets)
        weights = draws / draws.sum()
        # Use the stocks drawn for THIS portfolio; previously the
        # per-iteration draw was discarded and one pre-loop selection was
        # reused for every portfolio.
        picks = np.random.choice(stocks_list, size = total_assets, replace = False)
        active_returns = window[list(picks)].dot(weights).sub(benchmark)
        errors[i] = active_returns.std() * np.sqrt(250)
    return errors

In [None]:
def distro_histo(data, num_bins, color='cyan', edgecolor='black',
                title=None, xlabel=None, ylabel=None, legend_loc=None,
                legend_labels=None):
    """Draw a histogram of ``data`` on the notebook's dark theme.

    Parameters
    ----------
    data : array-like
        Values to bin.
    num_bins : int
        Number of histogram bins.
    color : str, default 'cyan'
        Fill colour of the bars.
    edgecolor : str, default 'black'
        Outline colour of the bars.
    title, xlabel, ylabel : str, optional
        Texts for the figure.
    legend_loc : int or str, optional
        Legend location, used only when ``legend_labels`` is given.
    legend_labels : list of str, optional
        When given, a legend with these labels is added.
    """
    plt.figure(figsize = (12, 8), facecolor = OUTTER_BACK)
    ax = plt.axes()
    ax.set_facecolor(INNER_BACK)
    # Use the caller's colour; the fill used to be hard-coded to "cyan".
    plt.hist(data, bins = num_bins, edgecolor = edgecolor,
             linewidth = 0.5, color = color)
    plt.style.use("ggplot")
    ax.grid(color=FOREGROUND, linestyle=':', linewidth=0.75, alpha = 0.75)
    plt.tick_params(labelrotation = 40)
    # The title used to be set twice; only the final call is kept.
    plt.title(title, fontsize = 21, pad = 20, color="white")
    plt.ylabel(ylabel, fontsize = 18, color=FOREGROUND)
    plt.xlabel(xlabel, fontsize = 18, color=FOREGROUND)
    plt.xticks(fontsize=10, color=FOREGROUND)
    plt.yticks(fontsize=10, color=FOREGROUND)
    if legend_labels is not None:
        plt.legend(labels = legend_labels, fontsize = 13, loc = legend_loc,
                   facecolor = INNER_BACK, labelcolor = FOREGROUND)
    plt.show()


def compare_random_optimized(random_error_data, optimized_tracking_data, num_portfolios):
    """Summarize random-portfolio tracking errors against the optimized one.

    Highlights three lines (share of random portfolios the optimized
    strategy outperformed, mean random tracking error, optimized tracking
    error) and then plots the distribution of the random errors.

    Parameters
    ----------
    random_error_data : numpy.ndarray
        Tracking errors of the random portfolios.
    optimized_tracking_data : float
        Tracking error of the optimized portfolio.
    num_portfolios : int
        Number of random portfolios, used only in the texts.
    """
    # Compute the figures first. The original placed backslash line
    # continuations inside f-string replacement fields, which Python versions
    # before 3.12 reject, and the continuations embedded runs of spaces in
    # the messages and in the plot title.
    outperformed_pct = (1 - (random_error_data < optimized_tracking_data).mean()) * 100
    random_mean_pct = random_error_data.mean() * 100
    optimized_pct = optimized_tracking_data * 100

    highlight(f'The optimized strategy outperformed {num_portfolios:,.0f} '
              f'random portfolios and weights {outperformed_pct:.2f}% of the time.',
              fontsize = 3)
    highlight(f'{num_portfolios:,.0f} random portfolios & weights tracking error: '
              f'{random_mean_pct: .2f}%', fontsize = 3)
    highlight(f'Optimized portfolio & weights tracking error: {optimized_pct:.2f}%',
              fontsize = 3)
    print("")

    # Distribution of tracking error across the random portfolios.
    distro_histo(random_error_data, 100, "cyan", "black",
                 title = f'Tracking Errors Across {num_portfolios:,.0f} Random Portfolios',
                 xlabel = "tracking errors across random portfolios",
                 ylabel = "count for tracking errors")