In [17]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from ipywidgets import IntSlider, HBox, interactive_output
import ipywidgets as widgets
import os
import matplotlib.pyplot as plt
from IPython.display import display
from scipy.stats import skew, kurtosis
import numpy as np
from pandas.tseries.holiday import USFederalHolidayCalendar
import seaborn as sns
from functools import reduce

# from utils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Initial Visualizations

In [2]:
# Load data
# The workbook is read from a `data` folder located next to (not inside) the
# current working directory: parent-of-cwd + '/data'.
data_path = f"{os.path.dirname(os.getcwd())}/data"
data_file = f"{data_path}/fpi_raw_data.xlsx"
data = pd.read_excel(
    data_file,
    sheet_name='Universe of broad assets',
    index_col=0,       # first column becomes the (date) index
    parse_dates=True,  # parse that index as datetimes
)

In [3]:
def plot_histogram(series, bin_width, start_date, end_date):
    """Draw a histogram of one column of the module-level ``data`` frame.

    Parameters
    ----------
    series : hashable
        Column label of ``data`` to plot.
    bin_width : int or sequence
        Forwarded unchanged to the ``bins`` argument of ``hist``. Despite the
        name, an integer here is the *number* of bins, not their width (the
        slider that drives it is labelled 'Bin Count').
    start_date, end_date : date-like or None
        Inclusive bounds on the index. Each bound is applied on its own, so
        supplying only one of them still filters; a falsy value (e.g. ``None``
        from an unset date picker) leaves that side open.
    """
    filtered_data = data[series]

    # Apply each bound independently. Previously the filter was skipped
    # entirely unless BOTH dates were set, so a lone start or end date was
    # silently ignored.
    if start_date:
        filtered_data = filtered_data[filtered_data.index >= pd.to_datetime(start_date)]
    if end_date:
        filtered_data = filtered_data[filtered_data.index <= pd.to_datetime(end_date)]

    # Explicit figure/axes instead of the pyplot state machine.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(filtered_data.dropna(), bins=bin_width, edgecolor='black')
    ax.set_title(f'Histogram of {series}')
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    plt.show()

def summary_table(series, start_date, end_date):
    """Display summary statistics for one column of the module-level ``data`` frame.

    Parameters
    ----------
    series : hashable
        Column label of ``data`` to summarise.
    start_date, end_date : date-like or None
        Inclusive bounds on the index. Each bound is applied on its own, so
        supplying only one of them still filters; a falsy value (e.g. ``None``
        from an unset date picker) leaves that side open.

    Notes
    -----
    Count / mean / standard deviation / min / max come from the pandas Series
    methods with their default arguments. Skewness and kurtosis come from
    ``scipy.stats.skew`` and ``scipy.stats.kurtosis`` with their default
    arguments, computed on the non-missing values only.
    The result is shown with ``display`` as a one-column ('Value') table;
    nothing is returned.
    """
    filtered_data = data[series]

    # Apply each bound independently. Previously the filter was skipped
    # entirely unless BOTH dates were set, so a lone start or end date was
    # silently ignored.
    if start_date:
        filtered_data = filtered_data[filtered_data.index >= pd.to_datetime(start_date)]
    if end_date:
        filtered_data = filtered_data[filtered_data.index <= pd.to_datetime(end_date)]

    # scipy's moment functions do not skip NaN by default, so drop them once up front.
    observed = filtered_data.dropna()

    stats = {
        'Count': filtered_data.count(),
        'Mean': filtered_data.mean(),
        'Standard Deviation': filtered_data.std(),
        'Skewness': skew(observed),
        'Kurtosis': kurtosis(observed),
        'Minimum': filtered_data.min(),
        'Maximum': filtered_data.max()
    }

    # One row per statistic, single 'Value' column.
    stats_df = pd.Series(stats, name='Value').to_frame()
    display(stats_df)

In [4]:
# Widget controls: which series to inspect, how many histogram bins, and an
# optional date window.
series_selector = widgets.Dropdown(description='Series', options=data.columns)
bin_slider = widgets.IntSlider(description='Bin Count', value=50, min=10, max=100)
start_date_picker = widgets.DatePicker(description='Start Date')
end_date_picker = widgets.DatePicker(description='End Date')

# Lay the controls out in a single row
controls = widgets.HBox([series_selector, bin_slider, start_date_picker, end_date_picker])

# Both outputs react to the same series / date-window controls; only the
# histogram additionally needs the bin slider.
shared_controls = {
    'series': series_selector,
    'start_date': start_date_picker,
    'end_date': end_date_picker,
}

# Interactive plot output
out1 = widgets.interactive_output(plot_histogram, {**shared_controls, 'bin_width': bin_slider})

# Interactive summary-statistics output
out2 = widgets.interactive_output(summary_table, shared_controls)

# Controls stacked above the histogram, with the summary table to the right
ui = widgets.VBox([controls, out1])
widgets.HBox([ui, out2])


HBox(children=(VBox(children=(HBox(children=(Dropdown(description='Series', options=('Asset 1', 'Asset 2', 'As…

# Aggregating Daily Returns to Weekly

In [5]:
# Work on an independent copy so the helper columns added below never touch
# the `data` frame that the widgets above read from.
broad_assets = data.copy(deep=True)
broad_assets.head(n=5)

Unnamed: 0_level_0,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-07-31,0.015154,0.075677,-0.002266,0.000897,0.042162,0.004952,0.037313,0.002093,-0.002477,-0.008385,-0.031787
2000-08-01,0.009898,-0.04914,0.007874,0.006272,-0.011478,0.040433,-0.029227,0.010693,0.009594,0.000455,0.014613
2000-08-02,0.000718,-0.017754,0.00153,0.003732,0.009817,-0.009512,-0.000527,0.000973,0.011188,0.001176,0.018693
2000-08-03,0.019197,0.076083,0.007209,0.003099,-0.003059,-0.020954,-0.030686,-0.009466,0.006064,-0.026208,0.007214
2000-08-04,0.014248,-0.002954,0.006556,0.006231,0.016388,-0.024891,0.012934,0.004234,-0.001179,-0.003552,0.032614


In [6]:
# Ensure the index is datetime-typed, then label every row with its weekday
# name (used below to find the start of each weekly return window).
dt_index = pd.to_datetime(broad_assets.index)
broad_assets.index = dt_index
broad_assets['weekday'] = dt_index.day_name()
broad_assets.head()

Unnamed: 0_level_0,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11,weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-07-31,0.015154,0.075677,-0.002266,0.000897,0.042162,0.004952,0.037313,0.002093,-0.002477,-0.008385,-0.031787,Monday
2000-08-01,0.009898,-0.04914,0.007874,0.006272,-0.011478,0.040433,-0.029227,0.010693,0.009594,0.000455,0.014613,Tuesday
2000-08-02,0.000718,-0.017754,0.00153,0.003732,0.009817,-0.009512,-0.000527,0.000973,0.011188,0.001176,0.018693,Wednesday
2000-08-03,0.019197,0.076083,0.007209,0.003099,-0.003059,-0.020954,-0.030686,-0.009466,0.006064,-0.026208,0.007214,Thursday
2000-08-04,0.014248,-0.002954,0.006556,0.006231,0.016388,-0.024891,0.012934,0.004234,-0.001179,-0.003552,0.032614,Friday


In [7]:
weekday = broad_assets['weekday']
prev_weekday = weekday.shift(1)   # weekday of the previous row in the data

# Start of return calc periods are typically Tuesday
is_tuesday = weekday == 'Tuesday'
# Wednesday becomes the first trading day when Tuesday is a holiday (previous row is Monday),
# and in the few cases where markets were shut down both Monday and Tuesday (previous row is Friday)
is_wednesday_after_gap = (weekday == 'Wednesday') & prev_weekday.isin(['Monday', 'Friday'])

starts_week = (is_tuesday | is_wednesday_after_gap).to_numpy().copy()

# The 9/11 Terrorist attacks were on a Tuesday and shut down markets for a week. Markets re-opened on 9/17, so assuming we place trades
# that day and calculate returns through the following EOD Monday. This is the longest holding window in the dataset.
# TODO: Question, should we just drop 9/17/2001 from the dataset?
starts_week[broad_assets.index == '2001-09-17'] = True
starts_week[broad_assets.index == '2001-09-18'] = False

# 1 marks the first row of a weekly window; the running total of the flags
# gives every row the id of the window it belongs to.
broad_assets['trading_day'] = starts_week.astype(int)
broad_assets['week'] = broad_assets['trading_day'].cumsum()
broad_assets.loc['2000-08-27':'2000-09-12']

Unnamed: 0_level_0,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11,weekday,trading_day,week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-08-28,0.009907,0.011431,-0.011551,-0.006292,0.00484,-0.004696,0.017255,-0.007905,0.003144,-0.000994,0.030595,Monday,0,4
2000-08-29,-0.005726,-0.001428,-0.005991,-0.004429,0.011857,-0.016923,-0.028685,0.004255,0.013718,-0.005372,0.007735,Tuesday,1,5
2000-08-30,-0.009353,0.00824,0.002775,0.000891,0.010276,0.023121,0.003055,-0.012985,-0.00097,0.00522,0.033015,Wednesday,0,5
2000-08-31,0.019846,0.054594,0.014681,0.011561,0.020727,-0.010972,0.013995,-0.001245,0.008997,0.026358,-0.005365,Thursday,0,5
2000-09-01,0.00385,0.010384,0.003376,0.007755,0.014758,0.002049,0.042495,0.000655,-0.021759,-0.008183,-0.003105,Friday,0,5
2000-09-05,-0.018275,-0.055039,0.000885,0.000264,-0.011136,0.010078,-0.017364,-0.004304,0.019478,-0.008936,0.024536,Tuesday,1,6
2000-09-06,-0.019759,-0.075212,-0.009674,-0.005023,-0.009874,0.004712,-0.039244,-0.011284,0.030516,-0.008248,0.017216,Wednesday,0,6
2000-09-07,0.013537,0.060054,-0.004105,-0.003164,0.023932,0.013575,0.017056,-0.018844,-0.007099,-0.005001,0.013816,Thursday,0,6
2000-09-08,-0.010919,-0.07105,0.005894,0.003386,-0.029024,0.031138,-0.042704,-0.007504,0.012403,-0.005743,-0.048884,Friday,0,6
2000-09-11,-0.007251,-0.056224,-0.007845,-0.004096,-0.005023,0.014795,-0.013164,-0.023644,0.018496,0.002519,0.051176,Monday,0,6


In [8]:
# Number of rows in each weekly window. The one long return period is 9/11.
broad_assets['week'].value_counts()

week
59     6
629    5
705    5
712    5
711    5
      ..
700    4
699    4
695    4
682    4
0      1
Name: count, Length: 1258, dtype: int64

In [9]:
# Showing how 9/11 is being handled: the window before the closure (58) and
# the window that starts when markets re-opened (59)
broad_assets[broad_assets['week'].isin([58, 59])]

Unnamed: 0_level_0,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11,weekday,trading_day,week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2001-09-04,-0.001256,-0.062164,-0.030723,-0.018517,-0.006977,-0.004223,-0.054898,0.011222,0.039585,-0.020181,-0.015338,Tuesday,1,58
2001-09-05,-0.00185,-0.012526,0.003154,0.00308,-0.019005,-0.005365,-0.01026,-0.01412,-0.001585,0.002805,0.0011,Wednesday,0,58
2001-09-06,-0.044849,-0.075897,0.017594,0.014829,-0.039374,-0.025737,-0.03064,-0.02592,-0.010916,0.007947,0.0104,Thursday,0,58
2001-09-07,-0.037398,-0.011037,0.010878,0.008597,-0.036303,-0.018334,-0.066198,-0.007238,-0.016389,-0.000138,0.013082,Friday,0,58
2001-09-10,0.012343,0.016289,-0.011736,-0.003933,-0.020093,-0.00772,0.038647,-0.017213,0.009577,-0.010384,-0.010573,Monday,0,58
2001-09-17,-0.097882,-0.16519,0.013855,0.03999,-0.104084,-0.075047,-0.115303,-0.032963,-0.042481,0.130143,-0.011503,Monday,1,59
2001-09-18,-0.011715,-0.044917,-0.032494,-0.010723,-0.028863,0.01903,-0.030498,0.010262,-0.003618,-0.011864,-0.039678,Tuesday,0,59
2001-09-19,-0.032236,-0.02691,-0.001095,0.002691,-0.041062,-0.023215,-0.01817,0.01481,-0.004693,0.019003,-0.01423,Wednesday,0,59
2001-09-20,-0.062195,-0.06956,-0.017388,-0.007711,-0.07712,-0.050275,-0.087383,-0.056703,0.005272,-0.01284,-0.009203,Thursday,0,59
2001-09-21,-0.038168,-0.067525,0.009839,0.007077,-0.04519,-0.033071,-0.080264,-0.053164,0.022525,0.019297,-0.017084,Friday,0,59


In [10]:
# Drop the helper columns now that `week` has been derived from them.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it no longer raises KeyError because the
# columns were already removed by a previous run.
broad_assets = broad_assets.drop(columns=['weekday', 'trading_day'], errors='ignore')
broad_assets

Unnamed: 0_level_0,Asset 1,Asset 2,Asset 3,Asset 4,Asset 5,Asset 6,Asset 7,Asset 8,Asset 9,Asset 10,Asset 11,week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-07-31,0.015154,0.075677,-0.002266,0.000897,0.042162,0.004952,0.037313,0.002093,-0.002477,-0.008385,-0.031787,0
2000-08-01,0.009898,-0.049140,0.007874,0.006272,-0.011478,0.040433,-0.029227,0.010693,0.009594,0.000455,0.014613,1
2000-08-02,0.000718,-0.017754,0.001530,0.003732,0.009817,-0.009512,-0.000527,0.000973,0.011188,0.001176,0.018693,1
2000-08-03,0.019197,0.076083,0.007209,0.003099,-0.003059,-0.020954,-0.030686,-0.009466,0.006064,-0.026208,0.007214,1
2000-08-04,0.014248,-0.002954,0.006556,0.006231,0.016388,-0.024891,0.012934,0.004234,-0.001179,-0.003552,0.032614,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-30,0.020258,0.025782,-0.014374,-0.004687,0.013532,0.019439,0.004269,0.003950,0.007001,-0.014495,-0.023913,1256
2024-09-03,-0.042311,-0.063077,0.022496,0.007809,-0.061795,0.001921,-0.038360,-0.039868,0.002796,-0.008582,-0.039190,1257
2024-09-04,-0.003333,-0.004047,0.024987,0.009675,-0.003905,0.004290,-0.005114,-0.000679,-0.009153,0.002046,-0.018557,1257
2024-09-05,-0.006169,0.000871,0.015276,0.003401,-0.012329,-0.005856,-0.005124,0.002617,-0.004517,0.016654,0.004423,1257


In [11]:
def comp_ret(series):
    """Compound a sequence of simple returns into a single return.

    Computes ``prod(1 + r_i) - 1`` over all elements of ``series``.

    Parameters
    ----------
    series : iterable of numbers (list, array, pandas Series, ...)
        Simple per-period returns expressed as fractions (0.01 == 1%).

    Returns
    -------
    float
        The compounded return. A NaN anywhere in the input yields NaN (missing
        values are not skipped). Empty input yields 0.0, i.e. "no periods, no
        return"; the previous ``reduce``-based version raised ``TypeError``.
    """
    growth_factors = 1.0 + np.asarray(series, dtype=float)
    # np.prod of an empty array is 1.0, which gives the neutral 0.0 return.
    return float(np.prod(growth_factors) - 1.0)

# Compound the daily returns of every asset within each weekly window
grouped = broad_assets.groupby('week').agg(comp_ret)

# Attach a date label to every window: each row takes the date of the row
# before it, and the latest such date inside the window is kept.
# NOTE(review): with shift(1) followed by max(), a window is labelled with the
# date of its second-to-last row (not its first or last session) - confirm
# this is the intended label.
date_by_row = broad_assets.reset_index()[['Date', 'week']]
grouped_dates = (
    date_by_row
    .assign(Date=date_by_row['Date'].shift(1))
    .groupby('week')
    .max()
)

# Join labels and returns on the week id, then discard incomplete rows: any
# window with a missing asset return, and any window whose label is NaT (the
# very first row has no predecessor, so a window made up of only that row has
# no label).
weekly_returns = grouped_dates.merge(grouped, left_index=True, right_index=True).dropna()

In [12]:
# Persist the weekly returns next to the raw workbook
weekly_returns_file = data_path + '/aggregated_returns.xlsx'
weekly_returns.to_excel(weekly_returns_file, sheet_name='broad_assets_weekly_returns')

### ROC & MACD Output

In [21]:
# Indicators are computed on the asset return columns only.
# NOTE(review): calculate_roc / calculate_macd are not defined or imported in
# the visible part of this notebook (the `from utils import *` line is
# commented out) - presumably they come from `utils`; confirm and import them
# explicitly so the notebook survives Restart & Run All.
# NOTE(review): the ROC file name says "12m" while `period=12` is applied to
# weekly data - confirm which unit is intended.
asset_columns = [name for name in weekly_returns.columns if name.startswith('Asset')]
roc_df = calculate_roc(weekly_returns, asset_columns, period=12)
macd_df = calculate_macd(weekly_returns, asset_columns)

# Write each indicator table to its own workbook in the data folder
for indicator_df, file_name, sheet in (
    (roc_df, '/broad_assets_roc_12m.xlsx', 'broad_assets_roc'),
    (macd_df, '/broad_assets_macd.xlsx', 'broad_assets_macd'),
):
    indicator_df.to_excel(data_path + file_name, sheet_name=sheet)

Unnamed: 0_level_0,ROC_Asset 1,ROC_Asset 2,ROC_Asset 3,ROC_Asset 4,ROC_Asset 5,ROC_Asset 6,ROC_Asset 7,ROC_Asset 8,ROC_Asset 9,ROC_Asset 10,ROC_Asset 11
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
12,-0.064685,-0.151054,0.069096,0.087260,-0.059901,-0.145106,-0.151251,-0.328336,0.173836,-0.066813,0.262075
13,-0.122176,-0.352268,0.035567,0.054987,-0.121135,-0.116239,-0.140700,-0.328864,0.142877,-0.083184,0.113209
14,-0.095107,-0.267369,-0.010228,0.033191,-0.060475,-0.129013,-0.112804,-0.295853,0.104139,-0.086322,0.070796
15,-0.203360,-0.490012,0.010544,0.051024,-0.167073,-0.129441,-0.154346,-0.357337,0.095689,-0.094052,0.055295
16,-0.229168,-0.541254,0.027738,0.059481,-0.220466,-0.079121,-0.178590,-0.375760,0.127116,-0.072784,0.033779
...,...,...,...,...,...,...,...,...,...,...,...
1253,0.001929,-0.032813,0.113344,0.070770,-0.055479,0.161844,-0.091630,-0.068392,-0.017687,0.019346,-0.131626
1254,0.105014,0.082740,0.137695,0.078034,0.076758,0.276994,0.004722,0.033111,-0.044353,0.129082,-0.133747
1255,0.116886,0.078618,0.129629,0.074088,0.137144,0.309085,0.031506,0.054505,-0.052817,0.125326,-0.054588
1256,0.097234,0.031748,0.112931,0.072248,0.169111,0.313138,0.053312,0.047592,-0.055034,0.153037,-0.100221
