In [2]:
import requests
import zipfile
import io
import pandas as pd

# Zipped CSV of the Fama/French 5 Factors (2x3) [Daily] data set.
url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip'

# Download the zip file. requests applies no timeout by default, so set one
# explicitly: an unresponsive server then raises instead of hanging the kernel.
response = requests.get(url, timeout=30)
response.raise_for_status()  # ensure we notice bad responses (HTTP 4xx/5xx)

# Wrap the downloaded bytes in BytesIO so the archive is read from memory
# without writing a temporary file.
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    # List files in the zip archive (typically there's one CSV file)
    file_list = z.namelist()
    print("Files in zip:", file_list)

    # Read the first member of the archive.
    with z.open(file_list[0]) as f:
        # skiprows=3 skips the lines that precede the column-header row.
        # NOTE: this is tied to the file's current layout -- re-check the
        # value if the parsed columns ever look wrong.
        df = pd.read_csv(f, skiprows=3)

# Display the first few rows of the DataFrame
print(df.head())


Files in zip: ['F-F_Research_Data_5_Factors_2x3_daily.CSV']
   Unnamed: 0  Mkt-RF   SMB   HML   RMW   CMA     RF
0    19630701   -0.67  0.02 -0.35  0.03  0.13  0.012
1    19630702    0.79 -0.28  0.28 -0.08 -0.21  0.012
2    19630703    0.63 -0.18 -0.10  0.13 -0.25  0.012
3    19630705    0.40  0.09 -0.28  0.07 -0.30  0.012
4    19630708   -0.63  0.07 -0.20 -0.27  0.06  0.012


In [3]:
# Replace the default index with a daily pd.PeriodIndex named 'Day', built from
# the first column (parsed as YYYYMMDD), then remove that column and sort the
# rows by day.
date_col = df.columns[0]
parsed_days = pd.to_datetime(df[date_col], format='%Y%m%d')
df.index = pd.PeriodIndex(parsed_days, freq='D', name='Day')
df = df.drop(columns=date_col).sort_index()
df

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1963-07-01,-0.67,0.02,-0.35,0.03,0.13,0.012
1963-07-02,0.79,-0.28,0.28,-0.08,-0.21,0.012
1963-07-03,0.63,-0.18,-0.10,0.13,-0.25,0.012
1963-07-05,0.40,0.09,-0.28,0.07,-0.30,0.012
1963-07-08,-0.63,0.07,-0.20,-0.27,0.06,0.012
...,...,...,...,...,...,...
2024-12-24,1.11,-0.12,-0.05,-0.13,-0.37,0.017
2024-12-26,0.02,1.09,-0.19,-0.44,0.35,0.017
2024-12-27,-1.17,-0.44,0.56,0.41,0.03,0.017
2024-12-30,-1.09,0.24,0.74,0.55,0.14,0.017


In [9]:
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display

In [11]:
# Text boxes for the start and end of the plotted range, pre-filled with the
# first and last day present in df. Their values are used as slice bounds on
# the daily PeriodIndex, so full dates such as "2020-01-31" are expected
# (matching the 'YYYY-MM-DD' placeholder).
start_date_input = widgets.Text(
    value=str(df.index.min()),
    description='Start date',
    placeholder='YYYY-MM-DD'
)
end_date_input = widgets.Text(
    value=str(df.index.max()),
    description='End date',
    placeholder='YYYY-MM-DD'
)

# Initial data for the chart: slice df to the default range, divide every
# value by 100 and compound it as a running product of (1 + value / 100).
initial_slice = df[start_date_input.value:end_date_input.value]
computed_df = (1 + initial_slice / 100).cumprod()

# Convert the PeriodIndex to strings for proper display on the x-axis.
# Done once here and shared by every trace instead of once per column.
date_labels = computed_df.index.astype(str)

# Plotly FigureWidget with one line trace per column of computed_df.
fig = go.FigureWidget(
    data=[
        go.Scatter(
            x=date_labels,
            y=computed_df[col],
            mode='lines',
            name=f'Cumulative Product {col}'
        )
        for col in computed_df.columns
    ],
    layout=go.Layout(
        title=f'Cumulative Product from {start_date_input.value} to {end_date_input.value}',
        xaxis_title='Period',
        yaxis_title='Cumulative Product'
    )
)


# Output widget in which update_visualization prints its error messages.
output = widgets.Output()

def update_visualization(change):
    """Redraw the chart for the date range currently typed in the text boxes.

    Registered as an observer on both date inputs and also called directly
    (with ``None``) to draw the chart once.

    Args:
        change: The change notification supplied by ``observe``, or ``None``
            when called directly. It is not inspected; the current values are
            read from ``start_date_input`` and ``end_date_input``.

    Returns:
        None. The traces and title of ``fig`` are updated in place.

    Any exception raised while slicing or updating the figure is caught and
    printed into the ``output`` widget rather than propagated, so invalid or
    partially typed dates do not break the widget callback.
    """
    with output:
        output.clear_output()  # Clear previous messages
        start_date = start_date_input.value
        end_date = end_date_input.value
        try:
            # Slice the DataFrame using the PeriodIndex.
            sliced_df = df[start_date:end_date]
            # Compute the cumulative product for each column: (1 + column/100).cumprod()
            computed_df = (1 + sliced_df / 100).cumprod()
            # The x values are identical for every trace, so convert the
            # PeriodIndex to strings once instead of once per column.
            date_labels = computed_df.index.astype(str)
            # Batch the property changes so the figure is refreshed once.
            with fig.batch_update():
                # Traces were created in column order, so pair them positionally.
                for trace, col in zip(fig.data, computed_df.columns):
                    trace.x = date_labels
                    trace.y = computed_df[col]
                    trace.name = f'Cumulative Product {col}'
                # Update the title to reflect the new date range.
                fig.layout.title = f'Cumulative Product from {start_date} to {end_date}'
        except Exception as e:
            # Deliberately broad: report the problem in the output widget.
            print("Error updating visualization:", e)

# Re-run the update whenever the value of either date box changes.
for date_box in (start_date_input, end_date_input):
    date_box.observe(update_visualization, names='value')

# Show the two date boxes, the chart, and the message area, in that order.
display(start_date_input, end_date_input, fig, output)

# Call the update once by hand so the default range is drawn immediately.
update_visualization(None)

Text(value='1963-07-01', description='Start date', placeholder='YYYY-MM-DD')

Text(value='2024-12-31', description='End date', placeholder='YYYY-MM-DD')

FigureWidget({
    'data': [{'mode': 'lines',
              'name': 'Cumulative Product Mkt-RF',
              'type': 'scatter',
              'uid': '5f2a32ac-865d-45d6-b9f0-60668e72bac4',
              'x': array(['1963-07-01', '1963-07-02', '1963-07-03', ..., '2024-12-27',
                          '2024-12-30', '2024-12-31'], shape=(15481,), dtype=object),
              'y': {'bdata': ('07zjFB3J7z/4LULKsgTwP4W7fWWIHv' ... '7Ngn9BQFTS5CyvTkFAndP/mU06QUA='),
                    'dtype': 'f8'}},
             {'mode': 'lines',
              'name': 'Cumulative Product SMB',
              'type': 'scatter',
              'uid': '6b652925-37f3-489b-a49b-d50eb134271b',
              'x': array(['1963-07-01', '1963-07-02', '1963-07-03', ..., '2024-12-27',
                          '2024-12-30', '2024-12-31'], shape=(15481,), dtype=object),
              'y': {'bdata': ('4lgXt9EA8D+qKPs6survP/2/byz92+' ... 'X3SroAQOthUgKSxABAUyTIl+DRAEA='),
                    'dtype': 'f8'}},
            

Output()