In [1]:
# !pip install -U kaleido

In [2]:
import pandas as pd
import requests
from io import BytesIO
from datetime import datetime
from pandas.tseries.offsets import CustomBusinessDay
import holidays
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import plotly.io as pio
import re

In [3]:
us_bd = CustomBusinessDay(calendar=holidays.US())
look_back = 30

In [4]:
def hist_trace(days=look_back):
    
    data_frames = []
    
    business_dates = pd.date_range(end=datetime.now(), periods=days+1, freq=us_bd)
    
    for date in business_dates:
        date_str = date.strftime('%Y-%m-%d')
        url = f"https://cdn.finra.org/trace/treasury-aggregates/daily/ts-daily-aggregates-{date_str}.xlsx"
        
        try:
            response = requests.get(url)
            response.raise_for_status()  
            df = pd.read_excel(BytesIO(response.content), header=[3, 4])

            # Clean the DataFrame
            df = df.drop(columns=[('Total', 'VWAP')])
            tips_index = df[df[('Category', 'Unnamed: 0_level_1')] == 'TIPS'].index[0]
            df = df.loc[tips_index:].dropna(axis=0)
            df = df[df[('Category', 'Unnamed: 0_level_1')] != 'Total'].reset_index(drop=True)
            df['Date'] = date_str
            
            data_frames.append(df)
        
        except requests.exceptions.HTTPError:
            print(f"Data for {date_str} is not available. Skipping this date.")
            continue
    
    if not data_frames:
        print("No data available for the specified date range.")
        return None
    
    combined_df = pd.concat(data_frames).reset_index(drop=True)

    # for easier display
    combined_df[('Category', 'Unnamed: 0_level_1')] = combined_df[('Category', 'Unnamed: 0_level_1')].replace('> 5 years and <= 10 years', '> 5 and <= 10 years')

    return combined_df

In [5]:
def relabel(df):
    """
    concat row names with its higher level category
    """
    # Get the index positions of the higher categories (e.g., rows that are not "On-the-run" or "Off-the-run")
    higher_category_indices = df.index[~df[('Category', 'Unnamed: 0_level_1')].str.contains('On-the-run|Off-the-run', case=False, na=False)]

    # Iterate through each higher category index
    for i in higher_category_indices:
        higher_category_name = df.at[i, ('Category', 'Unnamed: 0_level_1')]
        j = i + 1
        while j < len(df) and ('On-the-run' in df.at[j, ('Category', 'Unnamed: 0_level_1')] or 'Off-the-run' in df.at[j, ('Category', 'Unnamed: 0_level_1')]):
            df.at[j, ('Category', 'Unnamed: 0_level_1')] = higher_category_name + ' ' + df.at[j, ('Category', 'Unnamed: 0_level_1')]
            j += 1

    return df


In [6]:
df_90 = relabel(hist_trace(days=90))
df_180 = relabel(hist_trace(days=180))

Data for 2024-05-27 is not available. Skipping this date.
Data for 2024-06-19 is not available. Skipping this date.
Data for 2024-07-04 is not available. Skipping this date.
Data for 2024-09-02 is not available. Skipping this date.
Data for 2024-09-19 is not available. Skipping this date.
Data for 2024-01-15 is not available. Skipping this date.
Data for 2024-02-19 is not available. Skipping this date.
Data for 2024-03-29 is not available. Skipping this date.
Data for 2024-05-27 is not available. Skipping this date.
Data for 2024-06-19 is not available. Skipping this date.
Data for 2024-07-04 is not available. Skipping this date.
Data for 2024-09-02 is not available. Skipping this date.
Data for 2024-09-19 is not available. Skipping this date.


In [7]:
def histogram_with_std(data, col_name, row, col, fig):
    mean = np.mean(data[col_name])
    std = np.std(data[col_name])

    latest_date = data[('Date', '')].max()
    latest_value = data[data[('Date', '')] == latest_date][col_name].values[0]

    counts, bin_edges = np.histogram(data[col_name], bins=50)
    bin_colors = ['blue'] * len(counts)

    # Find which bin the latest value falls into
    for i in range(len(bin_edges) - 1):
        if bin_edges[i] <= latest_value < bin_edges[i + 1]:
            bin_colors[i] = 'red'  
            break

    # Create the histogram trace
    hist = go.Bar(
        x=0.5 * (bin_edges[1:] + bin_edges[:-1]),  # Midpoints of bins for bar positions
        y=counts,
        marker_color=bin_colors,  # Assign custom colors
        name=f'{col_name[1]} Histogram'
    )

    # Add the histogram to the figure
    fig.add_trace(hist, row=row, col=col)

    # Extract the maximum y value from the histogram (height of the tallest bin)
    y_max = max(counts) + 10

    # Add lines for mean and standard deviation, scaled to y_max
    fig.add_shape(type="line", x0=mean, x1=mean, y0=0, y1=y_max, xref='x', yref='y',
                  line=dict(color="black", width=2, dash="dot"), row=row, col=col)
    fig.add_shape(type="line", x0=mean + std, x1=mean + std, y0=0, y1=y_max, xref='x', yref='y',
                  line=dict(color="green", width=2, dash="dot"), row=row, col=col)
    fig.add_shape(type="line", x0=mean - std, x1=mean - std, y0=0, y1=y_max, xref='x', yref='y',
                  line=dict(color="green", width=2, dash="dot"), row=row, col=col)

    # Add annotations for mean and standard deviation
    fig.add_annotation(x=mean, y=y_max + 1, yref="y", text=f"Mean:{mean:.0f}", showarrow=False, 
                       font=dict(color="black"), row=row, col=col)
    fig.add_annotation(x=mean + std, y=y_max + 1, yref="y", text=f"+1SD:{mean + std:.0f}", showarrow=False, 
                       font=dict(color="green"), row=row, col=col)
    fig.add_annotation(x=mean - std, y=y_max + 1, yref="y", text=f"-1SD:{mean - std:.0f}", showarrow=False, 
                       font=dict(color="green"), row=row, col=col)
    fig.update_annotations(font_size=10)

In [8]:
def plot_hist(df, days):
    tenors = df[('Category', 'Unnamed: 0_level_1')].unique()
    
    for tenor in tenors:
        df_tenor = df[df[('Category', 'Unnamed: 0_level_1')] == tenor]
        
        fig = make_subplots(
            rows=2, cols=3,
  
            horizontal_spacing=0.06,  # Reduce horizontal space between plots
            vertical_spacing=0.09      # Reduce vertical space between plots
        )
        
        # Define columns to iterate over for each row
        par_value_cols = [
            ('Total', 'Par Value'), 
            ('ATS & Interdealer', 'Par Value'), 
            ('Dealer to Customer', 'Par Value')
        ]
        trade_cols = [
            ('Total', 'Trades'), 
            ('ATS & Interdealer', 'Trades'), 
            ('Dealer to Customer', 'Trades')
        ]
        
        # Plot Par Values and Trades
        for i, col in enumerate(par_value_cols, 1):
            histogram_with_std(df_tenor[[col, ('Date', '')]], col_name=col, row=1, col=i, fig=fig)
        for i, col in enumerate(trade_cols, 1):
            histogram_with_std(df_tenor[[col, ('Date', '')]], col_name=col, row=2, col=i, fig=fig)
        
        fig.update_layout(title=f'{tenor} TRACE {days}-day Dist, As of: {df_tenor[("Date", "")].max()}', height=600, width=1400, showlegend=False)  

        # Custom size subplot title (this is to avoid changing previous std annotation size)
        subplot_titles = [
            'Total Par Value in Billion', 'ATS & Interdealer Par Value', 'Dealer to Customer Par Value',
            'Total Number of Trades', 'ATS & Interdealer Trades', 'Dealer to Customer Trades'
        ]


        title_coords = [
            (0.12, 1.05), (0.5, 1.05), (0.85, 1.05),  # Row 1 titles
            (0.12, 0.48), (0.5, 0.48), (0.85, 0.48)   # Row 2 titles
        ]
        
        # Add annotations for each subplot title
        for i, (title, (x, y)) in enumerate(zip(subplot_titles, title_coords)):
            fig.add_annotation(
                text=title,  
                xref="paper", yref="paper",
                x=x, y=y,  
                showarrow=False,
                font=dict(size=13), 
                xanchor='center'
            )
        fig.update_layout(
            font=dict(
                family="Times New Roman"
            )
        )
        safe_tenor = re.sub(r'[^a-zA-Z0-9_]', '', tenor)
        file_name = f"tenor_{safe_tenor}_distribution_{days}_days.png"
        pio.write_image(fig, file_name, format='png')
        print(f"Saved plot as {file_name}")
        fig.show()

In [9]:
plot_hist(df_180, 180)

Saved plot as tenor_TIPS_distribution_180_days.png


Saved plot as tenor_5years_distribution_180_days.png


Saved plot as tenor_5yearsOntherun_distribution_180_days.png


Saved plot as tenor_5yearsOfftherun_distribution_180_days.png


Saved plot as tenor_5and10years_distribution_180_days.png


Saved plot as tenor_5and10yearsOntherun_distribution_180_days.png


Saved plot as tenor_5and10yearsOfftherun_distribution_180_days.png


Saved plot as tenor_10years_distribution_180_days.png


Saved plot as tenor_10yearsOntherun_distribution_180_days.png


Saved plot as tenor_10yearsOfftherun_distribution_180_days.png
