In [None]:
# onver thigs in tight time and date formate exel

In [15]:
import pandas as pd
from datetime import datetime

def parse_date_time(date_str, time_str):
    """
    Parse date (e.g. '1/1/2021') and time (e.g. '5', '100', '2359') into
    a Python datetime. '5' means 00:05, '100' means 01:00, '2359' means 23:59.

    Returns: datetime(YYYY, MM, DD, HH, MM).
    """
    # 1) Parse the date (Month/Day/Year).
    date_obj = datetime.strptime(date_str, "%m/%d/%Y").date()  
    # e.g. 2021-01-01

    # 2) Convert the Time string to integer.
    # Example: "2359" -> 2359
    time_val = int(time_str)

    # 3) Extract hour and minute.
    #    e.g. 2359 // 100 = 23,  2359 % 100 = 59
    hour = time_val // 100
    minute = time_val % 100

    # 4) Combine into a new datetime object.
    combined_dt = datetime(
        year=date_obj.year,
        month=date_obj.month,
        day=date_obj.day,
        hour=hour,
        minute=minute
    )
    return combined_dt

def main():
    # Path to your CSV file
    csv_file = "$BTC-USD.csv"  

    # Load CSV into a pandas DataFrame
    df = pd.read_csv(csv_file)

    # Convert the Time column to string if necessary,
    # then parse it into a proper DateTime
    df["DateTime"] = df.apply(
        lambda row: parse_date_time(str(row["Date"]), str(row["Time"])),
        axis=1
    )

    # Optional: Drop original columns if you don’t need them anymore
    df.drop(columns=["Date", "Time"], inplace=True)

    # Optional: reorder columns to put DateTime up front
    new_col_order = ["DateTime", "Symbol", "Description", "Open", "High", "Low", "Close", "Volume"]
    df = df[new_col_order]

    # Show a few rows in the console
    print(df.head(10))

    # Optional: Save back to CSV
    df.to_csv("BTC-USD.csv", index=False)


if __name__ == "__main__":
    main()


             DateTime         Symbol               Description         Open  \
0 2021-01-01 00:05:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28990.080000   
1 2021-01-01 00:10:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 29029.750000   
2 2021-01-01 00:15:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28921.690000   
3 2021-01-01 00:20:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28811.950000   
4 2021-01-01 00:25:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28901.780000   
5 2021-01-01 00:30:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28910.050000   
6 2021-01-01 00:35:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28904.550000   
7 2021-01-01 00:40:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28956.500000   
8 2021-01-01 00:45:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 28980.220000   
9 2021-01-01 00:50:00  $BTC-USD@GDAX  BitCoin/US Dollar @ GDAX 29019.200000   

          High          Low        Close     Volume  
0 29083.750000 28973.740000 29034.300000 104.083461  
1 29031.710000 28907.9

In [None]:
#main code for the aluatnign the RV, CV AND JV

In [33]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import warnings
warnings.filterwarnings('ignore')

def calculate_threshold(returns, window=185, c=3):
    """
    Calculate threshold function v_j following Corsi, Pirino, and Reno (2010)
    """
    local_var = pd.Series(returns).rolling(window=window, center=True).var()
    return c * local_var

def calculate_volatility_measures(df):
    """
    Calculate RV, TBPV (CV), and JV following exactly the paper's methodology
    """
    # Convert DateTime to datetime type
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    
    # Calculate log returns precisely as specified
    df['log_returns'] = np.log(df['Close']).diff()
    
    daily_groups = df.groupby(df['DateTime'].dt.date)
    results = []
    
    # Constant from the paper
    mu1 = 0.7979
    
    for date, group in daily_groups:
        if len(group) < 288:  # Full day of 5-min intervals
            continue
            
        # Get returns for the day
        returns = group['log_returns'].dropna().values
        n = len(returns)
        
        # 1. Calculate RV exactly as in equation (1)
        RV = np.sum(returns ** 2)
        
        # 2. Calculate TBPV exactly as in equation (2)
        # First calculate threshold function v_j
        thresholds = calculate_threshold(returns)
        
        # Initialize TBPV calculation
        TBPV = 0
        for j in range(1, n):
            # Calculate indicator functions precisely
            I1 = 1 if (returns[j-1]**2 <= thresholds.iloc[j-1] if pd.notnull(thresholds.iloc[j-1]) else True) else 0
            I2 = 1 if (returns[j]**2 <= thresholds.iloc[j] if pd.notnull(thresholds.iloc[j]) else True) else 0
            
            # Implement exact TBPV formula
            TBPV += abs(returns[j-1]) * abs(returns[j]) * I1 * I2
        
        # Scale by μ_1^(-2) as in the paper
        CV = TBPV / (mu1 ** 2)
        
        # 3. Calculate Jump Variation as RV - CV
        JV = max(RV - CV, 0)
        
        results.append({
            'Date': date,
            'RV': RV,
            'CV': CV,
            'JV': JV
        })
    
    return pd.DataFrame(results)

def main():
    # Read the CSV file
    df = pd.read_csv('BTC-USD.CSV')
    
    # Calculate measures
    results_df = calculate_volatility_measures(df)
    
    # Save results
    results_df.to_csv('btc_volatility_measures_185.csv', index=False)
    
    print("\nFirst few rows of results:")
    print(results_df.head())
    
    print("\nSummary statistics:")
    print(results_df.describe())

if __name__ == "__main__":
    main()


First few rows of results:
         Date     RV     CV     JV
0  2021-01-01 0.0009 0.0007 0.0001
1  2021-01-02 0.0052 0.0044 0.0008
2  2021-01-03 0.0042 0.0034 0.0008
3  2021-01-04 0.0093 0.0063 0.0030
4  2021-01-05 0.0058 0.0053 0.0005

Summary statistics:
             RV        CV        JV
count 1325.0000 1325.0000 1325.0000
mean     0.0013    0.0009    0.0004
std      0.0026    0.0015    0.0014
min      0.0000    0.0000    0.0000
25%      0.0004    0.0003    0.0001
50%      0.0007    0.0005    0.0002
75%      0.0013    0.0010    0.0004
max      0.0650    0.0234    0.0416


In [None]:
# FROM the output of the above code, getting the sumrry stat and also viulization 

In [25]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

def load_and_process_data(file_path):
    """
    Load and process the volatility measures data
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df[['RV', 'CV', 'JV']] = df[['RV', 'CV', 'JV']] * 100
    return df

def get_sample_info(data):
    """
    Get sample period information
    """
    start_date = data['Date'].min()
    end_date = data['Date'].max()
    total_days = len(data)
    trading_days = len(data[data['RV'] > 0])
    
    return {
        'Start Date': start_date.strftime('%Y-%m-%d'),
        'End Date': end_date.strftime('%Y-%m-%d'),
        'Total Days in Sample': total_days,
        'Trading Days': trading_days
    }

def calculate_summary_stats(data):
    """
    Calculate comprehensive summary statistics for daily volatility measures
    """
    measures = ['RV', 'CV', 'JV']
    stats_dict = {}
    
    for measure in measures:
        series = data[measure].dropna()
        
        stats_dict[measure] = {
            'Mean (daily %)': series.mean(),
            'Median (daily %)': series.median(),
            'Std Dev (daily %)': series.std(),
            'Min (daily %)': series.min(),
            'Max (daily %)': series.max(),
            'Skewness': stats.skew(series),
            'Kurtosis': stats.kurtosis(series),
            'Q1 (daily %)': series.quantile(0.25),
            'Q3 (daily %)': series.quantile(0.75)
        }
    
    summary_df = pd.DataFrame(stats_dict)
    return summary_df, data[measures].corr()

def create_visualization(data):
    """
    Create publication-ready plotly visualizations with bottom legends
    """
    # Create two separate figures
    fig1 = go.Figure()
    fig2 = go.Figure()

    # Add traces to first figure (RV and CV)
    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['RV'],
                  name='Realized Volatility',
                  line=dict(color='#1f77b4', width=1))
    )

    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['CV'],
                  name='Continuous Volatility',
                  line=dict(color='#ff7f0e', width=1))
    )

    # Add trace to second figure (JV)
    fig2.add_trace(
        go.Scatter(x=data['Date'], y=data['JV'],
                  name='Jump Variation',
                  line=dict(color='#2ca02c', width=1))
    )

    # Update layout for both figures
    layout_template = dict(
        template='plotly_white',
        showlegend=True,
        height=400,
        legend=dict(
            orientation="h",    # Horizontal legend
            yanchor="top",
            y=-0.2,            # Position below the plot
            xanchor="center",
            x=0.5,             # Centered
            font=dict(size=10)
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin=dict(b=80)      # Extra bottom margin for legend
    )

    fig1.update_layout(
        **layout_template,
        title=dict(
            text="Panel A: Daily Realized and Continuous Volatility Components",
            x=0.5,
            y=0.95
        )
    )

    fig2.update_layout(
        **layout_template,
        title=dict(
            text="Panel B: Daily Jump Variation Component",
            x=0.5,
            y=0.95
        )
    )

    # Update axes for both figures
    for fig in [fig1, fig2]:
        fig.update_xaxes(showgrid=False, zeroline=False, title_text="Date")
        fig.update_yaxes(showgrid=False, zeroline=False, 
                        ticksuffix='%', title_text="Volatility (%)")

    return fig1, fig2

def main():
    # Load data
    data = load_and_process_data('btc_volatility_measures.csv')
    
    # Get sample information
    sample_info = get_sample_info(data)
    
    # Calculate summary statistics
    summary_stats, correlation_matrix = calculate_summary_stats(data)
    
    # Print sample information
    print("\nSample Information:")
    print("==================")
    for key, value in sample_info.items():
        print(f"{key}: {value}")
    
    # Print summary statistics
    pd.set_option('display.float_format', lambda x: '{:.4f}'.format(x))
    
    print("\nSummary Statistics (Daily Measures):")
    print("==================================")
    print(summary_stats)
    
    print("\nCorrelation Matrix:")
    print("==================")
    print(correlation_matrix.round(4))
    
    # Additional metrics
    jump_days = (data['JV'] > 0).mean() * 100
    avg_jump_size = data.loc[data['JV'] > 0, 'JV'].mean()
    
    print(f"\nAdditional Metrics:")
    print("==================")
    print(f"Percentage of days with jumps: {jump_days:.2f}%")
    print(f"Average jump size when present (daily %): {avg_jump_size:.4f}%")
    
    # Create and display visualizations
    fig1, fig2 = create_visualization(data)
    fig1.show()
    fig2.show()

if __name__ == "__main__":
    main()


Sample Information:
Start Date: 2021-01-01
End Date: 2024-09-05
Total Days in Sample: 1325
Trading Days: 1325

Summary Statistics (Daily Measures):
                        RV      CV       JV
Mean (daily %)      0.1313  0.0707   0.0606
Median (daily %)    0.0708  0.0393   0.0299
Std Dev (daily %)   0.2587  0.1195   0.1520
Min (daily %)       0.0010  0.0005   0.0005
Max (daily %)       6.5017  2.2376   4.2640
Skewness           13.1128  7.5858  17.6973
Kurtosis          284.1219 97.9198 450.4332
Q1 (daily %)        0.0370  0.0206   0.0146
Q3 (daily %)        0.1337  0.0738   0.0609

Correlation Matrix:
       RV     CV     JV
RV 1.0000 0.9398 0.9632
CV 0.9398 1.0000 0.8133
JV 0.9632 0.8133 1.0000

Additional Metrics:
Percentage of days with jumps: 100.00%
Average jump size when present (daily %): 0.0606%


In [None]:
#breking things in two group for the brekpoiint and its diffrent summry, with poper dates according to our other anlysis 


In [35]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

def load_and_process_data(file_path, start_date='2022-01-01'):
    """
    Load and process the volatility measures data with period split
    Args:
        file_path: Path to the CSV file
        start_date: Start date for analysis in 'YYYY-MM-DD' format
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    
    # Filter data from start_date
    df = df[df['Date'] >= pd.Timestamp(start_date)].reset_index(drop=True)
    
    # Convert to percentage
    df[['RV', 'CV', 'JV']] = df[['RV', 'CV', 'JV']] * 100
    
    # Add period indicator
    breakpoint_date = pd.Timestamp('2023-10-23')
    df['Period'] = np.where(df['Date'] <= breakpoint_date, 'Pre-Break', 'Post-Break')
    
    return df, breakpoint_date

def get_sample_info(data, breakpoint_date):
    """
    Get sample period information for both periods
    """
    periods = {}
    for period in ['Pre-Break', 'Post-Break']:
        period_data = data[data['Period'] == period]
        periods[period] = {
            'Start Date': period_data['Date'].min().strftime('%Y-%m-%d'),
            'End Date': period_data['Date'].max().strftime('%Y-%m-%d'),
            'Total Days': len(period_data),
            'Trading Days': len(period_data[period_data['RV'] > 0])
        }
    return periods

def calculate_summary_stats(data, period=None):
    """
    Calculate summary statistics, optionally for a specific period
    """
    if period:
        data = data[data['Period'] == period]
    
    measures = ['RV', 'CV', 'JV']
    stats_dict = {}
    
    for measure in measures:
        series = data[measure].dropna()
        
        stats_dict[measure] = {
            'Mean (daily %)': series.mean(),
            'Median (daily %)': series.median(),
            'Std Dev (daily %)': series.std(),
            'Min (daily %)': series.min(),
            'Max (daily %)': series.max(),
            'Skewness': stats.skew(series),
            'Kurtosis': stats.kurtosis(series),
            'Q1 (daily %)': series.quantile(0.25),
            'Q3 (daily %)': series.quantile(0.75)
        }
    
    summary_df = pd.DataFrame(stats_dict)
    return summary_df, data[measures].corr()

def create_visualization(data, breakpoint_date):
    """
    Create publication-ready plotly visualizations with break point line
    """
    fig1 = go.Figure()
    fig2 = go.Figure()

    # Add traces to first figure (RV and CV)
    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['RV'],
                  name='Realized Volatility',
                  line=dict(color='#1f77b4', width=1))
    )

    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['CV'],
                  name='Continuous Volatility',
                  line=dict(color='#ff7f0e', width=1))
    )

    # Add vertical line as a shape for first figure
    fig1.add_shape(
        type="line",
        x0=breakpoint_date,
        x1=breakpoint_date,
        y0=0,
        y1=1,
        yref="paper",
        line=dict(color="red", width=1, dash="dash")
    )

    # Add annotation for break point in first figure
    fig1.add_annotation(
        x=breakpoint_date,
        y=1,
        yref="paper",
        text="October 23, 2023",
        showarrow=False,
        xanchor="left",
        textangle=0,
        yshift=10
    )

    # Add trace to second figure (JV)
    fig2.add_trace(
        go.Scatter(x=data['Date'], y=data['JV'],
                  name='Jump Variation',
                  line=dict(color='#2ca02c', width=1))
    )

    # Add vertical line as a shape for second figure
    fig2.add_shape(
        type="line",
        x0=breakpoint_date,
        x1=breakpoint_date,
        y0=0,
        y1=1,
        yref="paper",
        line=dict(color="red", width=1, dash="dash")
    )

    # Add annotation for break point in second figure
    fig2.add_annotation(
        x=breakpoint_date,
        y=1,
        yref="paper",
        text="October 23, 2023",
        showarrow=False,
        xanchor="left",
        textangle=0,
        yshift=10
    )

    # Update layout for both figures
    layout_template = dict(
        template='plotly_white',
        showlegend=True,
        height=400,
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=10)
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin=dict(b=80)
    )

    fig1.update_layout(
        **layout_template,
        title=dict(
            text="Panel A: Daily Realized and Continuous Volatility Components",
            x=0.5,
            y=0.95
        )
    )

    fig2.update_layout(
        **layout_template,
        title=dict(
            text="Panel B: Daily Jump Variation Component",
            x=0.5,
            y=0.95
        )
    )

    # Update axes for both figures
    for fig in [fig1, fig2]:
        fig.update_xaxes(showgrid=False, zeroline=False, title_text="Date")
        fig.update_yaxes(showgrid=False, zeroline=False, 
                        ticksuffix='%', title_text="Volatility (%)")

    return fig1, fig2

def main():
    # Define your start date here
    START_DATE = '2022-06-01'  # Modify this to your desired start date
    
    # Load data with period split and custom start date
    data, breakpoint_date = load_and_process_data('btc_volatility_measures_185.csv', 
                                                 start_date=START_DATE)
    
    # Get sample information for both periods
    sample_info = get_sample_info(data, breakpoint_date)
    
    # Calculate summary statistics for both periods
    pre_stats, pre_corr = calculate_summary_stats(data, 'Pre-Break')
    post_stats, post_corr = calculate_summary_stats(data, 'Post-Break')
    
    # Print sample information
    print("\nSample Information:")
    print("==================")
    print(f"Analysis Start Date: {START_DATE}")
    for period, info in sample_info.items():
        print(f"\n{period} Period:")
        for key, value in info.items():
            print(f"{key}: {value}")
    
    # Print summary statistics
    pd.set_option('display.float_format', lambda x: '{:.4f}'.format(x))
    
    print("\nPre-Break Summary Statistics (Daily Measures):")
    print("============================================")
    print(pre_stats)
    print("\nPre-Break Correlation Matrix:")
    print(pre_corr.round(4))
    
    print("\nPost-Break Summary Statistics (Daily Measures):")
    print("=============================================")
    print(post_stats)
    print("\nPost-Break Correlation Matrix:")
    print(post_corr.round(4))
    
    # Calculate and print additional metrics for both periods
    for period in ['Pre-Break', 'Post-Break']:
        period_data = data[data['Period'] == period]
        jump_days = (period_data['JV'] > 0).mean() * 100
        avg_jump_size = period_data.loc[period_data['JV'] > 0, 'JV'].mean()
        
        print(f"\nAdditional Metrics - {period}:")
        print("=" * (20 + len(period)))
        print(f"Percentage of days with jumps: {jump_days:.2f}%")
        print(f"Average jump size when present (daily %): {avg_jump_size:.4f}%")
    
    # Create and display visualizations
    fig1, fig2 = create_visualization(data, breakpoint_date)
    fig1.show()
    fig2.show()

if __name__ == "__main__":
    main()


Sample Information:
Analysis Start Date: 2022-06-01

Pre-Break Period:
Start Date: 2022-06-01
End Date: 2023-10-23
Total Days: 503
Trading Days: 503

Post-Break Period:
Start Date: 2023-10-24
End Date: 2024-09-05
Total Days: 313
Trading Days: 313

Pre-Break Summary Statistics (Daily Measures):
                       RV      CV      JV
Mean (daily %)     0.0793  0.0551  0.0242
Median (daily %)   0.0417  0.0288  0.0109
Std Dev (daily %)  0.1260  0.0908  0.0436
Min (daily %)      0.0010  0.0008  0.0000
Max (daily %)      0.9952  0.7764  0.4055
Skewness           4.5187  4.9315  4.6680
Kurtosis          24.2891 29.2917 27.2527
Q1 (daily %)       0.0207  0.0147  0.0047
Q3 (daily %)       0.0820  0.0573  0.0254

Pre-Break Correlation Matrix:
       RV     CV     JV
RV 1.0000 0.9708 0.8673
CV 0.9708 1.0000 0.7224
JV 0.8673 0.7224 1.0000

Post-Break Summary Statistics (Daily Measures):
                       RV      CV      JV
Mean (daily %)     0.0778  0.0588  0.0191
Median (daily %)   0.053

In [None]:
# with the use of the vaotility not the varince

In [34]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

def load_and_process_data(file_path, start_date='2022-01-01'):
    """
    Load and process the volatility measures data with period split
    Convert variance measures to standard deviation (volatility)
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    
    # Filter data from start_date
    df = df[df['Date'] >= pd.Timestamp(start_date)].reset_index(drop=True)
    
    # Convert variance to volatility (take square root) and convert to percentage
    for col in ['RV', 'CV', 'JV']:
        df[col] = np.sqrt(df[col]) * 100  # Convert to volatility and percentage
    
    # Add period indicator
    breakpoint_date = pd.Timestamp('2023-10-23')
    df['Period'] = np.where(df['Date'] <= breakpoint_date, 'Pre-Break', 'Post-Break')
    
    return df, breakpoint_date

def get_sample_info(data, breakpoint_date):
    """
    Get sample period information for both periods
    """
    periods = {}
    for period in ['Pre-Break', 'Post-Break']:
        period_data = data[data['Period'] == period]
        periods[period] = {
            'Start Date': period_data['Date'].min().strftime('%Y-%m-%d'),
            'End Date': period_data['Date'].max().strftime('%Y-%m-%d'),
            'Total Days': len(period_data),
            'Trading Days': len(period_data[period_data['RV'] > 0])
        }
    return periods

def calculate_summary_stats(data, period=None):
    """
    Calculate comprehensive summary statistics for volatility measures
    Now working with standard deviation (volatility) values
    """
    if period:
        data = data[data['Period'] == period]
    
    measures = ['RV', 'CV', 'JV']
    stats_dict = {}
    
    for measure in measures:
        series = data[measure].dropna()
        
        stats_dict[measure] = {
            'Mean Volatility (daily %)': series.mean(),
            'Median Volatility (daily %)': series.median(),
            'Std Dev of Volatility (daily %)': series.std(),
            'Min Volatility (daily %)': series.min(),
            'Max Volatility (daily %)': series.max(),
            'Skewness': stats.skew(series),
            'Kurtosis': stats.kurtosis(series),
            'Q1 (daily %)': series.quantile(0.25),
            'Q3 (daily %)': series.quantile(0.75)
        }
    
    summary_df = pd.DataFrame(stats_dict)
    return summary_df, data[measures].corr()

def create_visualization(data, breakpoint_date):
    """
    Create publication-ready plotly visualizations for volatility measures
    """
    fig1 = go.Figure()
    fig2 = go.Figure()

    # Add traces to first figure (RV and CV)
    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['RV'],
                  name='Realized Volatility',
                  line=dict(color='#1f77b4', width=1))
    )

    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['CV'],
                  name='Continuous Volatility',
                  line=dict(color='#ff7f0e', width=1))
    )

    # Add vertical line as a shape for first figure
    fig1.add_shape(
        type="line",
        x0=breakpoint_date,
        x1=breakpoint_date,
        y0=0,
        y1=1,
        yref="paper",
        line=dict(color="red", width=1, dash="dash")
    )

    # Add annotation for break point in first figure
    fig1.add_annotation(
        x=breakpoint_date,
        y=1,
        yref="paper",
        text="October 23, 2023",
        showarrow=False,
        xanchor="left",
        textangle=0,
        yshift=10
    )

    # Add trace to second figure (JV)
    fig2.add_trace(
        go.Scatter(x=data['Date'], y=data['JV'],
                  name='Jump Volatility',  # Updated name to reflect std dev
                  line=dict(color='#2ca02c', width=1))
    )

    # Add vertical line as a shape for second figure
    fig2.add_shape(
        type="line",
        x0=breakpoint_date,
        x1=breakpoint_date,
        y0=0,
        y1=1,
        yref="paper",
        line=dict(color="red", width=1, dash="dash")
    )

    # Add annotation for break point in second figure
    fig2.add_annotation(
        x=breakpoint_date,
        y=1,
        yref="paper",
        text="October 23, 2023",
        showarrow=False,
        xanchor="left",
        textangle=0,
        yshift=10
    )

    # Update layout for both figures
    layout_template = dict(
        template='plotly_white',
        showlegend=True,
        height=400,
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=10)
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin=dict(b=80)
    )

    fig1.update_layout(
        **layout_template,
        title=dict(
            text="Panel A: Daily Realized and Continuous Volatility Components",
            x=0.5,
            y=0.95
        )
    )

    fig2.update_layout(
        **layout_template,
        title=dict(
            text="Panel B: Daily Jump Volatility Component",  # Updated title
            x=0.5,
            y=0.95
        )
    )

    # Update axes for both figures
    for fig in [fig1, fig2]:
        fig.update_xaxes(showgrid=False, zeroline=False, title_text="Date")
        fig.update_yaxes(showgrid=False, zeroline=False, 
                        ticksuffix='%', title_text="Volatility (%)")

    return fig1, fig2

def main():
    # Define your start date here
    START_DATE = '2022-01-01'  # Modify this to your desired start date
    
    # Load data with period split and custom start date
    data, breakpoint_date = load_and_process_data('btc_volatility_measures_185.csv', 
                                                 start_date=START_DATE)
    
    # Get sample information for both periods
    sample_info = get_sample_info(data, breakpoint_date)
    
    # Calculate summary statistics for both periods
    pre_stats, pre_corr = calculate_summary_stats(data, 'Pre-Break')
    post_stats, post_corr = calculate_summary_stats(data, 'Post-Break')
    
    # Print sample information
    print("\nSample Information:")
    print("==================")
    print(f"Analysis Start Date: {START_DATE}")
    for period, info in sample_info.items():
        print(f"\n{period} Period:")
        for key, value in info.items():
            print(f"{key}: {value}")
    
    # Print summary statistics
    pd.set_option('display.float_format', lambda x: '{:.4f}'.format(x))
    
    print("\nPre-Break Summary Statistics (Daily Volatility):")
    print("=============================================")
    print(pre_stats)
    print("\nPre-Break Correlation Matrix:")
    print(pre_corr.round(4))
    
    print("\nPost-Break Summary Statistics (Daily Volatility):")
    print("==============================================")
    print(post_stats)
    print("\nPost-Break Correlation Matrix:")
    print(post_corr.round(4))
    
    # Calculate and print additional metrics for both periods
    for period in ['Pre-Break', 'Post-Break']:
        period_data = data[data['Period'] == period]
        jump_days = (period_data['JV'] > 0).mean() * 100
        avg_jump_size = period_data.loc[period_data['JV'] > 0, 'JV'].mean()
        
        print(f"\nAdditional Metrics - {period}:")
        print("=" * (20 + len(period)))
        print(f"Percentage of days with jumps: {jump_days:.2f}%")
        print(f"Average jump volatility when present (daily %): {avg_jump_size:.4f}%")
    
    # Create and display visualizations
    fig1, fig2 = create_visualization(data, breakpoint_date)
    fig1.show()
    fig2.show()

if __name__ == "__main__":
    main()


Sample Information:
Analysis Start Date: 2022-01-01

Pre-Break Period:
Start Date: 2022-01-01
End Date: 2023-10-23
Total Days: 652
Trading Days: 652

Post-Break Period:
Start Date: 2023-10-24
End Date: 2024-09-05
Total Days: 313
Trading Days: 313

Pre-Break Summary Statistics (Daily Volatility):
                                     RV      CV     JV
Mean Volatility (daily %)        2.6119  2.1839 1.3676
Median Volatility (daily %)      2.3507  1.9474 1.1767
Std Dev of Volatility (daily %)  1.5352  1.3004 0.9224
Min Volatility (daily %)         0.3095  0.2742 0.0000
Max Volatility (daily %)        11.1205 10.4444 6.3680
Skewness                         1.9905  2.1845 1.8639
Kurtosis                         6.0706  7.5406 5.0301
Q1 (daily %)                     1.6057  1.3502 0.7453
Q3 (daily %)                     3.1590  2.6344 1.6834

Pre-Break Correlation Matrix:
       RV     CV     JV
RV 1.0000 0.9758 0.8918
CV 0.9758 1.0000 0.7757
JV 0.8918 0.7757 1.0000

Post-Break Summary Stati