In [1]:
import pandas as pd
from datetime import datetime

def parse_date_time(date_str, time_str):
    """
    Combine a date string and a compact HHMM time string into one datetime.

    Args:
        date_str: Date written as month/day/year, e.g. '1/1/2021'.
        time_str: Time of day written as a number without separators; the last
            two digits are the minutes and whatever precedes them is the hour:
            '5' -> 00:05, '100' -> 01:00, '2359' -> 23:59.
            Whole-number float text such as '425.0' is accepted as well, because
            that is what str() yields when the column was loaded as float.

    Returns:
        datetime(year, month, day, hour, minute) with seconds set to zero.

    Raises:
        ValueError: if the date does not match "%m/%d/%Y", the time is not a
            whole number, or the resulting hour/minute is out of range.
    """
    # strptime already yields midnight of the given day.
    midnight = datetime.strptime(date_str, "%m/%d/%Y")

    try:
        time_val = int(time_str)
    except ValueError:
        # Fall back for text like '425.0'; anything with a fractional part
        # (or NaN/inf) is not a valid HHMM value.
        as_float = float(time_str)
        if not as_float.is_integer():
            raise ValueError(
                f"time must be a whole number in HHMM form, got {time_str!r}"
            )
        time_val = int(as_float)

    # e.g. 2359 -> (23, 59); 5 -> (0, 5)
    hour, minute = divmod(time_val, 100)

    # replace() validates the ranges and raises ValueError on e.g. hour=24.
    return midnight.replace(hour=hour, minute=minute)

def main():
    """
    Read the raw '$LTC-USD.csv' export, merge its Date and Time columns into a
    single DateTime column, print the first rows, and write the result to
    'LTC-USD.csv'.
    """
    source_path = "$LTC-USD.csv"
    frame = pd.read_csv(source_path)

    # Both fields are passed through str() so parse_date_time always receives text,
    # whatever dtype pandas inferred for the columns.
    frame["DateTime"] = frame.apply(
        lambda rec: parse_date_time(str(rec["Date"]), str(rec["Time"])),
        axis=1,
    )

    # The separate fields are redundant once DateTime exists.
    frame = frame.drop(columns=["Date", "Time"])

    # Keep a fixed column layout with the timestamp first.
    ordered_columns = [
        "DateTime",
        "Symbol",
        "Description",
        "Open",
        "High",
        "Low",
        "Close",
        "Volume",
    ]
    frame = frame[ordered_columns]

    # Quick visual check of the conversion.
    print(frame.head(10))

    # Persist the converted table without the positional index.
    frame.to_csv("LTC-USD.csv", index=False)


if __name__ == "__main__":
    main()


             DateTime         Symbol                Description  Open  High  \
0 2016-08-17 04:25:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.70  3.70   
1 2016-08-17 04:30:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.70  3.70   
2 2016-08-17 20:05:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.62  3.62   
3 2016-08-18 05:55:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.62  3.70   
4 2016-08-19 04:25:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.70  3.70   
5 2016-08-19 20:35:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.80  4.00   
6 2016-08-21 16:35:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  4.00  4.00   
7 2016-08-22 16:00:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.99  3.99   
8 2016-08-22 20:50:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.99  3.99   
9 2016-08-23 01:10:00  $LTC-USD@GDAX  LiteCoin/US Dollar @ GDAX  3.99  3.99   

    Low  Close      Volume  
0  3.70   3.70    5.000000  
1  3.70   3.70    5.000000  
2  3.62   3.62    0.317680  
3  3.62   3.70

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import warnings
# Installs an 'ignore' filter with no category restriction, so every warning
# raised after this line is silenced for the rest of the session.
warnings.filterwarnings('ignore')

def calculate_threshold(returns, window=185, c=3):
    """
    Build the threshold series v_j used to truncate returns; the original
    docstring attributes the approach to Corsi, Pirino, and Reno (2010).

    Args:
        returns: Sequence of returns.
        window: Number of observations in the centered rolling window.
        c: Multiplier applied to the rolling variance.

    Returns:
        pandas Series, same length as `returns`, holding c times the centered
        rolling variance. Positions whose window is incomplete are NaN.
    """
    # NOTE(review): confirm against the paper whether the multiplier should be
    # c or c**2; this function applies c.
    series = pd.Series(returns)
    centered = series.rolling(window=window, center=True)
    return centered.var() * c

def calculate_volatility_measures(df):
    """
    Calculate daily RV, TBPV-based CV, and JV from intraday closing prices.

    For every calendar day that has at least 288 rows (a full day of 5-minute
    bars) this computes:
      * RV: sum of squared log returns.
      * CV: threshold bipower variation, i.e. the sum of |r[j-1]| * |r[j]| over
        adjacent pairs in which both squared returns are within their threshold
        (a NaN threshold counts as "within"), divided by mu1 ** 2.
      * JV: max(RV - CV, 0).

    Args:
        df: DataFrame with 'DateTime' and 'Close' columns. It is modified in
            place: 'DateTime' is converted to datetime and a 'log_returns'
            column is added.

    Returns:
        DataFrame with columns ['Date', 'RV', 'CV', 'JV'], one row per
        qualifying day. The columns are present even when no day qualifies.
    """
    # Convert DateTime to datetime type
    df['DateTime'] = pd.to_datetime(df['DateTime'])

    # Log returns over the whole series, so the first bar of a day is measured
    # against the last bar of the previous day.
    df['log_returns'] = np.log(df['Close']).diff()

    # Constant from the paper (presumably E|Z| = sqrt(2/pi) for a standard
    # normal Z, rounded to four decimals -- confirm).
    mu1 = 0.7979

    result_columns = ['Date', 'RV', 'CV', 'JV']
    results = []

    for date, group in df.groupby(df['DateTime'].dt.date):
        if len(group) < 288:  # Full day of 5-min intervals
            continue

        # Get returns for the day
        returns = group['log_returns'].dropna().values
        squared = returns ** 2

        # 1. Realized variance
        RV = np.sum(squared)

        # 2. Threshold bipower variation
        thresholds = calculate_threshold(returns).to_numpy()
        # A return survives truncation when no threshold is available (NaN)
        # or when its square does not exceed the threshold.
        with np.errstate(invalid='ignore'):
            within = np.isnan(thresholds) | (squared <= thresholds)
        kept = np.abs(returns) * within
        # Adjacent products |r[j-1]| * |r[j]| * I[j-1] * I[j], j = 1..n-1
        TBPV = np.sum(kept[:-1] * kept[1:])

        # Scale by mu1^(-2)
        CV = TBPV / (mu1 ** 2)

        # 3. Jump variation, floored at zero
        JV = max(RV - CV, 0)

        results.append({
            'Date': date,
            'RV': RV,
            'CV': CV,
            'JV': JV
        })

    # Explicit columns keep the schema stable when `results` is empty.
    return pd.DataFrame(results, columns=result_columns)

def main():
    """
    Load the converted price file, compute the daily volatility measures, save
    them to 'LTC_volatility_measures_185.csv', and print a short summary.
    """
    # Read the CSV file. The name is spelled exactly as the conversion step
    # writes it ('LTC-USD.csv'); the previous upper-case '.CSV' extension only
    # resolved on case-insensitive filesystems.
    df = pd.read_csv('LTC-USD.csv')

    # Calculate measures
    results_df = calculate_volatility_measures(df)

    # Save results
    results_df.to_csv('LTC_volatility_measures_185.csv', index=False)

    print("\nFirst few rows of results:")
    print(results_df.head())

    print("\nSummary statistics:")
    print(results_df.describe())

if __name__ == "__main__":
    main()


First few rows of results:
         Date        RV        CV        JV
0  2017-05-04  0.049665  0.051316  0.000000
1  2017-05-05  0.016286  0.011692  0.004594
2  2017-05-06  0.003847  0.003774  0.000073
3  2017-05-07  0.011833  0.009775  0.002058
4  2017-05-08  0.012070  0.010356  0.001714

Summary statistics:
                RV           CV           JV
count  2546.000000  2546.000000  2546.000000
mean      0.003556     0.002495     0.001063
std       0.007679     0.005064     0.003839
min       0.000072     0.000052     0.000000
25%       0.000933     0.000623     0.000210
50%       0.001749     0.001219     0.000463
75%       0.003548     0.002454     0.000966
max       0.185061     0.120794     0.141930


In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats

def load_and_process_data(file_path, start_date='2022-01-01', breakpoint_date='2023-10-23'):
    """
    Load the volatility measures and label each row as pre- or post-break.

    Args:
        file_path: Path (or file-like object) of a CSV with 'Date', 'RV', 'CV'
            and 'JV' columns.
        start_date: First date to keep, in 'YYYY-MM-DD' format.
        breakpoint_date: Last date of the 'Pre-Break' period, in 'YYYY-MM-DD'
            format. Defaults to the previously hard-coded '2023-10-23'.

    Returns:
        (df, breakpoint_date) where df holds the rows on or after start_date
        with RV/CV/JV multiplied by 100 and a 'Period' column set to
        'Pre-Break' (Date <= breakpoint) or 'Post-Break', and breakpoint_date
        is the break date as a pandas Timestamp.
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])

    # Filter data from start_date
    df = df[df['Date'] >= pd.Timestamp(start_date)].reset_index(drop=True)

    # Convert to percentage
    df[['RV', 'CV', 'JV']] = df[['RV', 'CV', 'JV']] * 100

    # Add period indicator; the break date itself belongs to 'Pre-Break'.
    breakpoint_date = pd.Timestamp(breakpoint_date)
    df['Period'] = np.where(df['Date'] <= breakpoint_date, 'Pre-Break', 'Post-Break')

    return df, breakpoint_date

def get_sample_info(data, breakpoint_date):
    """
    Get sample period information for the 'Pre-Break' and 'Post-Break' periods.

    Args:
        data: DataFrame with a datetime 'Date' column plus 'RV' and 'Period'.
        breakpoint_date: Accepted for interface compatibility; not used here,
            because the split is read from the 'Period' column.

    Returns:
        Dict keyed by period name. Each value holds:
          'Start Date' / 'End Date': first and last date as 'YYYY-MM-DD', or
              None when the period contains no rows.
          'Total Days': number of rows in the period.
          'Trading Days': number of rows whose RV is greater than zero.
    """
    def _format_date(timestamp):
        # min()/max() of an empty period is NaT, which cannot be formatted.
        if pd.isna(timestamp):
            return None
        return timestamp.strftime('%Y-%m-%d')

    periods = {}
    for period in ['Pre-Break', 'Post-Break']:
        period_data = data[data['Period'] == period]
        periods[period] = {
            'Start Date': _format_date(period_data['Date'].min()),
            'End Date': _format_date(period_data['Date'].max()),
            'Total Days': len(period_data),
            'Trading Days': int((period_data['RV'] > 0).sum())
        }
    return periods

def calculate_summary_stats(data, period=None):
    """
    Summarise RV, CV and JV, either for the whole frame or for one period.

    Args:
        data: DataFrame with 'RV', 'CV', 'JV' columns (and 'Period' when a
            period is requested).
        period: Optional value of the 'Period' column to restrict to; a falsy
            value means all rows are used.

    Returns:
        (summary_df, corr) where summary_df has one column per measure and one
        row per statistic, and corr is the correlation matrix of the three
        measures over the same rows.
    """
    subset = data[data['Period'] == period] if period else data
    columns = ['RV', 'CV', 'JV']

    def describe(values):
        # Insertion order here fixes the row order of the summary table.
        return {
            'Mean (daily %)': values.mean(),
            'Median (daily %)': values.median(),
            'Std Dev (daily %)': values.std(),
            'Min (daily %)': values.min(),
            'Max (daily %)': values.max(),
            'Skewness': stats.skew(values),
            'Kurtosis': stats.kurtosis(values),
            'Q1 (daily %)': values.quantile(0.25),
            'Q3 (daily %)': values.quantile(0.75),
        }

    # Missing values are dropped per measure before summarising.
    per_measure = {name: describe(subset[name].dropna()) for name in columns}

    return pd.DataFrame(per_measure), subset[columns].corr()

def _add_breakpoint_marker(fig, breakpoint_date, label):
    """Draw a dashed red vertical line at breakpoint_date and caption it with label."""
    # yref="paper" makes y0=0 / y1=1 span the full plot height regardless of data range.
    fig.add_shape(
        type="line",
        x0=breakpoint_date,
        x1=breakpoint_date,
        y0=0,
        y1=1,
        yref="paper",
        line=dict(color="red", width=1, dash="dash")
    )
    # Caption sits at the top of the line, nudged 10px upward.
    fig.add_annotation(
        x=breakpoint_date,
        y=1,
        yref="paper",
        text=label,
        showarrow=False,
        xanchor="left",
        textangle=0,
        yshift=10
    )


def create_visualization(data, breakpoint_date):
    """
    Create two plotly figures with the break point marked on each.

    Args:
        data: DataFrame with 'Date', 'RV', 'CV' and 'JV' columns.
        breakpoint_date: Date at which the vertical break line is drawn. The
            caption next to the line is derived from this value (e.g.
            'October 23, 2023') instead of being a fixed string.

    Returns:
        (fig1, fig2): fig1 plots RV and CV against Date ("Panel A"), fig2 plots
        JV against Date ("Panel B").
    """
    # English month name irrespective of the process locale.
    break_ts = pd.Timestamp(breakpoint_date)
    break_label = f"{break_ts.month_name()} {break_ts.day}, {break_ts.year}"

    fig1 = go.Figure()
    fig2 = go.Figure()

    # Panel A traces: RV and CV
    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['RV'],
                  name='Realized Volatility',
                  line=dict(color='#1f77b4', width=1))
    )
    fig1.add_trace(
        go.Scatter(x=data['Date'], y=data['CV'],
                  name='Continuous Volatility',
                  line=dict(color='#ff7f0e', width=1))
    )

    # Panel B trace: JV
    fig2.add_trace(
        go.Scatter(x=data['Date'], y=data['JV'],
                  name='Jump Variation',
                  line=dict(color='#2ca02c', width=1))
    )

    # Same break marker on both panels
    for fig in [fig1, fig2]:
        _add_breakpoint_marker(fig, breakpoint_date, break_label)

    # Layout shared by both figures: white background, horizontal legend
    # centred below the plot area (bottom margin enlarged to make room for it).
    layout_template = dict(
        template='plotly_white',
        showlegend=True,
        height=400,
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=10)
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin=dict(b=80)
    )

    fig1.update_layout(
        **layout_template,
        title=dict(
            text="Panel A: Daily Realized and Continuous Volatility Components",
            x=0.5,
            y=0.95
        )
    )

    fig2.update_layout(
        **layout_template,
        title=dict(
            text="Panel B: Daily Jump Variation Component",
            x=0.5,
            y=0.95
        )
    )

    # Update axes for both figures
    for fig in [fig1, fig2]:
        fig.update_xaxes(showgrid=False, zeroline=False, title_text="Date")
        fig.update_yaxes(showgrid=False, zeroline=False,
                        ticksuffix='%', title_text="Volatility (%)")

    return fig1, fig2

def main():
    """
    Run the pre/post-break report: load the daily measures, print sample
    information, summary statistics, correlations and jump metrics for each
    period, then show both figures.
    """
    # Define your start date here
    START_DATE = '2022-06-01'  # Modify this to your desired start date

    data, breakpoint_date = load_and_process_data(
        'LTC_volatility_measures_185.csv',
        start_date=START_DATE,
    )

    sample_info = get_sample_info(data, breakpoint_date)
    pre_stats, pre_corr = calculate_summary_stats(data, 'Pre-Break')
    post_stats, post_corr = calculate_summary_stats(data, 'Post-Break')

    # --- Sample information ---
    print("\nSample Information:")
    print("==================")
    print(f"Analysis Start Date: {START_DATE}")
    for period_name, details in sample_info.items():
        print(f"\n{period_name} Period:")
        for label, entry in details.items():
            print(f"{label}: {entry}")

    # --- Summary statistics and correlations ---
    # Four decimals for every float printed from here on (global pandas option).
    pd.set_option('display.float_format', lambda value: '{:.4f}'.format(value))

    report_sections = [
        (
            "\nPre-Break Summary Statistics (Daily Measures):",
            "============================================",
            pre_stats,
            "\nPre-Break Correlation Matrix:",
            pre_corr,
        ),
        (
            "\nPost-Break Summary Statistics (Daily Measures):",
            "=============================================",
            post_stats,
            "\nPost-Break Correlation Matrix:",
            post_corr,
        ),
    ]
    for stats_heading, rule, table, corr_heading, corr in report_sections:
        print(stats_heading)
        print(rule)
        print(table)
        print(corr_heading)
        print(corr.round(4))

    # --- Jump metrics per period ---
    for period in ['Pre-Break', 'Post-Break']:
        subset = data[data['Period'] == period]
        has_jump = subset['JV'] > 0
        jump_days = has_jump.mean() * 100
        avg_jump_size = subset.loc[has_jump, 'JV'].mean()

        print(f"\nAdditional Metrics - {period}:")
        print("=" * (20 + len(period)))
        print(f"Percentage of days with jumps: {jump_days:.2f}%")
        print(f"Average jump size when present (daily %): {avg_jump_size:.4f}%")

    # --- Figures ---
    fig1, fig2 = create_visualization(data, breakpoint_date)
    fig1.show()
    fig2.show()

if __name__ == "__main__":
    main()


Sample Information:
Analysis Start Date: 2022-06-01

Pre-Break Period:
Start Date: 2022-06-01
End Date: 2023-10-23
Total Days: 502
Trading Days: 502

Post-Break Period:
Start Date: 2023-10-24
End Date: 2024-09-08
Total Days: 316
Trading Days: 316

Pre-Break Summary Statistics (Daily Measures):
                       RV      CV      JV
Mean (daily %)     0.1729  0.1252  0.0478
Median (daily %)   0.1158  0.0824  0.0250
Std Dev (daily %)  0.2297  0.1699  0.0767
Min (daily %)      0.0072  0.0054  0.0000
Max (daily %)      2.4298  1.7457  0.7585
Skewness           4.9769  5.3380  4.8598
Kurtosis          32.9726 37.3216 31.9127
Q1 (daily %)       0.0606  0.0449  0.0115
Q3 (daily %)       0.1963  0.1460  0.0514

Pre-Break Correlation Matrix:
       RV     CV     JV
RV 1.0000 0.9702 0.8457
CV 0.9702 1.0000 0.6913
JV 0.8457 0.6913 1.0000

Post-Break Summary Statistics (Daily Measures):
                       RV      CV       JV
Mean (daily %)     0.1465  0.1070   0.0395
Median (daily %)   0.0