In [31]:
import pandas as pd
import numpy as np
from datetime import datetime

# Function to convert DataFrame to nested dictionary
def df_to_nested_dict(df):
    nested_dict = {}
    for index, row in df.iterrows():
        date_str = row['date'].strftime('%Y-%m-%d')
        nested_dict.setdefault(date_str, {}).setdefault(row['udl'], {}).setdefault(row['param'], {})[row['matu']] = row['value']
    return nested_dict

# Convert the nested dictionary to a DataFrame
def nested_dict_to_df(nested_dict):
    records = []
    for date, udl_data in nested_dict.items():
        for udl, params in udl_data.items():
            for param, matu_data in params.items():
                for matu, value in matu_data.items():
                    records.append((date, udl, param, matu, value))
    df = pd.DataFrame(records, columns=['date', 'udl', 'param', 'matu', 'value'])
    df['date'] = pd.to_datetime(df['date'])
    return df

# Dummy functions for compute_spread and calculate_z_score
def compute_spread(df_iv, df_rv, mode='div'):
    return df_iv / df_rv

def calculate_z_score(df_iv, start_date, end_date):
    return (df_iv - df_iv.mean()) / df_iv.std()

# Function to compute index spread
def compute_index_spread(nested_dict, udl_list, rv_type, strike_type, t, matu, start_date=None, end_date=None, mode='div', param_level=None):
    """
    Get RV and IV or IVFD data from dict on specific time frame and matu, moneyness or delta
    """
    time_series_data_iv = []  # Initialize list for IV data
    time_series_data_rv = []  # Initialize list for RV data
    
    date_str = start_date
    for date_str in nested_dict:
        date = datetime.strptime(date_str, "%Y-%m-%d").date()
        
        for udl in udl_list:
            if rv_type == 'RV':
                if rv_type in nested_dict[date_str][udl] and t in nested_dict[date_str][udl][rv_type]:
                    value = nested_dict[date_str][udl][rv_type][t]
                    time_series_data_rv.append((date, value))
            if strike_type == 'IV' or strike_type == 'IVFD':
                print(f"Accessing: {date_str} - {udl} ; {matu}m ; {strike_type}")
                if strike_type in nested_dict[date_str][udl]:
                    if matu in nested_dict[date_str][udl][strike_type]:
                        value = nested_dict[date_str][udl][strike_type][matu]
                        if param_level is None:
                            time_series_data_iv.append((date, value))
                        else:
                            value = value.get(param_level, np.nan)
                            time_series_data_iv.append((date, value))

    df_iv = pd.DataFrame(time_series_data_iv, columns=['Date', 'Value']).set_index('Date')
    df_iv = df_iv.ffill().bfill()

    df_rv = pd.DataFrame(time_series_data_rv, columns=['Date', 'Value']).set_index('Date')
    df_rv = df_rv.ffill().bfill()

    # Compute index spread
    rel = compute_spread(df_iv, df_rv, mode=mode)
    rel = rel.ffill().bfill().tail(1).T
    rel.index = udl_list
    
    # Compute rolling z-scores
    z_score = calculate_z_score(df_iv, start_date=start_date, end_date=end_date)
    z_score = z_score.ffill().bfill().tail(1).T
    z_score.index = udl_list
    
    # Concatenate both DataFrames
    df = pd.concat([z_score, rel / 100], axis=1)
    df.columns = ['IV z-score', 'IV / RV']
    df.index = df.index.tolist()  # Replacing mds_to_bbg with identity

    return df

In [32]:
# Adjust the sample DataFrame to include consistent 'RV' data
data_adjusted = {
    'udl': ['EU_STOXX50E'] * 12,
    'param': ['IV', 'IVFD', 'RV', 'spot', 'IV', 'IVFD', 'RV', 'spot', 'IV', 'IVFD', 'RV', 'RV'],
    'matu': [1.0, 1.0, 21.0, np.nan, 5.0, 5.0, 21.0, np.nan, 1.0, 1.0, 21.0, 21.0],
    'value': [24.48, 23.45, 21.0, 4701.68, 25.18, 24.47, 4600.48, np.nan, 25.84, 25.64, 22.0, 22.5],
    'date': ['2001-01-02', '2001-01-02', '2001-01-02', '2001-01-02', 
             '2001-01-03', '2001-01-03', '2001-01-03', '2001-01-03',
             '2001-01-04', '2001-01-04', '2001-01-04', '2001-01-04']
}
df_adjusted = pd.DataFrame(data_adjusted)
df_adjusted['date'] = pd.to_datetime(df_adjusted['date'])
# Convert adjusted DataFrame to nested dictionary
nested_dict_adjusted = df_to_nested_dict(df_adjusted)

### Print data

In [40]:
# Step 1: Define the nested dictionary
nested_dict = {
    '2001-01-02': {
        'EU_STOXX50E': {
            'IV': {1.0: 24.48},
            'IVFD': {1.0: 23.45},
            'RV': {21.0: 21.00},
            'spot': {None: 4701.68}
        }
    },
    '2001-01-03': {
        'EU_STOXX50E': {
            'IV': {5.0: 25.18},
            'IVFD': {5.0: 24.47},
            'RV': {21.0: 4600.48},
            'spot': {None: None}
        }
    },
    '2001-01-04': {
        'EU_STOXX50E': {
            'IV': {1.0: 25.84},
            'IVFD': {1.0: 25.64},
            'RV': {21.0: 22.50}
        }
    }
}

# Convert the nested dictionary back to a DataFrame
df_from_dict = nested_dict_to_df(nested_dict)

# Step 3: Print the DataFrame
print(df_from_dict)

         date          udl param  matu    value
0  2001-01-02  EU_STOXX50E    IV   1.0    24.48
1  2001-01-02  EU_STOXX50E  IVFD   1.0    23.45
2  2001-01-02  EU_STOXX50E    RV  21.0    21.00
3  2001-01-02  EU_STOXX50E  spot   NaN  4701.68
4  2001-01-03  EU_STOXX50E    IV   5.0    25.18
5  2001-01-03  EU_STOXX50E  IVFD   5.0    24.47
6  2001-01-03  EU_STOXX50E    RV  21.0  4600.48
7  2001-01-03  EU_STOXX50E  spot   NaN      NaN
8  2001-01-04  EU_STOXX50E    IV   1.0    25.84
9  2001-01-04  EU_STOXX50E  IVFD   1.0    25.64
10 2001-01-04  EU_STOXX50E    RV  21.0    22.50


### Parameters for the function

In [29]:
udl_list = ['EU_STOXX50E']
rv_type = 'RV'
strike_type = 'IV'
t = 21.0
matu = 1.0
start_date = '2001-01-01'
end_date = '2001-01-05'

### Run

In [30]:
# Run the corrected function
result_index_spread = compute_index_spread(nested_dict_adjusted, udl_list, rv_type, strike_type, t, matu, start_date, end_date)
result_index_spread

Accessing: 2001-01-02 -> EU_STOXX50E -> IV -> 1.0
Accessing: 2001-01-03 -> EU_STOXX50E -> IV -> 1.0
Accessing: 2001-01-04 -> EU_STOXX50E -> IV -> 1.0


Unnamed: 0,IV z-score,IV / RV
EU_STOXX50E,0.707107,0.011484


In [39]:
import pandas as pd

# Step 1: Define the nested dictionary
nested_dict = {
    '2001-01-02': {
        'EU_STOXX50E': {
            'IV': {1.0: 24.48},
            'IVFD': {1.0: 23.45},
            'RV': {21.0: 21.00},
            'spot': {None: 4701.68}
        }
    },
    '2001-01-03': {
        'EU_STOXX50E': {
            'IV': {5.0: 25.18},
            'IVFD': {5.0: 24.47},
            'RV': {21.0: 4600.48},
            'spot': {None: None}
        }
    },
    '2001-01-04': {
        'EU_STOXX50E': {
            'IV': {1.0: 25.84},
            'IVFD': {1.0: 25.64},
            'RV': {21.0: 22.50}
        }
    }
}

# Step 2: Convert the nested dictionary to a multi-indexed DataFrame
def nested_dict_to_multiindex_df(nested_dict):
    records = []
    for date, udl_data in nested_dict.items():
        for udl, params in udl_data.items():
            for param, matu_data in params.items():
                for matu, value in matu_data.items():
                    records.append((date, udl, param, matu, value))
    df = pd.DataFrame(records, columns=['date', 'udl', 'param', 'matu', 'value'])
    df['date'] = pd.to_datetime(df['date'])
    df.set_index(['date', 'udl', 'param', 'matu'], inplace=True)
    df = df.unstack(level=['udl', 'param'])
    return df

# Convert the nested dictionary back to a multi-indexed DataFrame
df_from_dict_multiindex = nested_dict_to_multiindex_df(nested_dict)

# Step 3: Print the multi-indexed DataFrame
(df_from_dict_multiindex)


Unnamed: 0_level_0,Unnamed: 1_level_0,value,value,value,value
Unnamed: 0_level_1,udl,EU_STOXX50E,EU_STOXX50E,EU_STOXX50E,EU_STOXX50E
Unnamed: 0_level_2,param,IV,IVFD,RV,spot
date,matu,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
2001-01-02,,,,,4701.68
2001-01-02,1.0,24.48,23.45,,
2001-01-02,21.0,,,21.0,
2001-01-03,,,,,
2001-01-03,5.0,25.18,24.47,,
2001-01-03,21.0,,,4600.48,
2001-01-04,1.0,25.84,25.64,,
2001-01-04,21.0,,,22.5,
