In [1]:
import pandas as pd
import numpy as np
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.colors as pc
from modified_spectral_method import recursive_spectral_method, create_correlation_matrix, calculate_C_g
from modified_louvain_method import *
from itertools import product
from collections import Counter

# Transform data
---

In [2]:
df_cds_data = pd.read_csv('reshaped_eur_data.csv') #The raw original cds eur data (reshaped)
df_itraxx = pd.read_csv('ITRAXX-Europe Timeseries 20241127.csv')

In [3]:
print("cds data shape: ", df_cds_data.shape)
print("itraxx data shape: ", df_itraxx.shape)

cds data shape:  (2515, 319)
itraxx data shape:  (5221, 7)


In [4]:
#Rename date column to Date
df_itraxx.rename(columns={'AsOf':'Date'}, inplace=True)

In [5]:
# Convert the 'AsOf' column to a datetime object
df_itraxx['Date'] = pd.to_datetime(df_itraxx['Date'], format='%d/%b/%y')

# Sort the DataFrame by the 'AsOf' column in descending order by day
df_itrax_sorted = df_itraxx.sort_values(by='Date', ascending=True)
df_itrax_sorted

Unnamed: 0,Date,Batch,Tenor,Ticker,Series,Version,ConvSpread
5220,2004-10-05,EOD,5Y,ITRAXX-Europe,2,1,35.55
5219,2004-11-19,EOD,5Y,ITRAXX-Europe,2,1,36.88
5218,2004-11-22,EOD,5Y,ITRAXX-Europe,2,1,37.25
5217,2004-11-23,EOD,5Y,ITRAXX-Europe,2,1,36.94
5216,2004-11-24,EOD,5Y,ITRAXX-Europe,2,1,36.84
...,...,...,...,...,...,...,...
4,2024-11-20,EOD,5Y,ITRAXX-Europe,42,1,56.59
3,2024-11-21,EOD,5Y,ITRAXX-Europe,42,1,56.81
2,2024-11-22,EOD,5Y,ITRAXX-Europe,42,1,57.55
1,2024-11-25,EOD,5Y,ITRAXX-Europe,42,1,57.30


In [9]:
#Remove rows with missing values
df_itraxx_sorted = df_itrax_sorted.dropna()
df_itraxx_sorted

Unnamed: 0,Date,Batch,Tenor,Ticker,Series,Version,ConvSpread
5220,2004-10-05,EOD,5Y,ITRAXX-Europe,2,1,35.55
5219,2004-11-19,EOD,5Y,ITRAXX-Europe,2,1,36.88
5218,2004-11-22,EOD,5Y,ITRAXX-Europe,2,1,37.25
5217,2004-11-23,EOD,5Y,ITRAXX-Europe,2,1,36.94
5216,2004-11-24,EOD,5Y,ITRAXX-Europe,2,1,36.84
...,...,...,...,...,...,...,...
4,2024-11-20,EOD,5Y,ITRAXX-Europe,42,1,56.59
3,2024-11-21,EOD,5Y,ITRAXX-Europe,42,1,56.81
2,2024-11-22,EOD,5Y,ITRAXX-Europe,42,1,57.55
1,2024-11-25,EOD,5Y,ITRAXX-Europe,42,1,57.30


In [10]:
#Filter the Itraxx data to only include the dates that are in the eur_data_standardized_returns
#---------------------------------------------------------------------------------------------------
df_itraxx_filtered = df_itraxx_sorted[df_itraxx_sorted['Date'].isin(df_cds_data['Date'])]
print("filtered itraxx data shape: ", df_itraxx_filtered.shape)
#Take log returns
#---------------------------------------------------------------------------------------------------

# Calculate the log returns for the CDS spread
df_itraxx_filtered['Log_Return'] = np.log(df_itraxx_filtered['ConvSpread'] / df_itraxx_filtered['ConvSpread'].shift(1))

# Drop NaN values that occur due to the first row (no previous value to compare)
df_itraxx_log_returns = df_itraxx_filtered.dropna()
# Standardize the log returns (Z-score normalization)
log_return_mean = df_itraxx_log_returns['Log_Return'].mean()
log_return_std = df_itraxx_log_returns['Log_Return'].std()
df_itraxx_log_returns['Standardized_Log_Return'] = (df_itraxx_log_returns['Log_Return'] - log_return_mean) / log_return_std

# Display the filtered DataFrame
df_itraxx_log_returns


filtered itraxx data shape:  (2491, 7)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_itraxx_filtered['Log_Return'] = np.log(df_itraxx_filtered['ConvSpread'] / df_itraxx_filtered['ConvSpread'].shift(1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_itraxx_log_returns['Standardized_Log_Return'] = (df_itraxx_log_returns['Log_Return'] - log_return_mean) / log_return_std


Unnamed: 0,Date,Batch,Tenor,Ticker,Series,Version,ConvSpread,Log_Return,Standardized_Log_Return
3701,2010-09-15,EOD,5Y,ITRAXX-Europe,13,1,105.58,0.005509,0.181407
3700,2010-09-16,EOD,5Y,ITRAXX-Europe,13,1,106.28,0.006608,0.217123
3699,2010-09-17,EOD,5Y,ITRAXX-Europe,13,1,106.96,0.006378,0.209641
3698,2010-09-20,EOD,5Y,ITRAXX-Europe,14,1,110.97,0.036805,1.197995
3697,2010-09-21,EOD,5Y,ITRAXX-Europe,14,1,109.85,-0.010144,-0.327034
...,...,...,...,...,...,...,...,...,...
1192,2020-04-28,EOD,5Y,ITRAXX-Europe,33,1,81.58,0.005778,0.190153
1191,2020-04-29,EOD,5Y,ITRAXX-Europe,33,1,78.02,-0.044619,-1.446868
1190,2020-04-30,EOD,5Y,ITRAXX-Europe,33,1,80.58,0.032285,1.051184
1189,2020-05-01,EOD,5Y,ITRAXX-Europe,33,1,83.58,0.036554,1.189836


In [None]:
#Check for anymore rows with NaN values
nan_rows = df_itraxx_log_returns[df_itraxx_log_returns.isnull().any(1)]
nan_rows

  nan_rows = df_itraxx_log_returns[df_itraxx_log_returns.isnull().any(1)]


Unnamed: 0,Date,Batch,Tenor,Ticker,Series,Version,ConvSpread,Log_Return,Standardized_Log_Return


In [14]:
# Create a new DataFrame with only 'Date' and 'Standardized_Log_Return'
df_itraxx_standardised_returns= df_itraxx_log_returns[['Date', 'Standardized_Log_Return']]

# Rename the 'Standardized_Log_Return' column to 'ITRAXX-Europe'
df_itraxx_standardised_returns.rename(columns={'Standardized_Log_Return': 'ITRAXX-Europe'}, inplace=True)

df_itraxx_standardised_returns.to_csv('data/standardized_itraxx_returns.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_itraxx_standardised_returns.rename(columns={'Standardized_Log_Return': 'ITRAXX-Europe'}, inplace=True)


# Calculate correlations
---

In [18]:
#Get average community time series saved previously
df_community_returns_average = pd.read_csv('community_returns_average.csv')

In [None]:
#Convert the date columns to datetime objects
df_community_returns_average['Date'] = pd.to_datetime(df_community_returns_average['Date'])
df_itraxx_standardised_returns['Date'] = pd.to_datetime(df_itraxx_standardised_returns['Date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_itraxx_standardised_returns['Date'] = pd.to_datetime(df_itraxx_standardised_returns['Date'])


In [None]:
#Filter the community returns to only include the dates that are in the eur_data_standardized_returns
df_community_returns_average_filtered = df_community_returns_average[df_community_returns_average['Date'].isin(df_itraxx_standardised_returns['Date'])]
df_community_returns_average_filtered

Unnamed: 0,Date,Community 1,Community 2,Community 3
0,2010-09-15,0.013052,0.007300,0.004545
1,2010-09-16,-0.005645,-0.009831,0.000546
2,2010-09-17,-0.000017,-0.007869,-0.001524
3,2010-09-20,0.023052,0.003669,0.002373
4,2010-09-21,-0.007649,-0.019731,-0.001270
...,...,...,...,...
2509,2020-04-28,-0.028556,-0.006785,-0.009186
2510,2020-04-29,-0.009492,-0.001260,-0.001900
2511,2020-04-30,-0.012537,-0.018167,-0.005807
2512,2020-05-01,0.024110,0.015664,0.005099


In [34]:
#Exclude the date column from both dataframes
df_community_returns_average_filtered = df_community_returns_average_filtered.drop(columns=['Date'])
df_itraxx_standardised_returns = df_itraxx_standardised_returns.drop(columns=['Date'])

In [39]:
#Calculate correlations
correlations = []
for community_average_time_series in df_community_returns_average_filtered.columns:
    correlation = df_community_returns_average_filtered[community_average_time_series].corr(df_itraxx_standardised_returns['ITRAXX-Europe'])
    print("Correlation between ", community_average_time_series, " and ITRAXX-Europe: ", correlation)
    correlations.append(correlation)

Correlation between  Community 1  and ITRAXX-Europe:  -0.00941392836811746
Correlation between  Community 2  and ITRAXX-Europe:  -0.003231888409646981
Correlation between  Community 3  and ITRAXX-Europe:  0.039201267121895984


In [55]:
#Plot correlations between community average time series and ITRAXX-Europe as a bar chart
# Define a list of neon colors
neon_colors = ['#39FF14', '#FF073A', '#0FF0FC', '#FFFB00', '#FF00FF', '#00FF00', '#00FFFF', '#FF4500']

# Ensure the number of colors matches the number of bars
num_bars = len(df_community_returns_average_filtered.columns)
colors = (neon_colors * (num_bars // len(neon_colors) + 1))[:num_bars]

# Plot correlations between community average time series and ITRAXX-Europe as a bar chart
fig = go.Figure(data=[go.Bar(x=df_community_returns_average_filtered.columns, y=correlations, marker_color=colors)])
fig.update_layout(
    title={
        'text': 'Correlations between Community Average Time Series and ITRAXX-Europe',
        'font': {
            'size': 15  # Adjust the font size of the title
        },
        'x': 0.5,  # Center the title
        'xanchor': 'center'
    },
    plot_bgcolor='black',  # Set the plot background color to black
    paper_bgcolor='black',  # Set the paper background color to black
    font=dict(color='white'),  # Set the font color to white
    xaxis=dict(
        gridcolor='white'  # Set the x-axis gridline color to white
    ),
    yaxis=dict(
        gridcolor='white'  # Set the y-axis gridline color to white
    ),
    width=600,
)
fig.show()