In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.colors as pc
from modified_spectral_method import *
from modified_louvain_method import *
from proxy_methods_final import *
import cvxpy as cp
import copy
import matplotlib.pyplot as plt

In [56]:
# Load the two input tables used throughout the notebook.
# `metadata`: one row per entity; later cells read its 'Ticker', 'Sector' and
# 'AverageRating' columns.
metadata = pd.read_csv('data/metadata.csv')
# `reshaped_data`: wide table with a 'Date' column plus one column per ticker
# (later cells match its column names against metadata['Ticker']).
reshaped_data = pd.read_csv('data/reshaped_data.csv')

### (i)
Remove all ‘CCC-’ and ‘D’-rated entities, since their spreads contain a large
percentage of outliers, which makes our analysis inaccurate. Furthermore, for
these close-to-default entities, banks might decide to use a special
methodology instead of the standard proxy method.

In [59]:
# Step (i): keep only entities whose numeric AverageRating is at most 19.
# NOTE(review): the step (i) text says 'CCC-' entities are removed as well, but
# `<= 19` keeps whatever rating is encoded as 19 -- confirm the numeric rating
# scale before relying on this cutoff.
rating_is_acceptable = metadata['AverageRating'].le(19)
filtered_metadata = metadata.loc[rating_is_acceptable]

# Tickers of the entities that passed the rating filter
valid_tickers = filtered_metadata['Ticker']

# Keep 'Date' plus every spread column whose name is one of the surviving
# tickers (original column order is preserved).
ticker_columns = reshaped_data.columns[reshaped_data.columns.isin(valid_tickers)]
filtered_reshaped_data = reshaped_data[['Date', *ticker_columns]]

In [65]:
# Sanity check: (rows, columns) after the rating filter; the column count
# includes the 'Date' column.
filtered_reshaped_data.shape

(2515, 318)

### (iv)
Remove the quotes from the ‘Government’ sector, since
sovereign CDSs have been studied independently in
the literature.

In [67]:
# Step (iv): drop every entity whose Sector is exactly 'Government'
is_not_government = filtered_metadata['Sector'].ne('Government')
filtered_metadata = filtered_metadata.loc[is_not_government]

# Refresh the ticker list so it reflects both the rating and the sector filter
valid_tickers = filtered_metadata['Ticker']

# Rebuild the spread table from the raw data: 'Date' plus the columns of the
# tickers that are still valid (original column order is preserved).
ticker_columns = reshaped_data.columns[reshaped_data.columns.isin(valid_tickers)]
filtered_reshaped_data = reshaped_data[['Date', *ticker_columns]]


In [69]:
# Sanity check: (rows, columns) after also removing the 'Government' sector;
# the column count includes the 'Date' column.
filtered_reshaped_data.shape

(2515, 298)

### (vii)
Remove quotes for entities whose spreads are above 1000 basis
points. This allows us to have a more accurate estimate of
errors in proxying and, similarly to ‘CCC-’ and ‘D’-rated
entities, banks might decide to use a special methodology instead of the
standard proxy method for these entities. In the cells below, such quotes are
first set to NaN, and every entity with at least one NaN is then dropped.

In [75]:
# Step (vii): blank out every quote above the cap (in basis points), then
# report how many cells are missing afterwards.
SPREAD_CAP_BP = 1000

# Work on an independent copy so the column assignments below do not touch the
# frame this one was selected from.
filtered_reshaped_data = filtered_reshaped_data.copy()
for ticker in filtered_reshaped_data.columns[1:]:  # skip the first column ('Date')
    spreads = filtered_reshaped_data[ticker]
    # Values strictly above the cap become NaN; everything else is kept as-is.
    filtered_reshaped_data[ticker] = spreads.mask(spreads > SPREAD_CAP_BP)

# Total number of NaN cells in the whole frame. This counts the values masked
# above plus any values that were already missing in the input.
nan_count = filtered_reshaped_data.isna().to_numpy().sum()

nan_count

6426

In [77]:
# Drop every column (i.e. every entity) that has at least one NaN, so only
# tickers with a complete history of quotes remain.
filtered_reshaped_data = filtered_reshaped_data.dropna(axis='columns', how='any')

In [79]:
# Sanity check: (rows, columns) after dropping every column that contains a NaN.
filtered_reshaped_data.shape

(2515, 271)

In [81]:
# Tickers that survived every filter: all columns of the cleaned spread table
# except 'Date' (excluded by name, so the result does not depend on column order).
# Fix: this cell used to read `filtered_reshaped_data_no_nans`, which is not
# assigned in any visible cell and raises NameError on Restart & Run All; the
# NaN-free table is stored in `filtered_reshaped_data`.
final_tickers = set(filtered_reshaped_data.columns) - {'Date'}

# Keep only the metadata rows whose ticker is still present in the spread table
filtered_metadata_final = filtered_metadata[filtered_metadata['Ticker'].isin(final_tickers)]

# Display the filtered metadata
print(filtered_metadata_final)


          Ticker                          ShortName             Sector  \
1            ABE        Abertis Infraestructuras SA        Industrials   
2         ABHLTD           Alliance Boots Hldgs Ltd  Consumer Services   
3          ACAFP                     Cr Agricole SA         Financials   
4      ACAFP-CIB  Cr Agricole Corporate and Invt Bk         Financials   
5          ACCOR                              ACCOR  Consumer Services   
..           ...                                ...                ...   
312         WENL                             WENDEL         Financials   
314       WOLKLU                 Wolters Kluwer N V  Consumer Services   
315  WPPGRP-2005                       WPP 2005 Ltd  Consumer Services   
316       YORPOW               Yorkshire Pwr Gp Ltd          Utilities   
317        ZINCO                  Zurich Ins Co Ltd         Financials   

     Region         Country  AverageRating  
1    Europe           Spain              9  
2    Europe  United K

In [83]:
# Export the final filtered metadata to a CSV file (row index is not written)
filtered_metadata_final.to_csv('data/new_metadata.csv', index=False)

# Export the final filtered reshaped data to a CSV file.
# Fix: this used to write `filtered_reshaped_data_no_nans`, which is not
# assigned in any visible cell (NameError on a fresh kernel); the NaN-free
# table produced by the dropna step is `filtered_reshaped_data`.
filtered_reshaped_data.to_csv('data/new_reshaped_data.csv', index=False)