In [None]:
import pandas as pd
import numpy as np


In [None]:
# Workbook locations: the price workbook that is read and the results workbook that is written.
# NOTE(review): these are raw strings, so every "\\" stays as two backslash characters in the
# path. Windows usually tolerates repeated separators, but confirm this is intentional.
input_file = r"C:\\Users\\Elmeri\\OneDrive - Aalto University\\Gradu oscun kaa\\DATA\\Hinta ja return data\\Financial Turbulence\\Financial turbulence.xlsx"
output_file = r"C:\\Users\\Elmeri\\OneDrive - Aalto University\\Gradu oscun kaa\\DATA\\Hinta ja return data\\ft_outputs.xlsx"
sheet_name = "Prices"

# Load the sheet (its first row supplies the column headers) and discard the row labelled 1.
# NOTE(review): with header=0 the label 1 is the third row of the worksheet; if the codes
# row sits directly under the header it has label 0 instead - confirm against the workbook.
price_data = pd.read_excel(input_file, sheet_name=sheet_name, header=0).drop(index=1)

In [None]:
# Normalise decimal commas to dots in every text cell of the frame.
price_data = price_data.replace(regex=True, to_replace={",": "."})

# Coerce every column after the first (Timestamp) to numbers; unparseable cells become NaN.
for column_label in price_data.columns[1:]:
    numeric_values = pd.to_numeric(price_data[column_label], errors='coerce')
    price_data[column_label] = numeric_values

# Simple returns for price series
def calculate_returns(df):
    """Return the period-over-period percentage change of every column in ``df``.

    The result has the same shape and labels as ``df``; its first row is NaN
    because there is no previous observation to compare against.
    """
    simple_returns = df.pct_change()
    return simple_returns

# Column layout: first column is the Timestamp, the last two are bond yields,
# and everything in between is treated as an equity index price.
bond_yield_columns = price_data.columns[-2:]
equity_columns = price_data.columns[1:-2]

# Percentage returns for the equity price columns only.
returns_data = calculate_returns(price_data.loc[:, equity_columns])

# Bond yields are converted to "returns" as first differences, not percentage changes
def convert_yields_to_returns(df):
    """Return the change of every column in ``df`` relative to the previous row.

    The result keeps the shape and labels of ``df``; the first row is NaN.
    """
    yield_changes = df.diff(periods=1)
    return yield_changes

# Row-to-row yield changes for the two bond yield columns.
bond_returns = convert_yields_to_returns(price_data.loc[:, bond_yield_columns])



In [None]:
# Put equity returns and bond yield changes side by side in one frame.
combined_data = pd.concat([returns_data, bond_returns], axis="columns")

# Re-attach the timestamps as the leading column. The first timestamp is skipped, so after
# index alignment the first row (which has no return to report) gets a missing Timestamp.
combined_data.insert(loc=0, column="Timestamp", value=price_data.iloc[1:, 0])

# Date formatting helper
def format_date_column(df, column_name):
    """Rewrite ``df[column_name]`` in place as ``DD.MM.YYYY`` text.

    The column is parsed with ``pd.to_datetime`` and then rendered with
    ``strftime('%d.%m.%Y')``. ``df`` is modified directly and nothing is returned.
    """
    parsed_dates = pd.to_datetime(df[column_name])
    df[column_name] = parsed_dates.dt.strftime('%d.%m.%Y')

# Render the Timestamp column as DD.MM.YYYY text (modifies combined_data in place).
format_date_column(combined_data, "Timestamp")

# Keep only complete rows: the covariance matrix needs observations without any NaN.
returns_clean = combined_data.dropna(axis=0, how="any")

# Create the output workbook and write the full (unfiltered) returns table as its first sheet.
with pd.ExcelWriter(output_file, engine="openpyxl") as excel_writer:
    combined_data.to_excel(excel_writer, sheet_name="Returns", index=False)


combined_data.loc[returns_clean.index, "Turbulence"] = calculate_turbulence(returns_only)

In [None]:
# Calculate financial turbulence
def calculate_turbulence(data):
    results = []
    mu = data.mean()
    covariance_matrix = data.cov()
    try:
        covariance_matrix_inv = np.linalg.inv(covariance_matrix)
    except np.linalg.LinAlgError:
        covariance_matrix_inv = np.linalg.pinv(covariance_matrix)  # Use pseudo-inverse if singular

    for index, row in data.iterrows():
        yt = row - mu
        turbulence = float(yt.T @ covariance_matrix_inv @ yt)
        results.append(turbulence)
    return results

# Subset the returns columns (excluding Timestamp)
returns_only = returns_clean.iloc[:, 1:]

# Calculate turbulence
combined_data["Turbulence"] = np.nan  # Initialize turbulence column

In [None]:
# Intermediate statistics of the cleaned returns: correlation matrix, mean, covariance matrix
correlation_matrix = returns_only.corr()
mu = returns_only.mean()
covariance_matrix = returns_only.cov()

# Turbulence series that goes to the final sheet
turbulence_output = combined_data[["Timestamp", "Turbulence"]]

# Append all result sheets in a single pass over the existing workbook.
# if_sheet_exists="replace" lets this cell be re-run: without it, a second run raises
# ValueError because the sheets are already present in the file.
with pd.ExcelWriter(output_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    correlation_matrix.to_excel(writer, index=True, sheet_name="Correlations")
    mu.to_excel(writer, index=True, header=["Mean"], sheet_name="Mean")
    covariance_matrix.to_excel(writer, index=True, sheet_name="Covariance Matrix")
    turbulence_output.to_excel(writer, index=False, sheet_name="Turbulence")

print(f"Financial turbulence and intermediate steps saved to {output_file}.")