### Setup

In [None]:
# Import required libraries 

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [None]:
# Set time range of 24 years
t = pd.date_range(start='31/10/2000', end='31/10/2024', freq='ME')

# Define column names for the equities
equity_columns = [
    'JOHNSON & JOHNSON',
    'BOSTON SCIENTIFIC',
    'ELI LILLY',
    'PFIZER', 
    'TELEFLEX',
    'CIGNA',
    'REVVITY',
    'MEDTRONIC', 
    'LABCORP HOLDINGS',
    'HUMANA'
    ]
    
# Generate lists for total return indices and market value indices
equity_TRI_columns = [f"{name} - TOT RETURN IND" for name in equity_columns]
equity_MV_columns  = [f"{name} - MARKET VALUE"   for name in equity_columns]

### Assignment 1

In [None]:
#-----------------------------------------------------------------------------------------------------------------------------------#
# Interest rates

# Load the risk-free rate series. Rows 1 and 2 are skipped because they hold
# additional information rather than actual market values.
rF_yearly = pd.read_excel(
    'INTEREST_RATE.xlsx',
    usecols=['US FEDERAL FUNDS RATE (MONTHLY AVERAGE) NADJ'],
    skiprows=[1, 2],
    header=0,
)

# Fail early if the series has gaps
assert rF_yearly.notna().to_numpy().all(), "DataFrame contains NaN values"

# Append the October 2024 observation, which is missing here but present in the other files.
# Source: https://www.federalreserve.gov/releases/h15/
rF_yearly.loc[rF_yearly.shape[0]] = 4.92

# Convert the yearly rate to a monthly rate (simple division by 12).
# The result is a 2-D array with a single column; copy=True yields an independent array.
rF = (rF_yearly / 12).to_numpy(copy=True)

#-----------------------------------------------------------------------------------------------------------------------------------#
# Market indices

# Load the overall equity market index (price index)
health_MKT = pd.read_excel(
    'STOXX_HEALTH_PI.xlsx',
    usecols=['STOXX NTH AMER 600 HEALTH CARE E - PRICE INDEX'],
)

# Fail early if the series has gaps
assert health_MKT.notna().to_numpy().all(), "DataFrame contains NaN values"

# Monthly log returns in percent: 100 * (ln P_t - ln P_{t-1})
market_log_returns = 100 * np.log(health_MKT).diff()

# The first row has no predecessor, so its return is NaN by construction; drop it
# and keep the rest as a plain array.
rMKT = market_log_returns.iloc[1:].to_numpy(copy=True)

# Excess market returns: market return minus the risk-free rate, with the first
# risk-free observation dropped to match the shortened return series.
eMKT = rMKT - rF[1:]

#-----------------------------------------------------------------------------------------------------------------------------------#
# Market value of constituents

# Load the market value of each constituent (same two non-data rows skipped as for the interest rates)
mkt_value = pd.read_excel(
    'MKT_VALUE.xlsx',
    usecols=equity_MV_columns,
    skiprows=[1, 2],
    header=0,
)

# Fail early if any series has gaps
assert mkt_value.notna().to_numpy().all(), "DataFrame contains NaN values"

#-----------------------------------------------------------------------------------------------------------------------------------#
# Equities

# Load the total return index of each constituent
data = pd.read_excel('Stocks_Constituents.xlsx', usecols=equity_TRI_columns)

# Fail early if any series has gaps
assert data.notna().to_numpy().all(), "DataFrame contains NaN values"

### Assignment 2

In [None]:
# Monthly log returns in percent for every equity: 100 * (ln P_t - ln P_{t-1})
log_returns = 100 * (np.log(data) - np.log(data.shift(1)))

# The first row has no predecessor, so its return is NaN by construction; drop it
log_returns = log_returns.iloc[1:]

# Verify there are no other NaN values in the DataFrame
assert not log_returns.isnull().values.any(), "DataFrame contains NaN values"

# Excess returns: subtract the monthly risk-free rate from every equity column.
# rF[1:] is a single-column array with one value per remaining row, which pandas
# broadcasts across all columns of log_returns.
eEquities = log_returns.sub(rF[1:], axis=0)

#-----------------------------------------------------------------------------------------------------------------------------------#
# Create scatter plots

# Get number of equities
num_equities = eEquities.shape[1]

# Two plots per row, rounding up so an odd number of equities still gets a full grid
num_rows = (num_equities + 1) // 2

# Create the figure and flatten the axis grid for easy indexing
fig, axes = plt.subplots(num_rows, 2, figsize=(14, 4 * num_rows))
axes = axes.flatten()

# Suffix carried by the total-return column labels; stripped to obtain a clean plot title
tri_suffix = " - TOT RETURN IND"

# One scatter plot per equity column
for ax, column in zip(axes, eEquities.columns):
    ax.scatter(eEquities[column], eMKT, alpha=0.5, color='m')
    # Take the title from the column being plotted rather than from equity_columns by
    # position: the column order of `data` comes from the Excel file and need not
    # match the order of equity_columns, which would mislabel the plots.
    title = column[:-len(tri_suffix)] if column.endswith(tri_suffix) else column
    ax.set_title(title)
    ax.set_xlabel('Stock excess returns [%]')
    ax.set_ylabel('Market excess returns [%]')

# Hide any unused subplots if the number of equities is odd.
# Slicing by num_equities avoids depending on a loop variable leaking out of the loop above.
for unused_ax in axes[num_equities:]:
    unused_ax.axis('off')

# Make sure the layout is pretty
plt.tight_layout()
plt.show()

### Assignment 3

In [None]:
# Regressor matrix: eMKT with a constant column added so each regression has an intercept
eMKT_with_const = sm.add_constant(eMKT)

# Fit one OLS regression per equity (equity excess return on market excess return)
regression_models = []
for column in eEquities.columns:
    fitted = sm.OLS(eEquities[column], eMKT_with_const).fit()
    regression_models.append(fitted)

# Destination file for the text summaries
output_file = "regression_summaries.txt"

# Visual separator written after every summary to improve readability
separator = "\n\n" + "<->:"*24 + "\n\n"

# Write, for each equity, a header line, the regression summary as text, and the separator
with open(output_file, "w") as f:
    for column, model in zip(eEquities.columns, regression_models):
        f.write(f"Summary for {column}:\n")
        f.write(model.summary().as_text())
        f.write(separator)

print(f"Regression summaries have been saved to {output_file}")


Regression summaries have been saved to regression_summaries.txt
