In [None]:
#----------------------------------------------------------------------------------------------------------------#
#:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->#
#                                                                                                                #
# Regression & Time Series Models                                             ########   ########                #
#                                                                            ##         ##                       #
# Regression using CAPM model                                                ##   ####  ##                       #
#                                                                            ##     ##  ##                       #
# 30/10/2024                                                                  ########   ########                #
#                                                                                                                #
#:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->:<->#
#----------------------------------------------------------------------------------------------------------------#

Import required libraries

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

### Assignment 1

In [None]:
# Set time range of 24 years
t = pd.date_range(start='31/10/2000', end='31/10/2024', freq='ME')

# Define column names for the equities' total return indices
equity_TRI_columns = [
    'JOHNSON & JOHNSON - TOT RETURN IND',
    'BOSTON SCIENTIFIC - TOT RETURN IND',
    'ELI LILLY - TOT RETURN IND',
    'PFIZER - TOT RETURN IND', 
    'TELEFLEX - TOT RETURN IND',
    'CIGNA - TOT RETURN IND',
    'REVVITY - TOT RETURN IND',
    'MEDTRONIC - TOT RETURN IND', 
    'LABCORP HOLDINGS - TOT RETURN IND',
    'HUMANA - TOT RETURN IND'
    ]

equity_MV_columns = [
    'JOHNSON & JOHNSON - MARKET VALUE',
    'BOSTON SCIENTIFIC - MARKET VALUE',
    'ELI LILLY - MARKET VALUE',
    'PFIZER - MARKET VALUE', 
    'TELEFLEX - MARKET VALUE',
    'CIGNA - MARKET VALUE',
    'REVVITY - MARKET VALUE',
    'MEDTRONIC - MARKET VALUE', 
    'LABCORP HOLDINGS - MARKET VALUE',
    'HUMANA - MARKET VALUE'
    ]

In [None]:
# Load the risk-free interest rate series from the local workbook.
# Rows 1 and 2 are skipped: they hold additional information, not rate observations.
rF_yearly = pd.read_excel(
    'INTEREST_RATE.xlsx',
    header=0,
    skiprows=[1, 2],
    usecols=['US FEDERAL FUNDS RATE (MONTHLY AVERAGE) NADJ'],
)

# Stop right away if the loaded series has gaps
assert not rF_yearly.isna().to_numpy().any(), "DataFrame contains NaN values"

# The October 2024 observation is missing from the workbook but present in the
# other input files, so it is appended by hand as a new last row.
# Source: https://www.federalreserve.gov/releases/h15/
rF_yearly.loc[rF_yearly.shape[0]] = 4.92

# Convert the yearly rates into monthly rates, stored as a NumPy array
rF = np.array(rF_yearly.div(12))

rF_yearly

In [None]:
# Load the overall equity market index (health care price index) from the local workbook
health_MKT = pd.read_excel('STOXX_HEALTH_PI.xlsx', usecols=['STOXX NTH AMER 600 HEALTH CARE E - PRICE INDEX'])

# Verify there are no NaN values in the DataFrame
assert not health_MKT.isnull().values.any(), "DataFrame contains NaN values"

# Convert index levels into monthly log returns, expressed in percent
rMKT = 100 * (np.log(health_MKT) - np.log(health_MKT.shift(1)))

# Drop the first row: a log return is the difference between consecutive
# observations, so the first row is NaN
rMKT = np.array(rMKT[1:])

# The risk-free series (without its first observation) must line up one-to-one
# with the market returns. Without this check a mismatch would either raise an
# opaque broadcasting error or, for a length-1 array, be broadcast silently.
assert rMKT.shape == rF[1:].shape, (
    f"Market returns {rMKT.shape} and risk-free rates {rF[1:].shape} are not aligned"
)

# Excess returns (market return - risk-free rate)
eMKT = np.subtract(rMKT, rF[1:])

eMKT



In [None]:
# Load the market value of every constituent from the local workbook.
# Row 0 is the header; rows 1 and 2 are skipped.
mkt_value = pd.read_excel(
    'MKT_VALUE.xlsx',
    header=0,
    skiprows=[1, 2],
    usecols=equity_MV_columns,
)

# Stop right away if any market value is missing
assert not mkt_value.isna().to_numpy().any(), "DataFrame contains NaN values"

mkt_value

In [None]:
# Load the total return indices of all equities into a single DataFrame
data = pd.read_excel('Stocks_Constituents.xlsx', usecols=equity_TRI_columns)

# Verify there are no NaN values in the DataFrame (same check as applied to the other input files)
assert not data.isnull().values.any(), "DataFrame contains NaN values"

# (Optional) Display the loaded DataFrame
# data

### Assignment 2

In [None]:
# Calculate the log returns (in percent) for all equities
log_returns = 100 * (np.log(data) - np.log(data.shift(1)))

# Drop the first row (since log returns are computed as difference between consecutive data, first row will be NaN)
log_returns = log_returns.iloc[1:]

# Verify there are no other NaN values in the DataFrame
assert not log_returns.isnull().values.any(), "DataFrame contains NaN values"

# isnull() does not flag +/-inf, which np.log yields for a zero index level,
# so finiteness is checked separately
assert np.isfinite(log_returns.to_numpy()).all(), "DataFrame contains infinite values"

# Display the resulting log returns
log_returns
