In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.covariance import LedoitWolf
import cvxpy as cvx


In [11]:
"""
    Download historical opening and closing prices for the equities. Both Open and Close are used because the strategy buys at the open and sells as near to the close as possible. For this reason, returns will be calculated in a way that reflects this rule.
"""
# 48 largest companies in Sweden
"""
tickers = [
    'INVE-B.ST', 'ATCO-B.ST', 'VOLV-B.ST', 'ASSA-B.ST', 'EQT.ST',
    'SEB-A.ST', 'ERIC-B.ST', 'HEXA-B.ST', 'SAND.ST', 'HM-B.ST',
    'SWED-A.ST', 'EPI-A.ST', 'SHB-A.ST', 'ESSITY-B.ST', 'ALFA.ST',
    'EVO.ST', 'LATO-B.ST', 'INDU-A.ST', 'LIFCO-B.ST',
    'LUND-B.ST', 'SAAB-B.ST', 'TELIA.ST', 'AKEL-D.ST',
    'SOBI.ST', 'INDT.ST', 'SCA-B.ST', 'SKF-B.ST', 'SKA-B.ST',
    'BEIJ-B.ST', 'ALIV-SDB.ST', 'SAVE.ST', 'HOLM-B.ST', 'AAK.ST',
    'SWEC-B.ST', 'SECT-B.ST', 'SSAB-B.ST', 'AXFO.ST', 'GETI-B.ST',
    'SAGA-B.ST', 'VOLCAR-B.ST', 'NDA-SE.ST', 'KINV-B.ST', 'BOL.ST',
    'FABG.ST', 'BALD-B.ST', 'CAST.ST', 'PEAB-B.ST', 'NCC-B.ST'
]
"""
tickers = [
    'INVE-B.ST', 'ATCO-B.ST', 'VOLV-B.ST', 'ASSA-B.ST',
    'SEB-A.ST']

# Analysis period, can easily be changed
start_date = "2010-01-01"
end_date = "2025-05-23"

# Download the price history once and take both fields from the same response,
# so Open and Close are guaranteed to cover the same dates and tickers
# (and the network round-trip is not repeated).
price_data = yf.download(tickers, start=start_date, end=end_date)
price_close = price_data['Close']
price_open = price_data['Open']

# Close-to-close returns. Columns are reindexed to the order of `tickers` so that
# both return frames share one column order. The first row is NaN because there
# is no previous observation to compare against.
# NOTE(review): log_returns_close was previously computed from price_open, which
# contradicted its name; it now uses closing prices like simple_returns_close.
# If open-to-open log returns were intended, store them under a separate name.
simple_returns_close = price_close[tickers].pct_change()
log_returns_close = np.log(price_close[tickers] / price_close[tickers].shift(1))

[*********************100%***********************]  5 of 5 completed
[*********************100%***********************]  5 of 5 completed

Ticker
INVE-B.ST    0
ATCO-B.ST    0
VOLV-B.ST    0
ASSA-B.ST    0
SEB-A.ST     0
dtype: int64
Ticker      INVE-B.ST  ATCO-B.ST  VOLV-B.ST  ASSA-B.ST  SEB-A.ST
Date                                                            
2010-01-05   0.005174  -0.001579   0.025336   0.005674 -0.004168
2010-01-07  -0.002941  -0.003688   0.015444  -0.020452  0.015418
2010-01-08   0.001475   0.003173   0.003042   0.018719  0.035792
2010-01-11   0.000000   0.013179   0.014405   0.009894  0.011728
2010-01-12  -0.009573  -0.013007  -0.031390  -0.044787 -0.008694
...               ...        ...        ...        ...       ...
2025-05-16  -0.004586   0.002072  -0.007557   0.005784  0.008440
2025-05-19   0.005290  -0.001723   0.001088  -0.003834  0.004650
2025-05-20   0.005772  -0.002071   0.000362   0.001924  0.009873
2025-05-21  -0.013671  -0.011415  -0.006879  -0.012484 -0.007638
2025-05-22  -0.031314  -0.024493  -0.021509  -0.019449 -0.023399

[3867 rows x 5 columns]
Ticker
INVE-B.ST   2010-01-04
ATCO-




In [None]:
# Simple code for handling stocks with insufficient amount of historical data, probably more than enough.   
def create_window_and_valid_tickers(returns, current_date, years=4, max_missing_frac=0.0):
    """Slice a trailing estimation window and keep only sufficiently complete columns.

    Parameters
    ----------
    returns : pandas.DataFrame
        Returns with a date index (sliced by label with ``.loc``), one column per ticker.
    current_date : pandas.Timestamp or any value accepted by ``pd.Timestamp``
        Last date of the window (inclusive).
    years : int, default 4
        Length of the look-back window in calendar years.
    max_missing_frac : float, default 0.0
        Largest tolerated share of missing values per column inside the window.
        The default 0.0 drops every column with at least one missing value.

    Returns
    -------
    pandas.DataFrame
        The window restricted to the columns that pass the missing-value filter.

    Raises
    ------
    ValueError
        If ``max_missing_frac`` is outside [0, 1].
    """
    if not 0.0 <= max_missing_frac <= 1.0:
        raise ValueError("max_missing_frac must be between 0 and 1")

    # Coerce so that date strings work too; str - DateOffset would raise TypeError.
    window_end = pd.Timestamp(current_date)
    window_start = window_end - pd.DateOffset(years=years)
    window = returns.loc[window_start:window_end]

    # Compare counts rather than fractions so an empty window keeps all columns
    # (0 <= 0), matching what dropna(axis=1) does on an empty frame.
    missing_per_column = window.isna().sum()
    keep = missing_per_column <= max_missing_frac * len(window)
    return window.loc[:, keep.to_numpy()]

"""
    GPT generated code for analyzing gaps in historical prices.
"""
# This function might be overkill
def analyze_return_gaps(price_df):
    """Summarise runs of missing values per column, counted from the first valid value.

    For every column of ``price_df`` the series is cut at its first non-missing
    entry, and consecutive missing values after that point are grouped into gaps.

    Returns a DataFrame with one row per column and the fields ``first_valid``,
    ``total_days``, ``missing_days``, ``missing_pct``, ``gap_count``, ``max_gap``
    and ``avg_gap``, sorted by ``missing_pct`` in descending order.
    """
    report = {}

    for name in price_df.columns:
        column = price_df[name]
        start = column.first_valid_index()
        # Leading values before the first valid entry are not counted as gaps.
        observed = column.loc[start:]
        is_nan = observed.isna()

        # Pad with False on both sides so every run of missing values has a
        # rising edge (+1) and a falling edge (-1), including a run at the end.
        padded = np.concatenate(([False], is_nan.to_numpy(), [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)
        runs = (run_ends - run_starts).tolist()

        n_days = len(observed)
        n_missing = is_nan.sum()

        if runs:
            longest = max(runs)
            average = sum(runs) / len(runs)
        else:
            longest = 0
            average = 0

        report[name] = {
            'first_valid': start,
            'total_days': n_days,
            'missing_days': n_missing,
            'missing_pct': 100 * n_missing / n_days,
            'gap_count': len(runs),
            'max_gap': longest,
            'avg_gap': average
        }

    per_ticker = pd.DataFrame(report).T
    return per_ticker.sort_values(by='missing_pct', ascending=False)


In [10]:
"""
    This section handles the different covariance models. Will include;
    1.) Sample covariance with .cov() method, implemented directly in the portfolio optimization methods
    2.) Sample covariance with Ledoit-Wolf shrinkage model from sklearn
    3.) EWMA based covariance matrix
    4.) DCC-GARCH based covariance matrix

    For the estimation of covariance matrices, returns should be filtered so that they contain enough historical values. For this implementation, 4 years of data will be used to estimate covariance matrices, stocks with >5% missing values will be dropped for EWMA and >1% for DCC-GARCH.
"""

def ledoit_wolff_covariance(returns):
    """Estimate a covariance matrix with scikit-learn's ``LedoitWolf`` estimator.

    Fits the estimator on ``returns`` and returns its fitted ``covariance_`` attribute.
    """
    fitted = LedoitWolf().fit(returns)
    return fitted.covariance_

def ewma_covariance(returns, lambda_=0.94):
    """Exponentially weighted moving-average covariance of a return history.

    The estimate is seeded with the outer product of the first observation and
    then updated row by row:

        S_t = lambda_ * S_{t-1} + (1 - lambda_) * r_t r_t'

    No mean is subtracted from the returns.

    Parameters
    ----------
    returns : pandas.DataFrame or 2-D array-like
        One row per period and one column per asset, oldest row first.
    lambda_ : float, default 0.94
        Decay factor; must lie strictly between 0 and 1. Larger values give
        more weight to older observations.

    Returns
    -------
    numpy.ndarray
        Square covariance matrix with one row/column per asset.

    Raises
    ------
    ValueError
        If ``lambda_`` is not in (0, 1), or ``returns`` is empty or not 2-D.
    """
    if not 0.0 < lambda_ < 1.0:
        raise ValueError("lambda_ must be strictly between 0 and 1")

    # Work on a plain float array: the recursion is identical, but avoids
    # building and label-aligning a DataFrame for every single row.
    observations = np.asarray(returns, dtype=float)
    if observations.ndim != 2 or observations.shape[0] == 0:
        raise ValueError("returns must be a non-empty 2-D table of returns")

    # Seed the recursion with the first observation's outer product.
    cov_matrix = np.outer(observations[0], observations[0])

    for row in observations[1:]:
        cov_matrix = lambda_ * cov_matrix + (1 - lambda_) * np.outer(row, row)

    return cov_matrix

# TODO: Create DCC-GARCH method

''

In [None]:
"""
    Portfolio optimization models.

    1.) Markowitz max returns with linear costs
    2.) Markowitz min variance
    3.) ERC - Equal Risk Contributions
    4.) HRP - Hierarchical Risk Parity
"""