In [1]:
# Standard library imports
import os
import sys
import copy
from typing import Union, Optional

# Third party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import empyrical as ep

# Add the project root directory to Python path.
# project_root is the directory two levels above the current working directory.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))   #<Change this path if needed>
# The path components are passed separately so that os.path.join inserts the
# separator of the running platform; a literal backslash inside one component
# only forms a valid sub-path on Windows.
src_path = os.path.join(project_root, 'qpmwp-course', 'src')    #<Change this path if needed>
sys.path.append(project_root)
sys.path.append(src_path)

# Local modules imports
from helper_functions import load_data_spi, load_pickle
from estimation.covariance import Covariance
from estimation.expected_return import ExpectedReturn
from optimization.optimization import Optimization, Objective, MeanVariance
from optimization.optimization_data import OptimizationData
from optimization.constraints import Constraints
from backtesting.backtest_item_builder_classes import (
    SelectionItemBuilder,
    OptimizationItemBuilder,
)
from backtesting.backtest_item_builder_functions import (
    bibfn_selection_min_volume,
    bibfn_selection_gaps,
    bibfn_return_series,
    bibfn_budget_constraint,
    bibfn_box_constraints,
    bibfn_size_dependent_upper_bounds,
)
from backtesting.backtest_data import BacktestData
from backtesting.backtest_service import BacktestService
from backtesting.backtest import Backtest

In [2]:
# Load data
# Folder that holds the input files. The file name is appended directly to this
# string, so the trailing slash is required.
path_to_data = "C:/Users/admin/OneDrive/Dokumente/Studium UZH/Master/5. Semester/Quantitative Portfolio Management with Python/Assingments/4. Assingment/1. Daten erhalten/" # <change this if necessary>

# Read the market data from its parquet file
market_data = pd.read_parquet(path=path_to_data + 'market_data.parquet')

# Create the BacktestData container and attach the market data
# and the benchmark series to it as attributes
data = BacktestData()
data.market_data = market_data
# load_data_spi is imported from the helper_functions module; it is used here to obtain the SPI benchmark series.
data.bm_series = load_data_spi(path='../data/')  # <change this if necessary>

# Print a heading followed by the first and the last rows of each data set
print("Market data:", data.market_data.head(), data.market_data.tail(), sep="\n")
print("Benchmark series (SPI Index):", data.bm_series.head(), data.bm_series.tail(), sep="\n")

Market data:
               price        mktcap  liquidity sector
date       id                                       
1999-05-06 1   461.0  1.235952e+09   129080.0   None
1999-05-07 1   455.0  1.219866e+09     4550.0   None
1999-05-10 1   455.0  1.219866e+09      910.0   None
1999-05-11 1   460.0  1.233271e+09     4600.0   None
1999-05-12 1   460.0  1.233271e+09      460.0   None
                    price      mktcap  liquidity sector
date       id                                          
2024-04-24 313   9.814924  20167830.0        0.0   None
2024-04-25 313   9.814924  20167830.0        0.0   None
2024-04-26 313   9.814924  20167830.0        0.0   None
2024-04-29 313  10.905471  22408700.0     2000.0   None
2024-04-30 313  10.905471  22408700.0        0.0   None
Benchmark series (SPI Index):
1999-01-02    0.000000
1999-01-03    0.000000
1999-01-04    0.034044
1999-01-05    0.012670
1999-01-06    0.015455
Name: SPI, dtype: float64
2024-04-26    0.008030
2024-04-27    0.000000
2024-04

In [3]:
# Define rebalancing dates
# Calendar of the market data, built in three steps:
#   1.) take the 'date' level of the market data index,
#   2.) keep each date once (dates repeat because every company id has its own rows),
#   3.) sort the remaining dates in ascending order.
market_data_dates = (
    market_data.index
    .get_level_values('date')
    .unique()
    .sort_values(ascending=True)
)
print(market_data_dates)

n_days = 3 * 21 # Rebalance every 3 months (21 trading days per month).
# Keep the dates after 2015-01-01, pick every n_days-th one of them
# and format each pick as a 'YYYY-MM-DD' string.
rebdates = [
    day.strftime('%Y-%m-%d')
    for day in market_data_dates[market_data_dates > '2015-01-01'][::n_days]
]
print(rebdates) # This is the list of rebalancing dates.

DatetimeIndex(['1985-12-31', '1986-01-01', '1986-01-02', '1986-01-03',
               '1986-01-06', '1986-01-07', '1986-01-08', '1986-01-09',
               '1986-01-10', '1986-01-13',
               ...
               '2024-04-17', '2024-04-18', '2024-04-19', '2024-04-22',
               '2024-04-23', '2024-04-24', '2024-04-25', '2024-04-26',
               '2024-04-29', '2024-04-30'],
              dtype='datetime64[ns]', name='date', length=9979, freq=None)
['2015-01-02', '2015-04-01', '2015-06-29', '2015-09-24', '2015-12-22', '2016-03-18', '2016-06-15', '2016-09-12', '2016-12-08', '2017-03-07', '2017-06-02', '2017-08-30', '2017-11-27', '2018-02-22', '2018-05-22', '2018-08-17', '2018-11-14', '2019-02-11', '2019-05-09', '2019-08-06', '2019-11-01', '2020-01-29', '2020-04-27', '2020-07-23', '2020-10-20', '2021-01-15', '2021-04-14', '2021-07-12', '2021-10-07', '2022-01-04', '2022-04-01', '2022-06-29', '2022-09-26', '2022-12-22', '2023-03-21', '2023-06-16', '2023-09-13', '2023-12-11', '2

In [4]:
# Define the selection item builders.
# Selection item builders filter or screen assets before the optimization starts.
selection_item_builders = {}

# 'gaps': intended to drop an asset when its volume ("liquidity") series shows a gap
# of more than n_days consecutive zeros inside the look-back window (width).
# NOTE(review): behaviour is taken from the author's notes; bibfn_selection_gaps itself is not shown here.
selection_item_builders['gaps'] = SelectionItemBuilder(
    bibfn=bibfn_selection_gaps,
    width=3 * 252,  # three years look-back window
    n_days=10,
)

# 'min_volume': intended to filter assets whose aggregated (here: median) volume over the
# look-back window (width) is below min_volume. The rule is re-evaluated with new data
# at every rebalancing date, i.e. on a rolling window.
# NOTE(review): behaviour is taken from the author's notes; bibfn_selection_min_volume itself is not shown here.
selection_item_builders['min_volume'] = SelectionItemBuilder(
    bibfn=bibfn_selection_min_volume,
    width=252,  # one year look-back window.
    min_volume=500_000,  # The same as 500'000.
    agg_fn=np.median,
)

# Define the optimization item builders.
optimization_item_builders = {}

# 'return_series': prepares the single stock return series that are made available
# to the optimization (stored as an element of bs.optimization_data).
optimization_item_builders['return_series'] = OptimizationItemBuilder(
    bibfn=bibfn_return_series,
    width=3 * 252,  # three years look-back window.
    fill_value=0,  # fill missing values with 0.
)

# 'budget_constraint': sets the budget constraint for the optimization.
optimization_item_builders['budget_constraint'] = OptimizationItemBuilder(
    bibfn=bibfn_budget_constraint,
    budget=1,
)

# 'box_constraints': sets the box constraints for the optimization.
optimization_item_builders['box_constraints'] = OptimizationItemBuilder(
    bibfn=bibfn_box_constraints,
    upper=0.1,
)

# 'size_dep_upper_bounds': sets size-dependent upper bounds for the optimization
# (based on the "mktcap" column in the market data, according to the author's notes).
optimization_item_builders['size_dep_upper_bounds'] = OptimizationItemBuilder(
    bibfn=bibfn_size_dependent_upper_bounds,
    small_cap={'threshold': 300_000_000, 'upper': 0.02},
    mid_cap={'threshold': 1_000_000_000, 'upper': 0.05},
    large_cap={'threshold': 10_000_000_000, 'upper': 0.1},
)

# Initialize the backtest service
# One BacktestService instance receives everything defined so far: the data
# container, both dictionaries of item builders and the list of rebalancing
# dates (strings in 'YYYY-MM-DD' format).
bs = BacktestService(
    data = data,
    selection_item_builders = selection_item_builders,
    optimization_item_builders = optimization_item_builders,
    rebdates = rebdates,
)

In [5]:
def mean_ewm(X: Union[pd.DataFrame, np.ndarray, object],
                    scalefactor: Union[float, int] = 1,
                    span: Union[float, int] = 10,
                    reverse: bool = True, # False = starting from the top and true = starting from the bottom.
                    attribute: Optional[str] = None) -> Union[pd.Series, np.ndarray]: 
    """Exponentially weighted mean of every column of a table.

    With ``alpha = 2 / (span + 1)`` the observation at position ``t`` gets the
    weight ``alpha * (1 - alpha) ** t``. The result for a column is
    ``scalefactor * sum(weight * value) / sum(weight)``, where both sums run
    over the non-missing observations only. Missing values are skipped, but
    they still occupy a position, so the weights of the other observations
    are not shifted.

    Parameters
    ----------
    X : pd.DataFrame or object
        Table with one column per series. Any other object is accepted only
        if ``attribute`` names an attribute of it that is a DataFrame; the
        table is then taken from ``X.get_return_series()``.
    scalefactor : float or int, default 1
        Factor every mean is multiplied with.
    span : float or int, default 10
        Determines the decay through ``alpha = 2 / (span + 1)``.
    reverse : bool, default True
        If True, position 0 (the largest weight) is the last row of the
        table; if False, it is the first row.
    attribute : str, optional
        Name of the DataFrame attribute that must exist on a non-DataFrame ``X``.

    Returns
    -------
    pd.Series
        float64 Series indexed by the column labels of the table. A column
        without any non-missing value (or whose weights sum to zero) is NaN.

    Raises
    ------
    ValueError
        If ``X`` is not a DataFrame and ``attribute`` is None, is not an
        attribute of ``X``, or does not refer to a DataFrame.
    """
    if not isinstance(X, pd.DataFrame):
        if attribute is not None and hasattr(X, attribute) and isinstance(getattr(X, attribute), pd.DataFrame):
            # NOTE(review): `attribute` is only used to validate X; the data itself
            # comes from get_return_series(). Confirm that this is intended and that
            # the table should not be read via getattr(X, attribute) instead.
            X = X.get_return_series()
        else:
            raise ValueError("If X is a BacktestData object, an 'attribute' must be specified.")

    alpha = 2 / (span + 1)
    # The weight depends on the row position only, so it is computed once
    # and shared by all columns.
    weights = alpha * (1 - alpha) ** np.arange(len(X))

    mu_dict = {}
    for col in X.columns:
        series = X[col].to_numpy()
        if reverse:
            series = series[::-1]

        # Missing observations drop out of numerator and denominator alike.
        observed = ~pd.isna(series)
        denom_sum = weights[observed].sum()
        if denom_sum == 0:
            mu_dict[col] = np.nan
        else:
            numer_sum = (weights[observed] * series[observed].astype(float)).sum()
            mu_dict[col] = (numer_sum / denom_sum) * scalefactor

    return pd.Series(mu_dict, dtype = "float64")

In [6]:
# Step 1: Prepare optimization data
# Calls the service's preparation step for the rebalancing date 2024-03-07.
# NOTE(review): presumably this runs the selection and optimization item builders
# configured on bs and thereby fills bs.optimization_data — confirm in BacktestService.
bs.prepare_rebalancing('2024-03-07')

# Step 2: Access the return series directly
# The key matches the 'return_series' entry of optimization_item_builders.
Test_df = bs.optimization_data['return_series']
# Prints the table as plain text (pandas abbreviates large tables in the output).
print(Test_df)

id               102       103       104       111       120       122  \
date                                                                     
2021-04-15  0.004464  0.000000  0.017699  0.002951 -0.011299 -0.000827   
2021-04-16  0.018182  0.011876  0.008696  0.019127  0.007619 -0.004759   
2021-04-19  0.000992 -0.002347  0.000000  0.006256 -0.003781 -0.004782   
2021-04-20 -0.018876  0.000000 -0.012931 -0.018173 -0.011385 -0.030499   
2021-04-21 -0.004912  0.003529 -0.006550  0.010716 -0.009597  0.020901   
...              ...       ...       ...       ...       ...       ...   
2024-03-01  0.015500 -0.027961  0.002494 -0.001539  0.008871  0.018137   
2024-03-04  0.013684  0.006768  0.047264 -0.015669  0.007994 -0.005239   
2024-03-05  0.009865 -0.008403 -0.026128 -0.015919  0.023791 -0.006320   
2024-03-06  0.010797 -0.008475  0.017073  0.037391 -0.001549  0.000000   
2024-03-07  0.000000 -0.006838  0.019185  0.002301  0.017843  0.021201   

id               125       126       

In [7]:
# Show all rows in print output (changes the pandas display option for the whole session)
pd.set_option('display.max_rows', None)

# Exponentially weighted column means of the test table with flipped sign.
# The sign is flipped because, per the author's note, the vector q is multiplied
# with minus one in the function "MaxSharpe".
test_mean = -1 * mean_ewm(Test_df, scalefactor=1, span=10, reverse=True)
print("Test mean:", test_mean, sep="\n")

Test mean:
102   -0.005298
103   -0.003803
104   -0.004238
111   -0.002462
120   -0.004710
122   -0.006219
125    0.001373
126    0.004283
127    0.000426
128    0.003918
129   -0.006104
134   -0.017800
136    0.005660
139   -0.001741
141   -0.003621
142   -0.004040
148   -0.011305
149   -0.002020
154    0.012007
159   -0.010275
161   -0.001463
169    0.000795
170   -0.002497
174   -0.000581
175    0.002828
176   -0.003285
177   -0.001262
184   -0.007096
185   -0.005403
191    0.000585
192   -0.031885
197   -0.006969
2      0.000064
200   -0.002502
201   -0.002922
204   -0.006737
206   -0.002671
213    0.000684
22    -0.002370
224    0.000350
227    0.000890
229   -0.001579
231    0.000737
233    0.001680
24     0.000064
243   -0.007885
244   -0.004102
245   -0.006228
246   -0.000330
249    0.000636
252    0.003871
254   -0.003835
256   -0.001743
258   -0.002488
261   -0.005783
265    0.044255
267   -0.002420
27    -0.000702
270   -0.002053
271   -0.013749
276   -0.003500
279   -0.0036