In [45]:
# Standard library imports
import os
import sys
import copy
from typing import Union, Optional

# Third party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import empyrical as ep

# Add the project root directory to Python path
project_root = os.path.dirname(os.path.dirname(os.getcwd()))   #<Change this path if needed>
src_path = os.path.join(project_root, 'qpmwp-course\\src')    #<Change this path if needed>
sys.path.append(project_root)
sys.path.append(src_path)

# Local modules imports
from helper_functions import load_data_spi, load_pickle
from estimation.covariance import Covariance
from estimation.expected_return import ExpectedReturn
from optimization.optimization import Optimization, Objective, MeanVariance
from optimization.optimization_data import OptimizationData
from optimization.constraints import Constraints
from backtesting.backtest_item_builder_classes import (
    SelectionItemBuilder,
    OptimizationItemBuilder,
)
from backtesting.backtest_item_builder_functions import (
    bibfn_selection_min_volume,
    bibfn_selection_gaps,
    bibfn_return_series,
    bibfn_budget_constraint,
    bibfn_box_constraints,
    bibfn_size_dependent_upper_bounds,
)
from backtesting.backtest_data import BacktestData
from backtesting.backtest_service import BacktestService
from backtesting.backtest import Backtest

In [46]:
df = pd.read_csv('c:/Users/admin\OneDrive/Dokumente/Studium UZH/Master/5. Semester/Quantitative Portfolio Management with Python/Assingments/1. Assingment/2. Werkstatt/qpmwp-course/data/msci_country_indices.csv',
                 index_col=0, header=0, parse_dates=True, date_format='%d-%m-%Y')
#df = df.tail(2).iloc[:, :2]
print(df)

                  AT        AU        BE        CA        CH        DE  \
Index                                                                    
1999-01-01  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1999-01-04  0.010057  0.009080  0.042147  0.013070  0.035885  0.052249   
1999-01-05  0.013661 -0.010048  0.020162  0.021940  0.012016  0.001444   
1999-01-06  0.000000  0.015264 -0.000078  0.027640  0.015335  0.036205   
1999-01-07  0.004104  0.016564 -0.016877 -0.003480 -0.011902 -0.020187   
...              ...       ...       ...       ...       ...       ...   
2023-04-12 -0.002488  0.004714 -0.012072  0.001708  0.000017  0.002662   
2023-04-13  0.001297 -0.003806 -0.000049  0.004728  0.000913  0.002400   
2023-04-14  0.009533  0.004829  0.007622  0.001950  0.008387  0.005797   
2023-04-17 -0.005665  0.003404  0.004917  0.003392 -0.001485 -0.002086   
2023-04-18  0.004836 -0.003262 -0.007127  0.002161  0.004373  0.006089   

                  DK        ES       

In [47]:
# Load data
path_to_data = "C:/Users/admin/OneDrive/Dokumente/Studium UZH/Master/5. Semester/Quantitative Portfolio Management with Python/Assingments/4. Assingment/1. Daten erhalten/" # <change this if necessary>

# Load market and jkp data from parquet files
market_data = pd.read_parquet(path = f'{path_to_data}market_data.parquet')

# Instantiate the BacktestData class
# and set the market data and jkp data as attributes
df1 = BacktestData()
df1.market_data = market_data
print(df1.market_data.head(10))
print(df1.market_data.tail(10))

               price        mktcap  liquidity sector
date       id                                       
1999-05-06 1   461.0  1.235952e+09   129080.0   None
1999-05-07 1   455.0  1.219866e+09     4550.0   None
1999-05-10 1   455.0  1.219866e+09      910.0   None
1999-05-11 1   460.0  1.233271e+09     4600.0   None
1999-05-12 1   460.0  1.233271e+09      460.0   None
1999-05-14 1   460.0  1.233271e+09     4600.0   None
1999-05-19 1   460.0  1.233271e+09     2300.0   None
1999-05-21 1   460.0  1.233271e+09     4600.0   None
1999-05-26 1   460.0  1.233271e+09    13800.0   None
1999-05-31 1   463.0  1.241314e+09    13890.0   None
                    price      mktcap  liquidity sector
date       id                                          
2024-04-17 313   9.814924  20167830.0        0.0   None
2024-04-18 313   9.814924  20167830.0        0.0   None
2024-04-19 313   9.814924  20167830.0        0.0   None
2024-04-22 313   9.814924  20167830.0        0.0   None
2024-04-23 313   9.814924  2

In [None]:
def mean_ewm(X: Union[pd.DataFrame, np.ndarray, object],
                    scalefactor: Union[float, int] = 1,
                    span: Union[float, int] = 10,
                    reverse: bool = True,
                    attribute: Optional[str] = None) -> Union[pd.Series, np.ndarray]: # False = starting from the top and true = starting from the bottom.

    if not isinstance(X, pd.DataFrame):
        if attribute is not None and hasattr(X, attribute) and isinstance(getattr(X, attribute), pd.DataFrame):
        # 1.) "hasattr(X, attribute)" checks whether the object X has an attribute with the name stored in the variable "attribute".
        # 2.) "isinstance(getattr(X, attribute), pd.DataFrame)" checks whether that the attribute of X is actually a pandas DataFrame, and not something else (like a string or list).
            X = getattr(X, attribute).pivot_table(index="date", columns="id", values="price")
        else:
            raise ValueError("If X is a BacktestData object, an 'attribute' must be specified.")

        if attribute not in X.market_data.columns:
                raise ValueError(f"'{attribute}' not found in market_data.")

            X = X.market_data.pivot_table(index="date", columns="id", values=attribute)

        elif hasattr(X, "jkp_data") and isinstance(X.jkp_data, pd.DataFrame):
            if attribute is None:
                raise ValueError("If X is a BacktestData object, an 'attribute' must be specified.")

            if attribute not in X.jkp_data.columns:
                raise ValueError(f"'{attribute}' not found in jkp_data.")

            X = X.jkp_data.pivot_table(index="date", columns="id", values=attribute)

        else:
            raise TypeError("X must be a DataFrame or have .market_data/.jkp_data attributes that are DataFrames.")



    # Handle BacktestData-like objects
    if not isinstance(X, pd.DataFrame):
        if hasattr(X, "market_data") and isinstance(X.market_data, pd.DataFrame):
        # 1.) "hasattr(X, "...")" checks whether the object X has an attribute named "...".
        # 2.) "isinstance(X."", pd.DataFrame)" checks whether that attribute (X."") is actually a pandas DataFrame, and not something else (like a string or list).
            X = X.market_data
        else:
            raise TypeError("X must be a DataFrame or have a .market_data attribute that is a DataFrame.")

    denominators_dict = {}
    numerators_dict = {}
    mu_dict = {}
    mu = []
    alpha = 2 / (span + 1)

    for col in X.columns:
        series = X[col].values
        if reverse:
            series = series[::-1]

        denominators = []
        numerators = []

        for t in range(len(series)):
            i = t
            weight = alpha * (1 - alpha) ** i
            value = series[i]
            numerators.append(weight * value)
            denominators.append(weight)

        # Store each list under the corresponding column name
        denominators_dict[col] = denominators
        numerators_dict[col] = numerators

    for col in X.columns:
        # print(f"Weights for {col}: {denominators_dict[col]}")
        # print(f"Weights * Value for {col}: {numerators_dict[col]}")
        mu = (np.sum(numerators_dict[col]) / np.sum(denominators_dict[col])) * scalefactor
        mu_dict[col] = mu

    return pd.Series(mu_dict, dtype="float64")

In [43]:
span = 10
scalefactor = 1
reverse = True

test_mean_1 = mean_ewm(df, scalefactor, span, reverse)
test_mean_2 = mean_ewm(df1, scalefactor, span, reverse)
print("Test mean 1:")
print(test_mean_1)
print("Test mean 2:")
print(test_mean_2)

TypeError: X must be a DataFrame or have a .market_data attribute that is a DataFrame.

In [48]:
df2 = getattr(df1, market_data).pivot_table(index="date", columns="id", values="price")
print(df2)

TypeError: attribute name must be string, not 'DataFrame'