In [1]:
# Standard library imports
import os
import sys
import copy
from typing import Union, Optional

# Third party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import empyrical as ep

# Add the project root directory to Python path
project_root = os.path.dirname(os.path.dirname(os.getcwd()))   #<Change this path if needed>
# Pass the components separately so os.path.join inserts the separator of the
# running OS; a literal backslash inside one component only works on Windows.
src_path = os.path.join(project_root, 'qpmwp-course', 'src')    #<Change this path if needed>
# Only append entries that are missing, so re-running this cell does not
# pile up duplicate sys.path entries.
for path_entry in (project_root, src_path):
    if path_entry not in sys.path:
        sys.path.append(path_entry)

# Local modules imports
from helper_functions import load_data_spi, load_pickle
from estimation.covariance import Covariance
from estimation.expected_return import ExpectedReturn
from optimization.optimization import Optimization, Objective, MeanVariance
from optimization.optimization_data import OptimizationData
from optimization.constraints import Constraints
from backtesting.backtest_item_builder_classes import (
    SelectionItemBuilder,
    OptimizationItemBuilder,
)
from backtesting.backtest_item_builder_functions import (
    bibfn_selection_min_volume,
    bibfn_selection_gaps,
    bibfn_return_series,
    bibfn_budget_constraint,
    bibfn_box_constraints,
    bibfn_size_dependent_upper_bounds,
)
from backtesting.backtest_data import BacktestData
from backtesting.backtest_service import BacktestService
from backtesting.backtest import Backtest

In [2]:
# Read the MSCI country index data. The first CSV column becomes the index and
# is parsed as dates written in day-month-year order.
# The path uses forward slashes throughout: the previous 'admin\O...' contained
# the invalid escape sequence '\O', which newer Python versions warn about.
df = pd.read_csv('c:/Users/admin/OneDrive/Dokumente/Studium UZH/Master/5. Semester/Quantitative Portfolio Management with Python/Assingments/1. Assingment/2. Werkstatt/qpmwp-course/data/msci_country_indices.csv',
                 index_col=0, header=0, parse_dates=True, date_format='%d-%m-%Y')
#df = df.tail(2).iloc[:, :2]
print(df)

                  AT        AU        BE        CA        CH        DE  \
Index                                                                    
1999-01-01  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1999-01-04  0.010057  0.009080  0.042147  0.013070  0.035885  0.052249   
1999-01-05  0.013661 -0.010048  0.020162  0.021940  0.012016  0.001444   
1999-01-06  0.000000  0.015264 -0.000078  0.027640  0.015335  0.036205   
1999-01-07  0.004104  0.016564 -0.016877 -0.003480 -0.011902 -0.020187   
...              ...       ...       ...       ...       ...       ...   
2023-04-12 -0.002488  0.004714 -0.012072  0.001708  0.000017  0.002662   
2023-04-13  0.001297 -0.003806 -0.000049  0.004728  0.000913  0.002400   
2023-04-14  0.009533  0.004829  0.007622  0.001950  0.008387  0.005797   
2023-04-17 -0.005665  0.003404  0.004917  0.003392 -0.001485 -0.002086   
2023-04-18  0.004836 -0.003262 -0.007127  0.002161  0.004373  0.006089   

                  DK        ES       

In [None]:
# Directory holding the parquet inputs (must end with a slash because the
# file names are appended directly).
path_to_data = "C:/Users/admin/OneDrive/Dokumente/Studium UZH/Master/5. Semester/Quantitative Portfolio Management with Python/Assingments/4. Assingment/1. Daten erhalten/" # <change this if necessary>

# Read the market data and the jkp data from their parquet files.
market_data = pd.read_parquet(f'{path_to_data}market_data.parquet')
jkp_data = pd.read_parquet(f'{path_to_data}jkp_data.parquet')

# Create the BacktestData container and attach both frames to it.
# The attribute names `market_data` and `jkp_data` are the ones this notebook
# sets before calling get_return_series() / get_volume_series() below.
df1 = BacktestData()
df1.market_data = market_data
df1.jkp_data = jkp_data
# Optional inspection (disabled):
#print(df1.market_data.head(10))
#print(df1.market_data.tail(10))
#print(df1.jkp_data.head(10))
#print(df1.jkp_data.tail(10))

# dir() lists every attribute and method name of an object (data attributes,
# regular methods and special methods such as __init__ or __str__).
# One listing per line: first the market data frame, then the jkp data frame.
print(dir(df1.market_data), dir(df1.jkp_data), sep="\n")

# Return series: show the first and the last ten rows.
returns = df1.get_return_series()
print(returns.head(10), returns.tail(10), sep="\n")

# Volume series: show the first and the last ten rows.
volumes = df1.get_volume_series()
print(volumes.head(10), volumes.tail(10), sep="\n")

# Column names available in the jkp data.
print(df1.jkp_data.columns)
# Characteristic series example (disabled):
#prices = df1.get_characteristic_series(field="price")
#print(prices.head(10))
#print(prices.tail(10))



In [4]:
def mean_ewm(X: Union[pd.DataFrame, np.ndarray, object],
             scalefactor: Union[float, int] = 1,
             span: Union[float, int] = 10,
             reverse: bool = True, # False = starting from the top and True = starting from the bottom.
             attribute: Optional[str] = None) -> Union[pd.Series, np.ndarray]:
    """Exponentially weighted mean of every column of ``X``.

    For each column the value at position ``t`` (counted from the start of
    the column, or from its end when ``reverse`` is True) receives the weight
    ``alpha * (1 - alpha) ** t`` with ``alpha = 2 / (span + 1)``. Missing
    values are left out of both the weighted sum and the sum of weights, but
    they still occupy a position, so later observations keep their own ``t``.
    The result is ``sum(weight * value) / sum(weight) * scalefactor``.

    Parameters
    ----------
    X : pd.DataFrame, np.ndarray or object
        Data with one series per column. A NumPy array (1-D or 2-D) is
        wrapped in a DataFrame with integer column labels. Any other object
        must expose ``get_return_series()``; its result is used as the data.
    scalefactor : float or int, default 1
        Multiplier applied to every mean.
    span : float or int, default 10
        Span of the weighting scheme; determines ``alpha``.
    reverse : bool, default True
        If True the last row gets the largest weight, otherwise the first.
    attribute : str, optional
        Only used as a guard when ``X`` is neither a DataFrame nor an array:
        ``X`` must have an attribute of this name that holds a DataFrame.
        The data itself always comes from ``X.get_return_series()``.

    Returns
    -------
    pd.Series
        float64 Series keyed by the column labels of the data. A column
        whose weights sum to zero (for example one without any observation)
        yields NaN.

    Raises
    ------
    ValueError
        If ``X`` is neither a DataFrame nor an array and ``attribute`` is
        missing or does not name a DataFrame attribute of ``X``.
    """
    if isinstance(X, np.ndarray):
        # The signature advertises array input, so accept it directly.
        X = pd.DataFrame(X)
    elif not isinstance(X, pd.DataFrame):
        # getattr with a default covers both "attribute does not exist" and
        # "attribute is not a DataFrame" in a single check.
        if attribute is not None and isinstance(getattr(X, attribute, None), pd.DataFrame):
            X = X.get_return_series()
        else:
            raise ValueError("If X is a BacktestData object, an 'attribute' must be specified.")

    alpha = 2 / (span + 1)

    # Work on one float matrix instead of looping over every cell in Python.
    values = X.to_numpy(dtype="float64", na_value=np.nan)
    if reverse:
        values = values[::-1]

    # One weight per row position, shared by all columns.
    weights = alpha * (1 - alpha) ** np.arange(values.shape[0])
    column_weights = weights[:, np.newaxis]

    # Missing observations contribute neither to the numerator nor to the
    # denominator, while the positions of the other rows stay untouched.
    observed = ~np.isnan(values)
    numerators = np.where(observed, values * column_weights, 0.0).sum(axis=0)
    denominators = np.where(observed, column_weights, 0.0).sum(axis=0)

    # A zero denominator means no usable observation: report NaN instead of
    # dividing (the division warning for those entries is suppressed).
    with np.errstate(divide="ignore", invalid="ignore"):
        means = np.where(denominators == 0, np.nan, numerators / denominators) * scalefactor

    return pd.Series(dict(zip(X.columns, means)), dtype="float64")

In [None]:
# Parameters shared by both test calls. They are passed through to mean_ewm
# below, so changing them here actually changes the results.
span = 10
scalefactor = 1
reverse = True

# `df` is already a DataFrame, so `attribute` has no effect on the first call;
# for the BacktestData object `df1` it is required by mean_ewm's input check.
test_mean_1 = mean_ewm(df, scalefactor=scalefactor, span=span, reverse=reverse, attribute="market_data")
test_mean_2 = mean_ewm(df1, scalefactor=scalefactor, span=span, reverse=reverse, attribute="market_data")
pd.set_option('display.max_rows', None) # Show all rows in print output
print("Test mean 1:")
print(test_mean_1)
print("Test mean 2:")
print(test_mean_2.round(6))

Test mean 1:
AT    0.002798
AU    0.001788
BE    0.000074
CA    0.002864
CH    0.002667
DE    0.002657
DK    0.006635
ES    0.002692
FI   -0.000141
FR    0.003251
GB    0.003051
GR    0.005125
HK    0.001514
IE    0.005498
IL    0.000664
IT    0.003213
JP    0.003874
NL    0.000168
NO    0.003108
NZ   -0.000819
PT    0.000054
SE    0.003743
SG    0.000579
US    0.001963
dtype: float64
Test mean 2:
1      0.019162
2      0.000195
3      0.000863
4      0.182269
5      0.001967
6      0.003607
7           NaN
8     -0.001196
9           NaN
10          NaN
11          NaN
12          NaN
13     0.019849
14          NaN
15     0.008004
16     0.003957
17          NaN
18    -0.001061
19     0.002240
20          NaN
21     0.002871
22     0.001296
23    -0.000009
24    -0.000040
25     0.000067
26          NaN
27     0.004779
28          NaN
29     0.005910
30     0.004807
31          NaN
32    -0.000000
33    -0.001387
34    -0.000000
35          NaN
36     0.003469
37     0.000189
38    -

In [None]:
# Stand-alone replay of the input check used in mean_ewm, for inspecting the
# return matrix that the function works on.
attribute = "market_data"
# Stop early unless `df1` carries a DataFrame under the requested attribute.
if attribute is None or not isinstance(getattr(df1, attribute, None), pd.DataFrame):
    raise ValueError("If X is a BacktestData object, an 'attribute' must be specified.")
X = df1.get_return_series()
#print(X.head(20))
#print(X.tail(20))
# Look at one specific column. The label used here is the empty string.
# NOTE(review): presumably a placeholder for a real column label - confirm.
print(X[""])

date
1986-01-01             NaN
1986-01-02             NaN
1986-01-03             NaN
1986-01-06             NaN
1986-01-07             NaN
1986-01-08             NaN
1986-01-09             NaN
1986-01-10             NaN
1986-01-13             NaN
1986-01-14             NaN
1986-01-15             NaN
1986-01-16             NaN
1986-01-17             NaN
1986-01-20             NaN
1986-01-21             NaN
1986-01-22             NaN
1986-01-23             NaN
1986-01-24             NaN
1986-01-27             NaN
1986-01-28             NaN
1986-01-29             NaN
1986-01-30             NaN
1986-01-31             NaN
1986-02-03             NaN
1986-02-04             NaN
1986-02-05             NaN
1986-02-06             NaN
1986-02-07             NaN
1986-02-10             NaN
1986-02-11             NaN
1986-02-12             NaN
1986-02-13             NaN
1986-02-14             NaN
1986-02-17             NaN
1986-02-18             NaN
1986-02-19             NaN
1986-02-20             