# Assignment 4

Deadline: 30.04.2025 12:00 CET

<Add your name, student-id and emal address>

## Prerequisites: Library imports, data load and initialization of the backtest service

In [62]:
# Standard library imports
import os
import sys
import copy
from typing import Optional

# Third party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Add the project root directory to Python path
project_root = os.path.dirname(os.path.dirname(os.getcwd()))   #<Change this path if needed>
src_path = os.path.join(project_root, 'qpmwp-course\\src')    #<Change this path if needed>
sys.path.append(project_root)
sys.path.append(src_path)

# Local modules imports
from helper_functions import load_data_spi, load_pickle
from estimation.covariance import Covariance
from estimation.expected_return import ExpectedReturn
from optimization.optimization import Optimization, Objective, MeanVariance
from optimization.optimization_data import OptimizationData
from optimization.constraints import Constraints
from backtesting.backtest_item_builder_classes import (
    SelectionItemBuilder,
    OptimizationItemBuilder,
)
from backtesting.backtest_item_builder_functions import (
    bibfn_selection_min_volume,
    bibfn_selection_gaps,
    bibfn_return_series,
    bibfn_budget_constraint,
    bibfn_box_constraints,
    bibfn_size_dependent_upper_bounds,
)
from backtesting.backtest_data import BacktestData
from backtesting.backtest_service import BacktestService
from backtesting.backtest import Backtest

# custom imports
from scipy.optimize import minimize
from scipy.optimize import minimize_scalar

In [63]:
# Load data
path_to_data = '../data/'  # <change this to your path to data>

# Load market and jkp data from parquet files
market_data = pd.read_parquet(path = f'{path_to_data}market_data.parquet')

# Instantiate the BacktestData class
# and set the market data and jkp data as attributes
data = BacktestData()
data.market_data = market_data
data.bm_series = load_data_spi(path='../data/')  # <change this if necessary>

In [64]:
# Define rebalancing dates
n_days = 21*3
market_data_dates = market_data.index.get_level_values('date').unique().sort_values(ascending=True)
rebdates = market_data_dates[market_data_dates > '2015-01-01'][::n_days].strftime('%Y-%m-%d').tolist()

In [65]:
# Define the selection item builders.
selection_item_builders = {
    'gaps': SelectionItemBuilder(
        bibfn = bibfn_selection_gaps,
        width = 252*3,
        n_days = 10,
    ),
    'min_volume': SelectionItemBuilder(
        bibfn = bibfn_selection_min_volume,
        width = 252,
        min_volume = 500_000,
        agg_fn = np.median,
    ),
}

# Define the optimization item builders.
optimization_item_builders = {
    'return_series': OptimizationItemBuilder(
        bibfn = bibfn_return_series,
        width = 252*3,
        fill_value = 0,
    ),
    'budget_constraint': OptimizationItemBuilder(
        bibfn = bibfn_budget_constraint,
        budget = 1,
    ),
    'box_constraints': OptimizationItemBuilder(
        bibfn = bibfn_box_constraints,
        upper = 0.1,
    ),
    'size_dep_upper_bounds': OptimizationItemBuilder(
        bibfn = bibfn_size_dependent_upper_bounds,
        small_cap = {'threshold': 300_000_000, 'upper': 0.02},
        mid_cap = {'threshold': 1_000_000_000, 'upper': 0.05},
        large_cap = {'threshold': 10_000_000_000, 'upper': 0.1},
    ),
}

# Initialize the backtest service
bs = BacktestService(
    data = data,
    selection_item_builders = selection_item_builders,
    optimization_item_builders = optimization_item_builders,
    rebdates = rebdates,
)

## 1. Maximum Sharpe Ratio Portfolio

a) 

(6 points)

Complete the `MaxSharpe` class below by implementing your its methods `set_objective` and `solve`.
The `solve` method should implement an iterative algorithm that quickly approximates the "true" maximimum Sharpe ratio portfolio (given the estimates of mean and covariance). This approximation should be done by repeatedly solving a mean-variance optimization problem, where the risk aversion parameter (which scales the covariance matrix) is adjusted in each iteration. The algorithm should terminate after a maximum of 10 iterations. 

In [66]:
class MaxSharpe(Optimization):

    def __init__(self,
                 constraints: Optional[Constraints] = None,
                 covariance: Optional[Covariance] = None,
                 expected_return: Optional[ExpectedReturn] = None,
                 iters: int = 10,
                 risk_aversion: float = 1.0,
                 turnover_penalty: float = 0.0,
                 **kwargs) -> None:
        super().__init__(
            constraints=constraints,
            **kwargs,
        )
        self.covariance = Covariance() if covariance is None else covariance
        self.expected_return = ExpectedReturn() if expected_return is None else expected_return
        self.iters = iters
        self.params['risk_aversion'] = risk_aversion
        self.params['turnover_penalty'] = turnover_penalty

    def set_objective(self, optimization_data: OptimizationData) -> None:
        self.data = optimization_data
        X = optimization_data['return_series']
        self.covmat = self.covariance.estimate(X=X, inplace=False)
        self.mu  = self.expected_return.estimate(X=X, inplace=False)
        self.objective = Objective(
            q = self.mu * -1,
            P = self.covmat * 2 * self.params['risk_aversion'],
        )

        self.og_P = (2 * self.covmat).copy()
        return None

    def solve(self) -> None:
        
        # opimization function
        def mean_var_solver(a):
            self.params['risk_aversion'] = a[0]
            self.objective.coefficients['P'] = self.og_P * a[0]
            super(MaxSharpe, self).solve()
            w = np.array(list(self.results['weights'].values()))
            sharpe = (self.mu @ w) / np.sqrt(w @ self.covmat @ w)
            return sharpe

        sharpe_maximization = minimize(
        fun=lambda x: -1.0*mean_var_solver(x),
        x0=np.array([1.0]),
        bounds=[(1e-2, 1e2)],
        method="Powell",
        tol = 1e-3,
        options={"maxiter": self.iters}
        )

        # getting results
        optimal_ra = (sharpe_maximization.x)[0]
        optimal_sr = -1*sharpe_maximization.fun
        self.params['risk_aversion'] = optimal_ra
        self.objective.coefficients['P'] = self.og_P * optimal_ra
        super().solve()
        optimal_w = self.results['weights']

        # storing results
        self.results = {
            'risk_aversion': optimal_ra,
            'sharpe_ratio': optimal_sr,
            'weights': optimal_w
        }

        return None

b) 

(2 points)

Provide a theoretical or empirical justification that your algorithm converges to the true maximum Sharpe ratio portfolio for the given coefficients of mean and covariance.
Hint: If you want to provide an empirical justification, you can perform an optimization for a single point in time by running the following code.

In [67]:
# bs.optimization = MaxSharpe(
#     covariance=Covariance(method='pearson'),
#     expected_return=ExpectedReturn(method='geometric'),
#     solver_name='cvxopt',  # <change this to your preferred solver>
#     iters = 1,
#     risk_aversion = 1.0
# )
# bs.prepare_rebalancing('2015-01-02')
# bs.optimization.set_objective(bs.optimization_data)
# bs.optimization.solve()
# bs.optimization.results

In [68]:
test_iterations = np.arange(0, 101, 10)
Convergence_df = pd.DataFrame(index=test_iterations, columns=['Sharpe Ratio', 'Risk Aversion'])
for iters in test_iterations:
    bs.optimization = MaxSharpe(
        covariance=Covariance(method='pearson'),
        expected_return=ExpectedReturn(method='geometric'),
        solver_name='cvxopt',  # <change this to your preferred solver>
        iters = iters,
        risk_aversion = 1.0
        )
    bs.prepare_rebalancing('2015-01-02')
    bs.optimization.set_objective(bs.optimization_data)
    bs.optimization.solve()
    results = bs.optimization.results
    Convergence_df.loc[iters, 'Sharpe Ratio'] = results['sharpe_ratio']
    Convergence_df.loc[iters, 'Risk Aversion'] = results['risk_aversion']

Convergence_df



Unnamed: 0,Sharpe Ratio,Risk Aversion
0,0.149226,12.242865
10,0.149226,12.243072
20,0.149226,12.243072
30,0.149226,12.243072
40,0.149226,12.243072
50,0.149226,12.243072
60,0.149226,12.243072
70,0.149226,12.243072
80,0.149226,12.243072
90,0.149226,12.243072


## 2. Backtest MaxSharpe with Turnover Penalty

(5 points)

Calibrate the turnover penalty parameter such that the backtest of the MaxSharpe strategy displays an annual turnover of roughly 100%.

In [69]:
# Update the backtest service with a MaxSharpe optimization object
penalties  = [ 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2]
closest_penalty = None
closest_gap= 100

for i in list(penalties):
    #bs_copy = copy.deepcopy(bs)
    #bs_copy.optimization = 
    bs.optimization = MaxSharpe(
    covariance = Covariance(method='pearson'),
    expected_return = ExpectedReturn(method='geometric'),
    solver_name = 'cvxopt',    # <change this to your preferred solver>
    turnover_penalty= i,   # <change this>
    )

    backtest_trials = Backtest()
    #backtest_trials.run(bs=bs_copy)
    backtest_trials.run(bs=bs)
    
    #annual_turnover = backtest_trials.strategy.turnover(return_series = bs_copy.data.get_return_series()).mean() * 4
    annual_turnover = backtest_trials.strategy.turnover(return_series = bs.data.get_return_series()).mean() * 4
    tolerance = 0.05
    gap = abs(annual_turnover - 1.0)
    if gap < closest_gap:
        if gap < tolerance:
            closest_penalty = i
            break
        else:
            closest_gap = gap
            closest_penalty = i
            new_point1 = i + i/2
            newpoint2 = i - i/2
            if new_point1 not in penalties:
                penalties.extend([new_point1])
            if newpoint2 not in penalties:
                penalties.extend([newpoint2])
    
    print(gap)
    print(closest_gap)
    print(i)

print(closest_penalty)

Rebalancing date: 2015-01-02
Rebalancing date: 2015-04-01
Rebalancing date: 2015-06-29
Rebalancing date: 2015-09-24
Rebalancing date: 2015-12-22
Rebalancing date: 2016-03-18
Rebalancing date: 2016-06-15
Rebalancing date: 2016-09-12
Rebalancing date: 2016-12-08
Rebalancing date: 2017-03-07
Rebalancing date: 2017-06-02
Rebalancing date: 2017-08-30
Rebalancing date: 2017-11-27
Rebalancing date: 2018-02-22
Rebalancing date: 2018-05-22
Rebalancing date: 2018-08-17
Rebalancing date: 2018-11-14
Rebalancing date: 2019-02-11
Rebalancing date: 2019-05-09
Rebalancing date: 2019-08-06
Rebalancing date: 2019-11-01
Rebalancing date: 2020-01-29
Rebalancing date: 2020-04-27
Rebalancing date: 2020-07-23
Rebalancing date: 2020-10-20
Rebalancing date: 2021-01-15
Rebalancing date: 2021-04-14
Rebalancing date: 2021-07-12
Rebalancing date: 2021-10-07
Rebalancing date: 2022-01-04
Rebalancing date: 2022-04-01
Rebalancing date: 2022-06-29
Rebalancing date: 2022-09-26
Rebalancing date: 2022-12-22
Rebalancing da

KeyboardInterrupt: 

In [60]:
# # getting subset of rebdates to test on
# rebdates_df = pd.Series(rebdates)
# rebdates_cal = rebdates_df[rebdates_df > '2018-01-01']
# rebdates_cal = rebdates_cal[rebdates_cal< '2020-01-01'].tolist()

# # creating backtest service with shorter lookback
# bs = BacktestService(
#     data = data,
#     selection_item_builders = selection_item_builders,
#     optimization_item_builders = optimization_item_builders,
#     rebdates = rebdates_cal,
# )

In [None]:
tol = 0.05 # tolerance
iter_count = 0 # iteration of refinement count
max_iter = 3 # max number of refinements
to_array = [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3]


best_to = None # best turnover
best_annual_to = None # the annual turnover best_to achieves

my_bool = True

while my_bool:
    errors = []
    print('turnover penalty array is',to_array)
    for i in range((len(to_array))):
        to = to_array[i]
        bs.optimization = MaxSharpe(
            covariance = Covariance(method='pearson'),
            expected_return = ExpectedReturn(method='geometric'),
            solver_name = 'cvxopt',    # <change this to your preferred solver>
            turnover_penalty= to,   # <change this>
            )

        # Instantiate the backtest object
        bt_ms = Backtest()

        # Run the backtest
        bt_ms.run(bs = bs)

        annual_to = bt_ms.strategy.turnover(return_series = bs.data.get_return_series()).mean() * 4

        errors.append(abs(annual_to - 1.0))

        print(f'for index {i} and to {to} our annual turnover is {annual_to} with error {errors[i]}')

        if errors[i] < tol:
            best_to = to_array[i]
            best_annual_to = annual_to
            my_bool = False
            break

        elif (i>1) and (errors[i]>errors[i-1]):
            to_array = np.linspace(to_array[i-2], to_array[i], 7)[1:-1]
            break
    iter_count += 1
    if iter_count >= max_iter:
        break
     
print('best penalty',best_to)
print('annual turover achieved',best_annual_to)

turnover penalty array is [1e-05, 5e-05, 0.0001, 0.0005, 0.001, 0.005]
Rebalancing date: 2015-01-02
Rebalancing date: 2015-04-01
Rebalancing date: 2015-06-29
Rebalancing date: 2015-09-24
Rebalancing date: 2015-12-22
Rebalancing date: 2016-03-18
Rebalancing date: 2016-06-15
Rebalancing date: 2016-09-12
Rebalancing date: 2016-12-08
Rebalancing date: 2017-03-07
Rebalancing date: 2017-06-02
Rebalancing date: 2017-08-30
Rebalancing date: 2017-11-27
Rebalancing date: 2018-02-22
Rebalancing date: 2018-05-22
Rebalancing date: 2018-08-17
Rebalancing date: 2018-11-14
Rebalancing date: 2019-02-11
Rebalancing date: 2019-05-09
Rebalancing date: 2019-08-06
Rebalancing date: 2019-11-01
Rebalancing date: 2020-01-29
Rebalancing date: 2020-04-27
Rebalancing date: 2020-07-23
Rebalancing date: 2020-10-20
Rebalancing date: 2021-01-15
Rebalancing date: 2021-04-14
Rebalancing date: 2021-07-12
Rebalancing date: 2021-10-07
Rebalancing date: 2022-01-04
Rebalancing date: 2022-04-01
Rebalancing date: 2022-06-29
R

KeyboardInterrupt: 

## 3. Simulation and Descriptive Statistics

(3 points)

- Simulate the portfolio returns from your MaxSharpe backtest. Use fixed costs of 1% and variable costs of 0.3%.
- Plot the cumulated returns of the MaxSharpe strategy together with those of the SPI Index.
- Plot the turnover of your MaxSharpe strategy over time.
- Print the annualized turnover (computed as the average turnover over the backtest multiplied by the number of rebalancing per year) for your MaxSharpe strategy.
- Create and print a table with descriptive performance statistics for your MaxSharpe strategy and the SPI Index.


In [None]:
# need to 


# Get return series
return_series = bs.data.get_return_series(
    ids=list(set().union(*[set(p.weights.keys()) for p in bt_ms.strategy.portfolios])),
    fillna_value=0
)

# Simulate with transaction costs
strategy_returns = bt_ms.strategy.simulate(
    return_series=return_series,
    fc=0.01,    # 1% fixed costs
    vc=0.003,   # 0.3% variable costs
)

benchmark_returns = bs.data.bm_series

# Calculate cumulative returns
strategy_cumulative = (1 + strategy_returns).cumprod()
benchmark_cumulative = (1 + benchmark_returns).cumprod()

# Plot cumulative returns
plt.figure(figsize=(12, 6))
plt.plot(strategy_cumulative.index, strategy_cumulative.values, label='MaxSharpe Strategy')
plt.plot(benchmark_cumulative.index, benchmark_cumulative.values, label='SPI Index')
plt.title('Cumulative Returns: MaxSharpe vs SPI Index')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.show()

# Calculate turnover over time
portfolios = bt_ms.strategy.portfolios
turnover_values = []
dates = []

for i in range(1, len(portfolios)):
    prev_weights = pd.Series(portfolios[i-1].weights)
    curr_weights = pd.Series(portfolios[i].weights)
    
    # Align indices to ensure consistent keys
    all_assets = sorted(set(prev_weights.index) | set(curr_weights.index))
    prev_weights = prev_weights.reindex(all_assets, fill_value=0)
    curr_weights = curr_weights.reindex(all_assets, fill_value=0)
    
    # Calculate turnover (one-way)
    turnover = (curr_weights - prev_weights).abs().sum() / 2
    
    turnover_values.append(turnover)
    dates.append(portfolios[i].rebalancing_date)

turnover = pd.Series(turnover_values, index=dates)

# Plot turnover over time
plt.figure(figsize=(12, 6))
plt.plot(turnover.index, turnover.values)
plt.title('Portfolio Turnover Over Time')
plt.xlabel('Date')
plt.ylabel('Turnover')
plt.grid(True)
plt.show()

# Calculate annualized turnover
avg_turnover = turnover.mean()
annualized_turnover = avg_turnover * 4  # Quarterly rebalancing
print(f"Annualized Turnover: {annualized_turnover:.2%}")

# Calculate performance statistics
def calculate_performance_stats(returns):
    total_return = (1 + returns).prod() - 1
    annualized_return = (1 + total_return) ** (252/len(returns)) - 1
    volatility = returns.std() * np.sqrt(252)
    sharpe_ratio = annualized_return / volatility
    max_drawdown = (1 + returns).cumprod().div((1 + returns).cumprod().cummax()) - 1
    max_drawdown = max_drawdown.min()
    downside_deviation = returns[returns < 0].std() * np.sqrt(252)
    skewness = returns.skew()
    kurtosis = returns.kurtosis()
    
    return {
        'Total Return': total_return,
        'Annualized Return': annualized_return,
        'Volatility': volatility,
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown': max_drawdown,
        'Downside Deviation': downside_deviation,
        'Skewness': skewness,
        'Kurtosis': kurtosis
    }

# Calculate statistics for both strategy and benchmark
strategy_stats = calculate_performance_stats(strategy_returns)
benchmark_stats = calculate_performance_stats(benchmark_returns)

# Create DataFrame for statistics
stats_df = pd.DataFrame({
    'MaxSharpe Strategy': strategy_stats,
    'SPI Index': benchmark_stats
})

# Display statistics
print("\nPerformance Statistics:")
print(stats_df)

NameError: name 'bt_ms' is not defined