# Group Assignment QF627

*Group Members:*
* Anna Germaine Lim
* Peng Cheng
* Zenith Tay
* Gregory Tan

# Packages Used in This Workbook

In [1]:
## Data Download

import yfinance as yf
import numpy as np
import pandas as pd
from datetime import datetime

## For Visualisation

# Useful Functions Used in this Sheet

In [2]:
def download_data (ticker,
                   start_date: str | datetime,
                   end_date: str | datetime) -> pd.DataFrame:
    data =\
    (
        yf.download(tickers = ticker,
                    start = start_date,
                    end = end_date)
    )

    return data

In [3]:
# preallocate empty array and assign slice by chrisaycock

def np_shift(arr, num, fill_value=np.nan):
    result = np.empty_like(arr)
    if num > 0:
        result[:num] = fill_value
        result[num:] = arr[:-num]
    elif num < 0:
        result[num:] = fill_value
        result[:num] = arr[-num:]
    else:
        result[:] = arr
    return result

### Mean Reversion Strategies

In [4]:
def bollinger_band(price_data: pd.Series,
                            window: int = 14,
                                                ) -> pd.Series:

    price = price_data[price_col]

    std_dev_series =\
    (
        price
        .rolling(window = window)
        .std()
    )

    price_high =\
    (
        price + 2*std_dev_series
    )

    price_low =\
    (
        price - 2*std_dev_series
    )

    return price_high, price_low

### Momentum

In [5]:
def generate_moving_avg(price_data: pd.Series,
                        window: int
                                              ) -> pd.Series:
    
    ma_series =\
    (
        pd.Series
        (   
            price_data
            .rolling(window = window)
            .mean(),

            name = 'MA' + str(window)
        )
    )

    return ma_series

In [6]:
# Exponential Moving Average

def EMA(price_data: pd.Series, 
        window: int
                            ) -> pd.Series:
    EMA = pd.Series(price_data
                    .ewm(span = window,
                         min_periods = window)
                    .mean(),
                    name = "EMA_" + str(window)
                    )
    return EMA

In [7]:
def generate_moving_avg_cross_signal(long_ma: pd.Series,
                                    short_ma: pd.Series) -> pd.Series:
    
    ## Sanity Check
    if len(long_ma) != len(short_ma):
        print('MA series lengths not equal, please check')
        return

    ## Return Signals
    else:

        moving_avg_cross_positions = np.where(short_ma > long_ma, 1.0, 0.0)
        moving_avg_cross_positions = np.where(short_ma < long_ma, -1.0 , moving_avg_cross_positions)

        moving_avg_cross_signals = np.where(moving_avg_cross_positions - np_shift(moving_avg_cross_positions,1) > 0, 1, 0)
        moving_avg_cross_signals = np.where(moving_avg_cross_positions - np_shift(moving_avg_cross_positions, 1) < 0, -1, moving_avg_cross_signals)

        buy_or_sell = pd.DataFrame({'MA_Cross_Signal':moving_avg_cross_signals, 'MA_Cross_Position': moving_avg_cross_positions},
                                   index = long_ma.index
                                   )

        return buy_or_sell

In [8]:
def generate_rate_of_change(price_data: pd.Series,
                            n: int
                            ) -> pd.Series:
    
    ROC = pd.Series(
                        (price_data - price_data.diff(n)) / price_data.diff(n),
                        name = 'ROC'+str(n),
                        # index = price_data.index
                    )
    
    return ROC

In [9]:
def generate_rate_of_change_signal(roc_data: pd.Series) -> pd.Series:

    roc_position = pd.Series(np.where(roc_data > 0, 1.0, 0.0), index=roc_data.index, name = 'ROC_Position')
    roc_signal = roc_position.diff()
    roc_signal.name = 'ROC_Signal'
    
    # roc_signal = roc_position - np_shift(roc_position, 1)


    # buy_or_sell = pd.DataFrame({'ROC_Position': roc_position, 'ROC_Signal': roc_signal},
    #                         #    index = roc_data.index
    #                            )

    return pd.concat([roc_position, roc_signal], axis=1)

In [10]:
def annual_sharpe(returns):
    days = (returns.index[-1] - returns.index[0]).days
    
    return\
    (
        (
            (1+returns).prod()
            **(365/days) 
            - 1
        )
        /
        returns.std()
        /
        np.sqrt(252)
    )

In [11]:
def RSI(series, period):
    
    delta = series.diff().dropna()
    
    u = delta * 0
    d = u.copy()
    
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    
    u[u.index[period - 1]] = np.mean( u[:period] ) # 
    
    u = u.drop(u.index[:(period - 1)
                      ]
              )
    
    d[d.index[period - 1]] = np.mean( d[:period] )
    
    d = d.drop(d.index[:(period - 1)
                      ]
              )
    
    rs = u.ewm(com = period - 1, adjust = False).mean() / \
         d.ewm(com = period - 1, adjust = False).mean()
    
    return 100 - 100 / (1 + rs)

## Performance Metrics

In [12]:
def annual_sharpe(returns):
    days = (returns.index[-1] - returns.index[0]).days
    
    return\
    (
        (
            (1+returns).prod()
            **(365/days) 
            - 1
        )
        /
        returns.std()
        /
        np.sqrt(252)
    )

In [13]:
## CAGR

def cagr(returns: pd.Series) -> float:
    days = (returns.index[-1] - returns.index[0]).days
    return ( (1 + returns).prod() )**(365/days) - 1   

In [14]:
### Max Drawdown

def max_drawdown(cumulative_returns):
    max_performance = cumulative_returns.cummax()
    dd = ((max_performance - cumulative_returns) / max_performance).max()
    return dd


### Longest Drawdown

def calculate_longest_drawdown(cumulative_returns):
    drawdown = cumulative_returns.cummax() - cumulative_returns
    period =\
    (
        np
        .diff(np
              .append(drawdown[drawdown == 0].index, 
                      drawdown.index[-1: ]
                    )
            )
    )
    return period.max() / np.timedelta64(1, "D")

In [15]:
def evaluate_returns(returns_series: pd.Series):
    
    cum_returns_series = (1 + returns_series).cumprod()

    tot_returns = (1 + returns_series).prod()
    CAGR = cagr(returns_series)
    Annualised_Sharpe = annual_sharpe(returns_series)
    Max_DD = max_drawdown(cum_returns_series)
    Longest_DD = calculate_longest_drawdown(cum_returns_series)


    print('-- Summary of Returns -- \n',
          f'Total Returns: {tot_returns: .2%} \n',
          f'CAGR: {CAGR: .2%} \n',
          f'Annualised_Sharpe: {Annualised_Sharpe: .2%} \n',
          f'Max Drawdown: {Max_DD: .2%} \n',
          f'Longest Drawdown (Days): {Longest_DD}'            
          )

    return pd.Series([tot_returns, CAGR, Annualised_Sharpe, Max_DD, Longest_DD])

## Packages

In [16]:
## Data Download

import yfinance as yf
import numpy as np
import pandas as pd
from datetime import datetime

### Visualisation

## Download Dataset

In [18]:
train_proportion = 0.75

spy_data =\
(
    download_data('SPY',
                  start_date = '2006-11-01',
                  end_date = '2025-11-12')
    .droplevel(level = 1,
               axis = 1)
)

spy_train_data = spy_data[:int(train_proportion*len(spy_data))]
spy_test_data = spy_data[int(train_proportion*len(spy_data)):]


spy_data_close = spy_train_data['Close'].to_frame()
spy_data_returns = spy_train_data['Close'].pct_change().to_frame().rename(columns= {'Close': 'Returns'})

  yf.download(tickers = ticker,
[*********************100%***********************]  1 of 1 completed


In [19]:
spy_data_returns

Unnamed: 0_level_0,Returns
Date,Unnamed: 1_level_1
2006-11-01,
2006-11-02,-0.000585
2006-11-03,-0.001754
2006-11-06,0.011278
2006-11-07,0.003838
...,...
2021-02-01,0.016646
2021-02-02,0.014140
2021-02-03,0.000786
2021-02-04,0.011366


In [20]:
evaluate_returns(spy_data_returns['Returns'])

-- Summary of Returns -- 
 Total Returns:  378.22% 
 CAGR:  9.77% 
 Annualised_Sharpe:  47.66% 
 Max Drawdown:  55.19% 
 Longest Drawdown (Days): 1773.0


0       3.782159
1       0.097678
2       0.476599
3       0.551895
4    1773.000000
dtype: float64

## Momentum Strategies

In [21]:
time_periods = np.arange(10, 201, 10).tolist()

#### ROC

In [22]:
def generate_ROC_Metrics(time_periods):

    ROC_Metrics = pd.DataFrame()

    for i in time_periods:
        roc_data = spy_data_returns.copy()
        
        roc_data[f'ROC{i}'] = generate_rate_of_change(spy_data_close['Close'], i)
        roc_data = pd.concat([roc_data, generate_rate_of_change_signal(roc_data[f'ROC{i}'])], axis = 1)
        roc_data['Strat_returns'] = roc_data['Returns'] * roc_data['ROC_Position'].shift(1)
        
        print(f'===Data for ROC{i}===')
        roc_series = evaluate_returns(roc_data['Strat_returns'])
        roc_series.name = f'ROC{i}'
        
        ROC_Metrics = pd.concat([ROC_Metrics, roc_series], axis = 1)

    ROC_Metrics.index = ['Total Returns', 'CAGR', 'Annualised Sharpe', 'Max Drawdown', 'Longest Drawdown (Days)']
    return ROC_Metrics.T

In [23]:
generate_ROC_Metrics(time_periods)

===Data for ROC10===
-- Summary of Returns -- 
 Total Returns:  168.31% 
 CAGR:  3.71% 
 Annualised_Sharpe:  31.33% 
 Max Drawdown:  45.35% 
 Longest Drawdown (Days): 4178.0
===Data for ROC20===
-- Summary of Returns -- 
 Total Returns:  248.25% 
 CAGR:  6.58% 
 Annualised_Sharpe:  59.26% 
 Max Drawdown:  26.66% 
 Longest Drawdown (Days): 864.0
===Data for ROC30===
-- Summary of Returns -- 
 Total Returns:  222.98% 
 CAGR:  5.78% 
 Annualised_Sharpe:  51.64% 
 Max Drawdown:  28.71% 
 Longest Drawdown (Days): 1318.0
===Data for ROC40===
-- Summary of Returns -- 
 Total Returns:  268.57% 
 CAGR:  7.17% 
 Annualised_Sharpe:  62.99% 
 Max Drawdown:  33.84% 
 Longest Drawdown (Days): 1205.0
===Data for ROC50===
-- Summary of Returns -- 
 Total Returns:  230.08% 
 CAGR:  6.01% 
 Annualised_Sharpe:  53.33% 
 Max Drawdown:  24.29% 
 Longest Drawdown (Days): 1023.0
===Data for ROC60===
-- Summary of Returns -- 
 Total Returns:  289.40% 
 CAGR:  7.73% 
 Annualised_Sharpe:  67.89% 
 Max Drawdown:

Unnamed: 0,Total Returns,CAGR,Annualised Sharpe,Max Drawdown,Longest Drawdown (Days)
ROC10,1.683073,0.037147,0.313251,0.453507,4178.0
ROC20,2.482471,0.065773,0.592614,0.266624,864.0
ROC30,2.229756,0.057787,0.516356,0.287065,1318.0
ROC40,2.685681,0.071664,0.629922,0.338358,1205.0
ROC50,2.300761,0.060112,0.533286,0.242892,1023.0
ROC60,2.894003,0.077287,0.678903,0.163699,860.0
ROC70,1.83094,0.043283,0.376865,0.316276,1300.0
ROC80,2.144994,0.054918,0.484968,0.283964,1106.0
ROC90,2.06628,0.052159,0.457028,0.269074,936.0
ROC100,2.353112,0.061785,0.524381,0.237624,879.0


In [25]:
whos

Variable                           Type         Data/Info
---------------------------------------------------------
EMA                                function     <function EMA at 0x000001E4D1E93420>
RSI                                function     <function RSI at 0x000001E4D1E936A0>
annual_sharpe                      function     <function annual_sharpe at 0x000001E4D1E93BA0>
bollinger_band                     function     <function bollinger_band at 0x000001E4D1E932E0>
cagr                               function     <function cagr at 0x000001E4D1E93560>
calculate_longest_drawdown         function     <function calculate_longe<...>wn at 0x000001E4D1E93880>
datetime                           type         <class 'datetime.datetime'>
download_data                      function     <function download_data at 0x000001E4CEEB23E0>
evaluate_returns                   function     <function evaluate_returns at 0x000001E4D1E937E0>
generate_ROC_Metrics               function     <function genera

In [71]:
def generate_ma_cross_metrics(short_ma: list[int], long_ma: list[int]) -> pd.Series:
    
    ma_cross_data = spy_data_close.copy()

    ma_cross_summary_table = pd.DataFrame()

    for j in long_ma:

        for i in short_ma:

            if i < j:
            
                short_ma_cross_data = generate_moving_avg(ma_cross_data['Close'], i)
                long_ma_cross_data = generate_moving_avg(ma_cross_data['Close'], j)

                ma_cross_returns =\
                (
                    pd
                    .concat(
                            (spy_data_returns, generate_moving_avg_cross_signal(long_ma_cross_data, short_ma_cross_data)),
                            axis = 1
                            )
                )

                ma_cross_returns['Strat_Returns'] = ma_cross_returns['Returns']*ma_cross_returns['MA_Cross_Position'].shift(1)

                ma_cross_summary_stat_series = evaluate_returns(ma_cross_returns['Strat_Returns'])
                ma_cross_summary_stat_series.name = f'MA{i} + MA{j}'


                ma_cross_summary_table = pd.concat([ma_cross_summary_table, ma_cross_summary_stat_series], axis = 1)

    ma_cross_summary_table.index = ['Total Returns', 'CAGR', 'Annualised Sharpe', 'Max Drawdown', 'Longest Drawdown (Days)']
    return ma_cross_summary_table.T

In [85]:
MA_list =\
(
    list(range(10, 201, 10))
) 

In [93]:
ma_cross_summary = generate_ma_cross_metrics(MA_list, MA_list)
ma_cross_summary.sort_values(by = 'Annualised Sharpe', ascending = False).head(10)

-- Summary of Returns -- 
 Total Returns:  59.22% 
 CAGR: -3.60% 
 Annualised_Sharpe: -17.58% 
 Max Drawdown:  72.37% 
 Longest Drawdown (Days): 4484.0
-- Summary of Returns -- 
 Total Returns:  80.15% 
 CAGR: -1.54% 
 Annualised_Sharpe: -7.51% 
 Max Drawdown:  45.59% 
 Longest Drawdown (Days): 4460.0
-- Summary of Returns -- 
 Total Returns:  137.62% 
 CAGR:  2.26% 
 Annualised_Sharpe:  11.04% 
 Max Drawdown:  37.04% 
 Longest Drawdown (Days): 1886.0
-- Summary of Returns -- 
 Total Returns:  65.60% 
 CAGR: -2.91% 
 Annualised_Sharpe: -14.20% 
 Max Drawdown:  52.58% 
 Longest Drawdown (Days): 4460.0
-- Summary of Returns -- 
 Total Returns:  77.97% 
 CAGR: -1.73% 
 Annualised_Sharpe: -8.43% 
 Max Drawdown:  59.85% 
 Longest Drawdown (Days): 4460.0
-- Summary of Returns -- 
 Total Returns:  96.25% 
 CAGR: -0.27% 
 Annualised_Sharpe: -1.30% 
 Max Drawdown:  44.87% 
 Longest Drawdown (Days): 4460.0
-- Summary of Returns -- 
 Total Returns:  72.30% 
 CAGR: -2.25% 
 Annualised_Sharpe: -10.

Unnamed: 0,Total Returns,CAGR,Annualised Sharpe,Max Drawdown,Longest Drawdown (Days)
MA180 + MA200,2.594055,0.069061,0.340667,0.463585,2496.0
MA10 + MA80,2.516408,0.066787,0.326693,0.277263,964.0
MA180 + MA190,2.436755,0.064386,0.317043,0.462252,2494.0
MA110 + MA120,2.402293,0.063324,0.310285,0.436644,1106.0
MA130 + MA180,2.300332,0.060098,0.295255,0.46307,3073.0
MA40 + MA50,2.219692,0.057451,0.280492,0.269,931.0
MA170 + MA200,2.167825,0.055701,0.274739,0.425095,2174.0
MA130 + MA190,2.132212,0.054477,0.268231,0.423252,2171.0
MA150 + MA170,2.11602,0.053914,0.264716,0.422551,2167.0
MA120 + MA190,2.085899,0.052856,0.260247,0.453921,2530.0


MA50 + MA150     1.798223
MA150 + MA200    1.793303
MA50 + MA200     1.630600
MA100 + MA150    1.469398
MA100 + MA200    1.256069
MA50 + MA100     1.240042
Name: Total Returns, dtype: float64

In [None]:
short_ma_test = generate_moving_avg(ma_cross_data['Close'], 20)

In [41]:
short_ma_test

Date
2006-11-01           NaN
2006-11-02           NaN
2006-11-03           NaN
2006-11-06           NaN
2006-11-07           NaN
                 ...    
2021-02-01    353.972536
2021-02-02    354.569853
2021-02-03    355.062314
2021-02-04    355.654013
2021-02-05    356.057059
Name: MA20, Length: 3590, dtype: float64