In [1]:
import pandas as pd
import numpy as np


In [2]:
df = pd.read_csv('datasets/industry_daily.csv')
df.head()

Unnamed: 0,Date,NoDur,Durbl,Manuf,Enrgy,HiTec,Telcm,Shops,Hlth,Utils,Other
0,19260701,0.02,-0.28,-0.23,0.57,-0.21,-0.02,-0.01,0.97,0.61,0.2
1,19260702,0.29,1.07,0.81,0.64,0.36,0.26,0.01,0.13,0.47,0.1
2,19260706,0.24,0.72,0.22,0.17,0.47,0.17,-0.23,0.23,0.73,-0.18
3,19260707,0.27,0.06,0.23,-0.04,-0.1,0.32,-0.59,0.33,0.17,0.16
4,19260708,0.69,0.05,0.15,0.12,0.35,0.4,-0.36,0.91,-0.2,0.39


In [3]:
df.tail()

Unnamed: 0,Date,NoDur,Durbl,Manuf,Enrgy,HiTec,Telcm,Shops,Hlth,Utils,Other
25977,20250424,-0.69,3.33,1.66,1.48,3.28,0.7,1.56,1.97,0.43,1.59
25978,20250425,-0.59,7.15,0.26,-0.07,1.56,-2.81,0.36,0.67,-0.33,-0.44
25979,20250428,-0.3,0.3,0.17,0.63,-0.23,0.99,-0.17,0.53,0.76,0.34
25980,20250429,0.71,1.68,0.61,-0.4,0.51,1.12,0.45,0.79,0.58,0.7
25981,20250430,0.7,-2.73,0.28,-2.42,0.1,1.27,-0.17,0.93,-0.9,0.18


In [4]:
start_date = pd.to_datetime('2000-01-01')
end_date = pd.to_datetime('2025-04-30')


# Convert Date column to datetime
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d')

# Filter between start and end dates
df = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)].reset_index(drop=True)


In [5]:
T, N = df.shape
N = N - 1
df.head()

Unnamed: 0,Date,NoDur,Durbl,Manuf,Enrgy,HiTec,Telcm,Shops,Hlth,Utils,Other
0,2000-01-03,-1.65,-1.36,-2.49,-3.08,2.47,-1.26,-2.3,-0.58,-2.37,-3.31
1,2000-01-04,-1.81,-2.6,-2.68,-1.96,-5.34,-4.4,-3.28,-4.39,0.47,-3.68
2,2000-01-05,-0.13,-0.04,0.57,2.26,-1.09,0.01,-0.38,2.32,3.48,-0.38
3,2000-01-06,0.68,0.58,2.17,4.0,-4.93,-1.92,0.24,2.68,0.57,2.34
4,2000-01-07,2.11,4.81,3.21,1.16,3.51,0.93,4.27,7.61,1.13,1.99


In [6]:
def getData(df, start, M):
    """returns excess returns for the N assets over a window of M periods from start"""
    return df.loc[start : (start + M - 1), df.columns != "Date"].astype(float).values


def naiveStrategy(N):
    """returns equal weights 1/N for N risky assets"""
    return np.ones(N) / N


def optimalWeights(m, c):
    """returns optimal normalized weights (equation 3 in DeMiguel)
    given mean vector m and covariance matrix c for N risky assets"""
    covI = np.linalg.inv(c) # inverse of covariance
    w = np.matmul(covI, m) # unnormalized optimal weights
    return w/sum(w) # normalized optimal weights


def meanVarianceStrategy(x):
    """returns optimal normalized weights for N risky assets 
    using Markowitz (1952) mean-variance strategy
    based on sample mean and covariance matrix of observations x"""
    m = np.mean(x,axis=0) # mean vector
    c = np.cov(x, rowvar=False)
    return optimalWeights(m, c)


def minVarianceStrategy(x):
    """Minimum variance portfolio (only uses covariance matrix)"""
    c = np.cov(x, rowvar=False)  # Covariance matrix
    ones = np.ones(len(c))       # Vector of ones
    covI = np.linalg.inv(c)
    w = np.matmul(covI, ones)    # Same as DeMiguel equation (8)
    return w/w.sum()             # Normalized weights


def returns(df, t, w):
    """Given weight vector w, computes returns in period t"""
    x = getData(df, t, 1)
    return np.matmul(x, w).item()

def SharpeRatio(returns):
    """returns Sharpe ratio given returns""" 
    m, s = np.mean(returns), np.std(returns)
    return m/s

def evaluateNaiveStrategy(M):
    w = naiveStrategy(N) # fixed weights
    res = []
    for t in range(T-M-2):
        x = getData(df, t, M)
        r = returns(df, t+M+1, w)
        res.append(r)
    SR = SharpeRatio(res)
    print('Sharpe Ratio for Naive Strategy = %4.4f' %SR)
    return SR

def evaluateMeanVarianceStrategy(M):
    resInSample, resOutOfSample = [], []
    for t in range(T-M-2):
        x = getData(df, t, M)
        w = meanVarianceStrategy(x)
        ri = returns(df, t+M, w)
        ro = returns(df, t+M+1, w)
        resInSample.append(ri)
        resOutOfSample.append(ro)
    SRI, SRO = SharpeRatio(resInSample), SharpeRatio(resOutOfSample)
    print(f'Sharpe Ratio for Mean Variance Strategy = {SRO:.4f}')
    return SRI, SRO


def evaluateMinVarianceStrategy(M):
    resOutOfSample = []
    for t in range(T-M-2):
        x = getData(df, t, M)
        w = minVarianceStrategy(x)
        ro = returns(df, t+M+1, w)
        resOutOfSample.append(ro)
    SR = SharpeRatio(resOutOfSample)
    print(f'Sharpe Ratio for Min Variance = {SR:.4f}')
    return SR

In [7]:

for M in range(60, 601, 60):
    print('\nWindow size = %d' % M)
    SR_naive = evaluateNaiveStrategy(M)
    SR_mv_insample, SR_mv = evaluateMeanVarianceStrategy(M)
    SR_minvar = evaluateMinVarianceStrategy(M)


Window size = 60
Sharpe Ratio for Naive Strategy = 0.0343
Sharpe Ratio for Mean Variance Strategy = 0.0016
Sharpe Ratio for Min Variance = 0.0462

Window size = 120
Sharpe Ratio for Naive Strategy = 0.0347
Sharpe Ratio for Mean Variance Strategy = 0.0166
Sharpe Ratio for Min Variance = 0.0462

Window size = 180
Sharpe Ratio for Naive Strategy = 0.0345
Sharpe Ratio for Mean Variance Strategy = -0.0042
Sharpe Ratio for Min Variance = 0.0453

Window size = 240
Sharpe Ratio for Naive Strategy = 0.0350
Sharpe Ratio for Mean Variance Strategy = -0.0077
Sharpe Ratio for Min Variance = 0.0471

Window size = 300
Sharpe Ratio for Naive Strategy = 0.0358
Sharpe Ratio for Mean Variance Strategy = 0.0049
Sharpe Ratio for Min Variance = 0.0450

Window size = 360
Sharpe Ratio for Naive Strategy = 0.0353
Sharpe Ratio for Mean Variance Strategy = 0.0099
Sharpe Ratio for Min Variance = 0.0442

Window size = 420
Sharpe Ratio for Naive Strategy = 0.0369
Sharpe Ratio for Mean Variance Strategy = -0.0152
S