In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from dask import dataframe as dd
from dask.diagnostics import ProgressBar
pbar = ProgressBar()
pbar.register()
from multiprocessing import cpu_count
import statsmodels.api as sm
import math
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_notebook
import scipy.stats as stats
from IPython.display import display
from IPython.core.debugger import set_trace as bp
import os
from statsmodels.tsa.stattools import adfuller

%matplotlib inline

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc

from sklearn.utils import resample
from sklearn.utils import shuffle
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, BaggingClassifier
from sklearn.model_selection import train_test_split

import pyfolio as pf
from scipy import interp

from random import gauss
import scipy.stats as ss

  ' to position notionals.'


# Chapter 11 - The Dangers of Backtesting

In [2]:
def testAccuracy_MC(sr_base, sr_case):
    """
    Test the accuracy of CSCV against hold-out
    It generates numTrials random samples and directly computes the _____
    ____ proportion where OOS performance was below the median
    """
    length, numTrials, numMC = 1000, 100, 1000

    # 1) Determine mu, sigma
    mu_base, sigma_base = sr_base / (365.25 * 5 / 7.00), 1 / (365.25 * 5 / 7.00) ** 0.5

    mu_case, sigma_case = sr_case / (365.25 * 5 / 7.00), 1 / (365.25 * 5 / 7.00) ** 0.5

    hist, probOverfit = [], 0

    # 2) Generate trials
    # M Matrix build from gaussian distribution
    for m in range(numMC):
        for i in range(1, numTrials):
            j = np.array([gauss(0, 1) for j in range(length)])
            j *= sigma_base / np.std(j) # re-scale
            j += mu_base - np.mean(j) # re-center
            j = np.reshape(j, (j.shape[0], 1))

            if i == 1:
                pnl = np.copy(j)
            else:
                pnl = np.append(pnl, j, axis=1)
    # 3) Add test case
        j = np.array([gauss(0, 1) for j in range(length)])
        j *= sigma_case / np.std(j) # re-scale
        j += mu_case - np.mean(j)
        j = np.reshape(j, (j.shape[0], 1))
        pnl = np.append(pnl, j, axis=1)

    # 4) Run test
        mu_is = [np.average(pnl[:length//2, i]) for i in range(pnl.shape[1])]
        sigma_is = [np.std(pnl[:length//2, i]) for i in range(pnl.shape[1])]

        mu_oos = [np.average(pnl[length//2:, i]) for i in range(pnl.shape[1])]
        sigma_oos = [np.std(pnl[length//2:,i]) for i in range(pnl.shape[1])]

        sr_is = [mu_is[i]/ sigma_is[i] for i in range(len(mu_is))]
        sr_oos = [mu_oos[i]/sigma_oos[i] for i in range(len(mu_oos))]

        print('# matrix:', m, '# max index:', sr_is.index(max(sr_is)), '# Max_IS_sharpe_ratio:', round(max(sr_is), 4), '# Max_OOS_sharpe_ratio:', round(sr_oos[sr_is.index(max(sr_is))], 4))

        sr_oos_ = sr_oos[sr_is.index(max(sr_is))]

        # out of sample result saved 
        hist.append(sr_oos_)

        # over fitting count 
        if sr_oos_ < np.median(sr_oos):
            probOverfit += 1

    probOverfit /= float(numMC)

    print(probOverfit)
    return

In [3]:
# Monte Carlo run with sr_base=1000 and sr_case=900; the function prints one
# line per simulated matrix followed by the overfitting probability.
testAccuracy_MC(1000, 900)

# matrix: 0 # max index: 27 # Max_IS_sharpe_ratio: 66.3233 # Max_OOS_sharpe_ratio: 58.2854
# matrix: 1 # max index: 20 # Max_IS_sharpe_ratio: 64.7249 # Max_OOS_sharpe_ratio: 59.6277
# matrix: 2 # max index: 57 # Max_IS_sharpe_ratio: 66.6072 # Max_OOS_sharpe_ratio: 58.0977
# matrix: 3 # max index: 46 # Max_IS_sharpe_ratio: 66.385 # Max_OOS_sharpe_ratio: 58.2543
# matrix: 4 # max index: 10 # Max_IS_sharpe_ratio: 64.7693 # Max_OOS_sharpe_ratio: 59.4709
# matrix: 5 # max index: 80 # Max_IS_sharpe_ratio: 66.0362 # Max_OOS_sharpe_ratio: 58.4929
# matrix: 6 # max index: 73 # Max_IS_sharpe_ratio: 65.1139 # Max_OOS_sharpe_ratio: 59.1572
# matrix: 7 # max index: 55 # Max_IS_sharpe_ratio: 66.2876 # Max_OOS_sharpe_ratio: 58.3237
# matrix: 8 # max index: 27 # Max_IS_sharpe_ratio: 64.6875 # Max_OOS_sharpe_ratio: 59.5364
# matrix: 9 # max index: 37 # Max_IS_sharpe_ratio: 65.2381 # Max_OOS_sharpe_ratio: 59.0523
# matrix: 10 # max index: 18 # Max_IS_sharpe_ratio: 66.0282 # Max_OOS_sharpe_ratio: 58.5728

# matrix: 89 # max index: 6 # Max_IS_sharpe_ratio: 64.9801 # Max_OOS_sharpe_ratio: 59.3234
# matrix: 90 # max index: 51 # Max_IS_sharpe_ratio: 65.2111 # Max_OOS_sharpe_ratio: 59.0967
# matrix: 91 # max index: 93 # Max_IS_sharpe_ratio: 65.79 # Max_OOS_sharpe_ratio: 58.7025
# matrix: 92 # max index: 45 # Max_IS_sharpe_ratio: 66.4635 # Max_OOS_sharpe_ratio: 58.1826
# matrix: 93 # max index: 43 # Max_IS_sharpe_ratio: 65.2939 # Max_OOS_sharpe_ratio: 59.0086
# matrix: 94 # max index: 31 # Max_IS_sharpe_ratio: 65.2278 # Max_OOS_sharpe_ratio: 59.0972
# matrix: 95 # max index: 83 # Max_IS_sharpe_ratio: 64.9059 # Max_OOS_sharpe_ratio: 59.3045
# matrix: 96 # max index: 38 # Max_IS_sharpe_ratio: 64.661 # Max_OOS_sharpe_ratio: 59.4879
# matrix: 97 # max index: 16 # Max_IS_sharpe_ratio: 64.8148 # Max_OOS_sharpe_ratio: 59.3693
# matrix: 98 # max index: 83 # Max_IS_sharpe_ratio: 65.056 # Max_OOS_sharpe_ratio: 59.2439
# matrix: 99 # max index: 16 # Max_IS_sharpe_ratio: 65.8579 # Max_OOS_sharpe_ratio: 5

# matrix: 177 # max index: 39 # Max_IS_sharpe_ratio: 64.6413 # Max_OOS_sharpe_ratio: 59.5366
# matrix: 178 # max index: 27 # Max_IS_sharpe_ratio: 65.0854 # Max_OOS_sharpe_ratio: 59.1623
# matrix: 179 # max index: 5 # Max_IS_sharpe_ratio: 65.5911 # Max_OOS_sharpe_ratio: 58.7895
# matrix: 180 # max index: 64 # Max_IS_sharpe_ratio: 65.0619 # Max_OOS_sharpe_ratio: 59.1834
# matrix: 181 # max index: 14 # Max_IS_sharpe_ratio: 65.7722 # Max_OOS_sharpe_ratio: 58.8497
# matrix: 182 # max index: 70 # Max_IS_sharpe_ratio: 65.36 # Max_OOS_sharpe_ratio: 59.0952
# matrix: 183 # max index: 45 # Max_IS_sharpe_ratio: 65.9007 # Max_OOS_sharpe_ratio: 58.6421
# matrix: 184 # max index: 85 # Max_IS_sharpe_ratio: 65.5599 # Max_OOS_sharpe_ratio: 58.8177
# matrix: 185 # max index: 10 # Max_IS_sharpe_ratio: 64.6523 # Max_OOS_sharpe_ratio: 59.4958
# matrix: 186 # max index: 13 # Max_IS_sharpe_ratio: 65.8937 # Max_OOS_sharpe_ratio: 58.6631
# matrix: 187 # max index: 46 # Max_IS_sharpe_ratio: 64.9032 # Max_OOS_sh

# matrix: 265 # max index: 22 # Max_IS_sharpe_ratio: 66.3435 # Max_OOS_sharpe_ratio: 58.263
# matrix: 266 # max index: 23 # Max_IS_sharpe_ratio: 65.5426 # Max_OOS_sharpe_ratio: 58.8367
# matrix: 267 # max index: 92 # Max_IS_sharpe_ratio: 66.1168 # Max_OOS_sharpe_ratio: 58.4568
# matrix: 268 # max index: 86 # Max_IS_sharpe_ratio: 65.3318 # Max_OOS_sharpe_ratio: 58.9839
# matrix: 269 # max index: 87 # Max_IS_sharpe_ratio: 64.277 # Max_OOS_sharpe_ratio: 59.9081
# matrix: 270 # max index: 5 # Max_IS_sharpe_ratio: 66.1234 # Max_OOS_sharpe_ratio: 58.4288
# matrix: 271 # max index: 73 # Max_IS_sharpe_ratio: 65.2514 # Max_OOS_sharpe_ratio: 59.1265
# matrix: 272 # max index: 78 # Max_IS_sharpe_ratio: 65.0727 # Max_OOS_sharpe_ratio: 59.17
# matrix: 273 # max index: 1 # Max_IS_sharpe_ratio: 65.0756 # Max_OOS_sharpe_ratio: 59.1737
# matrix: 274 # max index: 28 # Max_IS_sharpe_ratio: 65.2339 # Max_OOS_sharpe_ratio: 59.1129
# matrix: 275 # max index: 73 # Max_IS_sharpe_ratio: 65.3811 # Max_OOS_sharp

# matrix: 353 # max index: 59 # Max_IS_sharpe_ratio: 66.3232 # Max_OOS_sharpe_ratio: 58.3577
# matrix: 354 # max index: 59 # Max_IS_sharpe_ratio: 67.9477 # Max_OOS_sharpe_ratio: 57.2422
# matrix: 355 # max index: 54 # Max_IS_sharpe_ratio: 65.1544 # Max_OOS_sharpe_ratio: 59.1805
# matrix: 356 # max index: 76 # Max_IS_sharpe_ratio: 65.8516 # Max_OOS_sharpe_ratio: 58.8021
# matrix: 357 # max index: 10 # Max_IS_sharpe_ratio: 65.7774 # Max_OOS_sharpe_ratio: 58.6595
# matrix: 358 # max index: 40 # Max_IS_sharpe_ratio: 65.5332 # Max_OOS_sharpe_ratio: 58.8318
# matrix: 359 # max index: 16 # Max_IS_sharpe_ratio: 66.0226 # Max_OOS_sharpe_ratio: 58.483
# matrix: 360 # max index: 40 # Max_IS_sharpe_ratio: 66.9264 # Max_OOS_sharpe_ratio: 57.9105
# matrix: 361 # max index: 36 # Max_IS_sharpe_ratio: 65.7326 # Max_OOS_sharpe_ratio: 58.9028
# matrix: 362 # max index: 92 # Max_IS_sharpe_ratio: 66.2934 # Max_OOS_sharpe_ratio: 58.2988
# matrix: 363 # max index: 48 # Max_IS_sharpe_ratio: 65.6993 # Max_OOS_

# matrix: 441 # max index: 12 # Max_IS_sharpe_ratio: 64.7259 # Max_OOS_sharpe_ratio: 59.4358
# matrix: 442 # max index: 1 # Max_IS_sharpe_ratio: 66.0802 # Max_OOS_sharpe_ratio: 58.5429
# matrix: 443 # max index: 72 # Max_IS_sharpe_ratio: 66.2449 # Max_OOS_sharpe_ratio: 58.3318
# matrix: 444 # max index: 48 # Max_IS_sharpe_ratio: 66.8404 # Max_OOS_sharpe_ratio: 57.9435
# matrix: 445 # max index: 58 # Max_IS_sharpe_ratio: 66.746 # Max_OOS_sharpe_ratio: 57.9944
# matrix: 446 # max index: 47 # Max_IS_sharpe_ratio: 66.0456 # Max_OOS_sharpe_ratio: 58.5162
# matrix: 447 # max index: 34 # Max_IS_sharpe_ratio: 66.3666 # Max_OOS_sharpe_ratio: 58.2517
# matrix: 448 # max index: 68 # Max_IS_sharpe_ratio: 65.0732 # Max_OOS_sharpe_ratio: 59.1693
# matrix: 449 # max index: 0 # Max_IS_sharpe_ratio: 65.0347 # Max_OOS_sharpe_ratio: 59.2037
# matrix: 450 # max index: 26 # Max_IS_sharpe_ratio: 65.419 # Max_OOS_sharpe_ratio: 58.9484
# matrix: 451 # max index: 93 # Max_IS_sharpe_ratio: 64.9222 # Max_OOS_sha

# matrix: 529 # max index: 29 # Max_IS_sharpe_ratio: 65.1196 # Max_OOS_sharpe_ratio: 59.1495
# matrix: 530 # max index: 85 # Max_IS_sharpe_ratio: 65.8064 # Max_OOS_sharpe_ratio: 58.763
# matrix: 531 # max index: 89 # Max_IS_sharpe_ratio: 65.0296 # Max_OOS_sharpe_ratio: 59.2587
# matrix: 532 # max index: 45 # Max_IS_sharpe_ratio: 66.5785 # Max_OOS_sharpe_ratio: 58.1332
# matrix: 533 # max index: 92 # Max_IS_sharpe_ratio: 64.8888 # Max_OOS_sharpe_ratio: 59.311
# matrix: 534 # max index: 61 # Max_IS_sharpe_ratio: 67.6549 # Max_OOS_sharpe_ratio: 57.5348
# matrix: 535 # max index: 26 # Max_IS_sharpe_ratio: 66.049 # Max_OOS_sharpe_ratio: 58.5592
# matrix: 536 # max index: 59 # Max_IS_sharpe_ratio: 66.9854 # Max_OOS_sharpe_ratio: 57.8622
# matrix: 537 # max index: 12 # Max_IS_sharpe_ratio: 65.0786 # Max_OOS_sharpe_ratio: 59.2108
# matrix: 538 # max index: 11 # Max_IS_sharpe_ratio: 66.8505 # Max_OOS_sharpe_ratio: 57.9313
# matrix: 539 # max index: 60 # Max_IS_sharpe_ratio: 66.8083 # Max_OOS_sh

# matrix: 617 # max index: 29 # Max_IS_sharpe_ratio: 66.2273 # Max_OOS_sharpe_ratio: 58.398
# matrix: 618 # max index: 69 # Max_IS_sharpe_ratio: 65.6064 # Max_OOS_sharpe_ratio: 58.7788
# matrix: 619 # max index: 25 # Max_IS_sharpe_ratio: 67.321 # Max_OOS_sharpe_ratio: 57.6565
# matrix: 620 # max index: 80 # Max_IS_sharpe_ratio: 66.3476 # Max_OOS_sharpe_ratio: 58.5224
# matrix: 621 # max index: 56 # Max_IS_sharpe_ratio: 65.5248 # Max_OOS_sharpe_ratio: 58.8762
# matrix: 622 # max index: 97 # Max_IS_sharpe_ratio: 65.9272 # Max_OOS_sharpe_ratio: 58.5658
# matrix: 623 # max index: 30 # Max_IS_sharpe_ratio: 64.7469 # Max_OOS_sharpe_ratio: 59.4192
# matrix: 624 # max index: 64 # Max_IS_sharpe_ratio: 64.9414 # Max_OOS_sharpe_ratio: 59.3233
# matrix: 625 # max index: 54 # Max_IS_sharpe_ratio: 66.6494 # Max_OOS_sharpe_ratio: 58.0706
# matrix: 626 # max index: 85 # Max_IS_sharpe_ratio: 65.0248 # Max_OOS_sharpe_ratio: 59.2668
# matrix: 627 # max index: 17 # Max_IS_sharpe_ratio: 65.7543 # Max_OOS_s

# matrix: 705 # max index: 70 # Max_IS_sharpe_ratio: 65.1193 # Max_OOS_sharpe_ratio: 59.2717
# matrix: 706 # max index: 48 # Max_IS_sharpe_ratio: 68.2976 # Max_OOS_sharpe_ratio: 57.0738
# matrix: 707 # max index: 30 # Max_IS_sharpe_ratio: 65.5031 # Max_OOS_sharpe_ratio: 58.952
# matrix: 708 # max index: 43 # Max_IS_sharpe_ratio: 65.4913 # Max_OOS_sharpe_ratio: 58.8939
# matrix: 709 # max index: 71 # Max_IS_sharpe_ratio: 67.4038 # Max_OOS_sharpe_ratio: 57.7626
# matrix: 710 # max index: 20 # Max_IS_sharpe_ratio: 66.2621 # Max_OOS_sharpe_ratio: 58.3248
# matrix: 711 # max index: 77 # Max_IS_sharpe_ratio: 66.5771 # Max_OOS_sharpe_ratio: 58.1944
# matrix: 712 # max index: 44 # Max_IS_sharpe_ratio: 66.2363 # Max_OOS_sharpe_ratio: 58.4968
# matrix: 713 # max index: 8 # Max_IS_sharpe_ratio: 65.2373 # Max_OOS_sharpe_ratio: 59.0821
# matrix: 714 # max index: 73 # Max_IS_sharpe_ratio: 66.5912 # Max_OOS_sharpe_ratio: 58.1068
# matrix: 715 # max index: 87 # Max_IS_sharpe_ratio: 66.462 # Max_OOS_sh

# matrix: 793 # max index: 37 # Max_IS_sharpe_ratio: 66.722 # Max_OOS_sharpe_ratio: 58.0613
# matrix: 794 # max index: 36 # Max_IS_sharpe_ratio: 66.7117 # Max_OOS_sharpe_ratio: 58.1384
# matrix: 795 # max index: 48 # Max_IS_sharpe_ratio: 64.9737 # Max_OOS_sharpe_ratio: 59.2484
# matrix: 796 # max index: 4 # Max_IS_sharpe_ratio: 66.9199 # Max_OOS_sharpe_ratio: 57.8962
# matrix: 797 # max index: 3 # Max_IS_sharpe_ratio: 65.2661 # Max_OOS_sharpe_ratio: 59.139
# matrix: 798 # max index: 14 # Max_IS_sharpe_ratio: 65.6982 # Max_OOS_sharpe_ratio: 58.7133
# matrix: 799 # max index: 70 # Max_IS_sharpe_ratio: 64.9837 # Max_OOS_sharpe_ratio: 59.2494
# matrix: 800 # max index: 2 # Max_IS_sharpe_ratio: 65.307 # Max_OOS_sharpe_ratio: 59.0556
# matrix: 801 # max index: 71 # Max_IS_sharpe_ratio: 66.4989 # Max_OOS_sharpe_ratio: 58.2659
# matrix: 802 # max index: 54 # Max_IS_sharpe_ratio: 66.1857 # Max_OOS_sharpe_ratio: 58.4584
# matrix: 803 # max index: 15 # Max_IS_sharpe_ratio: 65.3602 # Max_OOS_sharp

# matrix: 881 # max index: 39 # Max_IS_sharpe_ratio: 66.0689 # Max_OOS_sharpe_ratio: 58.4531
# matrix: 882 # max index: 38 # Max_IS_sharpe_ratio: 64.8906 # Max_OOS_sharpe_ratio: 59.5966
# matrix: 883 # max index: 49 # Max_IS_sharpe_ratio: 65.2482 # Max_OOS_sharpe_ratio: 59.0666
# matrix: 884 # max index: 94 # Max_IS_sharpe_ratio: 64.3798 # Max_OOS_sharpe_ratio: 59.7941
# matrix: 885 # max index: 62 # Max_IS_sharpe_ratio: 65.5578 # Max_OOS_sharpe_ratio: 58.8177
# matrix: 886 # max index: 48 # Max_IS_sharpe_ratio: 66.9838 # Max_OOS_sharpe_ratio: 57.8439
# matrix: 887 # max index: 33 # Max_IS_sharpe_ratio: 65.4673 # Max_OOS_sharpe_ratio: 58.8792
# matrix: 888 # max index: 67 # Max_IS_sharpe_ratio: 65.4491 # Max_OOS_sharpe_ratio: 58.9812
# matrix: 889 # max index: 68 # Max_IS_sharpe_ratio: 65.0546 # Max_OOS_sharpe_ratio: 59.184
# matrix: 890 # max index: 52 # Max_IS_sharpe_ratio: 66.3057 # Max_OOS_sharpe_ratio: 58.2915
# matrix: 891 # max index: 70 # Max_IS_sharpe_ratio: 66.1686 # Max_OOS_

# matrix: 969 # max index: 15 # Max_IS_sharpe_ratio: 65.601 # Max_OOS_sharpe_ratio: 58.7822
# matrix: 970 # max index: 44 # Max_IS_sharpe_ratio: 64.5545 # Max_OOS_sharpe_ratio: 59.6759
# matrix: 971 # max index: 67 # Max_IS_sharpe_ratio: 65.1791 # Max_OOS_sharpe_ratio: 59.2298
# matrix: 972 # max index: 93 # Max_IS_sharpe_ratio: 65.5571 # Max_OOS_sharpe_ratio: 58.9236
# matrix: 973 # max index: 37 # Max_IS_sharpe_ratio: 66.2898 # Max_OOS_sharpe_ratio: 58.3037
# matrix: 974 # max index: 95 # Max_IS_sharpe_ratio: 65.5325 # Max_OOS_sharpe_ratio: 58.8468
# matrix: 975 # max index: 30 # Max_IS_sharpe_ratio: 65.9511 # Max_OOS_sharpe_ratio: 58.7006
# matrix: 976 # max index: 15 # Max_IS_sharpe_ratio: 65.9766 # Max_OOS_sharpe_ratio: 58.5478
# matrix: 977 # max index: 21 # Max_IS_sharpe_ratio: 65.1341 # Max_OOS_sharpe_ratio: 59.1379
# matrix: 978 # max index: 48 # Max_IS_sharpe_ratio: 65.338 # Max_OOS_sharpe_ratio: 58.976
# matrix: 979 # max index: 98 # Max_IS_sharpe_ratio: 66.3088 # Max_OOS_sh

In [4]:
# Apply numerical integration

def testAccuracy_EVT(sr_base, sr_case):
    """
    Test accuracy by numerical integration
    It does the same as testAccuracy_MC, but through numerical 
    integration of the base and case distribution
    """
    
    # 1) Parameters
    parts = 1e4
    length = 1000
    freq = 365.25 * 5 / 7.
    minX = -10
    trials = 100
    emc=0.57721566490153286 # Euler-Mascheroni constant
    
    # 2) SR distributions
    dist_base = [sr_base, ((freq + 0.5 * sr_base ** 2) / length) ** 0.5]
    dist_case = [sr_case, ((freq + 0.5 * sr_case ** 2) / length) ** 0.5]
    
    # 3) Fit Gumbel (method of moments)
    maxList = []
    for x in range(int(parts)):
        max_ = max([gauss(dist_base[0], dist_base[1]) for i in range(trials)])
        maxList.append(max_)
        
    dist_base[1] = np.std(maxList) * 6 ** 0.5 / math.pi
    dist_base[0] = np.mean(maxList) - emc * dist_base[1]
    
    # 4) Integration
    prob1 = 0
    for x in np.linspace(minX * dist_case[1], 2 * dist_case[0] - sr_base, parts):
        f_x = ss.norm.pdf(x, dist_case[0], dist_case[1])
        F_y = 1 - ss.gumbel_r.cdf(x, dist_base[0], dist_base[1])
        prob1 += f_x * F_y
        
    prob1 *= (2 * dist_case[0] - sr_base - minX * dist_case[1]) / parts
    prob2 = 1 - ss.norm.cdf(2 * dist_case[0] - sr_base, dist_case[0], dist_case[1])
    print(dist_base, dist_case)
    print(prob1, prob2, prob1 + prob2)
    return

In [5]:
# Same scenario (sr_base=1000, sr_case=900) evaluated by numerical integration;
# the function prints the fitted distributions and then prob1, prob2 and their sum.
testAccuracy_EVT(1000, 900)

[1051.731162010095, 7.624741747139034] [900, 20.131092689100182]
3.435450915804261e-07 0.9999996607877335 1.000000004332825
