In [2]:
import numpy as np
import pandas as pd
import itertools
import collections
import math
import datetime
import bisect
import copy
import random
import time
import heapq
import scipy
from functools import reduce
from matplotlib import pyplot as plt
import os

In [4]:
# Directory holding the earning-filter result files.
path = 'D:/github/ST5188_StatisticalProject/results/earning_filter'
# os.listdir() returns names in arbitrary order, while a later cell labels
# each file by its position in this list, so fix the order explicitly.
# The case-insensitive key keeps upper- and lower-case names interleaved.
files = sorted(os.listdir(path), key=str.lower)

In [40]:
from pandas import Series

def Percentage_Profit(X):
    '''
    Return the total percentage profit of an investment strategy.

    The series is read positionally (first vs. last observation), so lists,
    NumPy arrays and pandas Series with any index are handled the same way.

    INPUT:
        X: array-like of numbers, value of the strategy over time
    OUTPUT:
        res: float, 100 * (last / first - 1)
    '''
    # Integer keys on a pandas Series are label lookups: X[-1] raises KeyError
    # on an integer index and only works on other indexes through a deprecated
    # positional fallback. Converting to an array makes the access positional.
    values = np.asarray(X, dtype=float)
    res = 100 * ((values[-1] / values[0]) - 1)
    return float(res)

def Profit_Factor(X):
    '''
    Return the profit factor of an investment strategy: the sum of all
    period-to-period gains divided by the sum of all period-to-period losses.

    Periods with no change contribute nothing to either sum.

    INPUT:
        X: array-like of numbers, value of the strategy over time
    OUTPUT:
        res: float, gains / losses. inf when there are gains but no losses,
             nan when the series never moves.
    '''
    # Work on a positional array: integer keys on a pandas Series are label
    # lookups and break (or rely on a deprecated fallback) for most indexes.
    values = np.asarray(X, dtype=float)
    steps = np.diff(values)

    is_gain = steps > 0
    up = steps[is_gain].sum()
    # Everything that is not a gain goes to the loss side, as in the original
    # if/else, so a NaN step still propagates into the result.
    down = -steps[~is_gain].sum()

    if down == 0:
        # No losing period at all: the ratio is unbounded (or undefined when
        # there is no gain either) rather than a ZeroDivisionError.
        return float('inf') if up > 0 else float('nan')
    return float(up / down)

def Percentage_Drawdown(X):
    '''
    Return the percentage drawdown of an investment strategy.

    The drawdown reported is the one with the largest ABSOLUTE drop from a
    running peak, expressed as a percentage of that peak. This is not
    necessarily the largest relative drop seen in the series.

    INPUT:
        X: array-like of numbers, value of the strategy over time
    OUTPUT:
        res: float, 100 * drop / peak for the largest absolute drop
             (0.0 when the series never falls below a previous peak)
    '''
    # Positional array access: integer keys on a pandas Series are label
    # lookups and fail for indexes that are not 0..n-1.
    values = np.asarray(X, dtype=float)

    peak = values[0]
    largest_drop = 0.0
    drop_fraction = 0.0
    for value in values[1:]:
        if value > peak:
            peak = value
            continue
        drop = peak - value
        # Strict '>' keeps the first occurrence when two drops tie.
        if drop > largest_drop:
            largest_drop = drop
            drop_fraction = drop / peak
    return float(100 * drop_fraction)

def Recovery_Factor(X):
    '''
    Return the recovery factor of an investment strategy: total profit
    (last value minus first value) divided by the largest absolute drop
    from a running peak.

    INPUT:
        X: array-like of numbers, value of the strategy over time
    OUTPUT:
        res: float, total profit / maximum absolute drawdown. inf when the
             series gains without ever dropping, nan when it is flat.
    '''
    # Positional array access: integer keys on a pandas Series are label
    # lookups, so X[-1] / X[t] are unreliable for general indexes.
    values = np.asarray(X, dtype=float)
    total_profit = values[-1] - values[0]

    peak = values[0]
    max_drop = 0.0
    for value in values[1:]:
        if value > peak:
            peak = value
        else:
            drop = peak - value
            if drop > max_drop:
                max_drop = drop

    if max_drop == 0:
        # No drawdown means the series never decreased, so profit is >= 0.
        return float('inf') if total_profit > 0 else float('nan')
    return float(total_profit / max_drop)

def Sharpe_Ratio(X, periods_per_year=252):
    '''
    Return the annualised Sharpe ratio of an investment strategy (no
    risk-free rate is subtracted).

    INPUT:
        X: array-like of numbers, value of the strategy over time
        periods_per_year: number of observations per year used to annualise
            both the return and the volatility. Default=252
    OUTPUT:
        res: float, annualised return / annualised volatility
    '''
    # Positional array access: integer keys on a pandas Series are label
    # lookups, so X[-1] / X[0] are unreliable for general indexes.
    values = np.asarray(X, dtype=float)

    # Simple period-to-period returns; the first entry has no predecessor.
    returns = pd.Series(values).pct_change().iloc[1:]

    # Geometric annualisation of the total growth over len(X) observations.
    mu = (values[-1] / values[0]) ** (periods_per_year / len(values)) - 1
    # Population standard deviation (ddof=0): this is what numpy.std applied
    # to the returns produced, now spelled out explicitly.
    sigma = np.sqrt(periods_per_year) * returns.std(ddof=0)
    return float(mu / sigma)

def Sortino_Ratio(X, periods_per_year=252):
    '''
    Return the annualised Sortino ratio of an investment strategy.

    Downside risk is measured here as the sample standard deviation
    (ddof=1) of the returns that fall below the mean return.

    INPUT:
        X: array-like of numbers, value of the strategy over time
        periods_per_year: number of observations per year used to annualise
            both the return and the downside risk. Default=252
    OUTPUT:
        res: float, annualised return / annualised downside risk (nan when
             fewer than two returns fall below the mean)
    '''
    # Positional array access: integer keys on a pandas Series are label
    # lookups, so X[-1] / X[0] are unreliable for general indexes.
    values = np.asarray(X, dtype=float)

    # Simple period-to-period returns; the first entry has no predecessor.
    returns = pd.Series(values).pct_change().iloc[1:]
    downside_risk = returns[returns < returns.mean()].std()

    # Geometric annualisation of the total growth over len(X) observations.
    mu = (values[-1] / values[0]) ** (periods_per_year / len(values)) - 1
    sigma_down = np.sqrt(periods_per_year) * downside_risk
    return float(mu / sigma_down)


def Strategy_Evaluation(X, verbose=True, out=False, n_round=3):
    '''
    Evaluate an investment strategy with a fixed set of metrics.

    INPUT:
        X: series, strategy to evaluate
        verbose: bool, accepted for backward compatibility; currently ignored
        out: bool, accepted for backward compatibility; currently ignored.
            The metrics dict is ALWAYS returned, and read_file relies on
            that even though it passes out=False.
        n_round: int, accepted for backward compatibility; currently
            ignored. Values are returned unrounded.
    OUTPUT:
        res: dict, metric name -> value, in the order listed below
    '''
    # (label, function) pairs; the dict preserves this order, which becomes
    # the column order of any DataFrame built from the result.
    metrics = (
        ('Percentage Profit', Percentage_Profit),
        ('Profit Factor', Profit_Factor),
        ('Percentage Drawdown', Percentage_Drawdown),
        ('Recovery Factor', Recovery_Factor),
        ('Sharpe Ratio', Sharpe_Ratio),
        ('Sortino Ratio', Sortino_Ratio),
    )
    return {label: metric(X) for label, metric in metrics}

In [63]:
# Peek at the layout of the first result file by printing the first
# character of every non-blank line.
# The handle gets its own name so it does not clobber a `file` filename
# variable, and the path no longer depends on a `file` defined in a later cell.
file_path = f'{path}/{files[0]}'
with open(file_path, 'r') as handle:
    for line in handle:
        stripped = line.strip()
        if stripped:  # a blank line has no first character to show
            print(stripped[0])
                

D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N
D
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
N


In [95]:
def _evaluate_segment(vals, dates):
    '''Build a date-indexed Series from one parsed segment and return its metrics dict.'''
    series = pd.Series(vals, index=dates)
    return Strategy_Evaluation(series, verbose=True, out=False, n_round=3)


def read_file(file_path):
    '''
    Parse a results file made of consecutive "date value" segments and
    summarise the strategy metrics across segments.

    The first line of the file is always skipped. After that, a line whose
    first non-blank character is "2" is treated as a data row "<date> <value>"
    (presumably dates in the 2000s; verify against the file format). Any
    other line, including a blank one, closes the current segment. Each
    segment is evaluated with Strategy_Evaluation.

    INPUT:
        file_path: str, path of the text file to read
    OUTPUT:
        stats_df: DataFrame with rows 'mean', 'std' and 'wse' and one column
            per metric. 'wse' is the column-wise minimum over the segments.
            NOTE(review): for 'Percentage Drawdown' the minimum is the
            mildest case, not the worst; confirm this is intended.
    '''
    segment_metrics = []
    vals, dates = [], []
    with open(file_path, 'r') as handle:
        # Skip the first line; the default avoids StopIteration on an empty file.
        next(handle, None)
        for line in handle:
            stripped = line.strip()
            # startswith() is safe on blank lines, unlike indexing [0].
            if stripped.startswith("2"):
                date, value = stripped.split(maxsplit=1)
                vals.append(float(value))
                dates.append(date)
            elif vals:
                # A non-data line ends the segment collected so far.
                segment_metrics.append(_evaluate_segment(vals, dates))
                vals, dates = [], []
    # The last segment has no trailing separator line if the file ends on data.
    if vals:
        segment_metrics.append(_evaluate_segment(vals, dates))

    df = pd.DataFrame(segment_metrics)
    stats_df = pd.DataFrame({
        'mean': df.mean(),
        'std': df.std(),
        'wse': df.min()
    }).transpose()
    return stats_df
    

In [88]:
# Run read_file on the first listed result file and display its summary
# table (rows of statistics, one column per metric).
file = files[0] 
read_file(f'{path}/{file}')

Unnamed: 0,Percentage Profit,Profit Factor,Percentage Drawdown,Recovery Factor,Sharpe Ratio,Sortino Ratio
mean,0.089513,1.340749,0.671155,0.717684,-0.097759,0.414416
std,0.790689,1.225324,0.377702,2.278362,3.776331,9.537142
min,-0.786499,0.442572,0.152418,-0.777349,-4.327433,-14.314749


In [89]:
def add_file_index(df, file_name):
    '''
    Return a copy of df whose index is a two-level MultiIndex
    (file_name, original index label).

    The input frame is left untouched, so calling this twice on the same
    frame gives the same result each time.

    INPUT:
        df: DataFrame, table to relabel
        file_name: value used for the outer 'Method' level
    OUTPUT:
        DataFrame with index levels named 'Method' and 'Perfermance'
        (the spelling is kept as-is because it is the displayed level name)
    '''
    labelled = df.copy()
    labelled.index = pd.MultiIndex.from_product(
        [[file_name], df.index], names=['Method', 'Perfermance']
    )
    return labelled

In [90]:
# One summary table per earning-filter file, stacked under a label derived
# from the file's position in `files` (0.0, 0.1, 0.2, ...).
pd.concat({
    f'earning_filter_{position/10}': add_file_index(read_file(f'{path}/{file_name}'), '')
    for position, file_name in enumerate(files)
})


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Percentage Profit,Profit Factor,Percentage Drawdown,Recovery Factor,Sharpe Ratio,Sortino Ratio
Unnamed: 0_level_1,Method,Perfermance,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
earning_filter_0.0,,mean,0.089513,1.340749,0.671155,0.717684,-0.097759,0.414416
earning_filter_0.0,,std,0.790689,1.225324,0.377702,2.278362,3.776331,9.537142
earning_filter_0.0,,min,-0.786499,0.442572,0.152418,-0.777349,-4.327433,-14.314749
earning_filter_0.1,,mean,0.083142,1.257759,0.705463,0.542977,-0.236177,0.558863
earning_filter_0.1,,std,0.782582,1.070317,0.366793,1.877728,3.775002,8.424419
earning_filter_0.1,,min,-0.73936,0.439114,0.224529,-0.90092,-4.43563,-9.804078
earning_filter_0.2,,mean,0.076744,1.203349,0.7518,0.405689,-0.315748,0.501995
earning_filter_0.2,,std,0.783946,0.976351,0.349896,1.569667,3.755499,7.892986
earning_filter_0.2,,min,-0.692223,0.360298,0.311433,-0.904069,-5.373456,-7.931769
earning_filter_0.3,,mean,0.070319,1.164814,0.805616,0.320309,-0.367188,0.316149


In [71]:
# NOTE(review): this passes whole DataFrames (the read_file results) as the
# dict values of pd.DataFrame and transposes the result. Confirm the
# resulting shape is what was intended; other cells in this notebook combine
# the per-file tables with pd.concat instead.
df = pd.DataFrame({file:read_file(f'{path}/{file}') for file in files}).T

  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals,index=dates)
  series = pd.Series(vals

In [96]:
# Summary tables for the two benchmark result files, stacked under the
# labels given in `names`.
file1 = 'D:/github/ST5188_StatisticalProject/results/benchmark/benchmark_100_port_test_monthly.txt'
file2 = 'D:/github/ST5188_StatisticalProject/results/quaterly/benchmark_100_port_test_quaterly.txt'

names = ["monthly", "quaterly"]
files = [file1, file2]
pd.concat({
    label: add_file_index(read_file(result_file), '')
    for label, result_file in zip(names, files)
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Percentage Profit,Profit Factor,Percentage Drawdown,Recovery Factor,Sharpe Ratio,Sortino Ratio
Unnamed: 0_level_1,Method,Perfermance,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
monthly,,mean,0.031206,1.037161,1.166719,0.149432,-0.53056,-0.464355
monthly,,std,1.021714,0.630244,0.356855,1.01372,3.433618,5.793339
monthly,,wse,-1.269941,0.319716,0.737387,-0.905379,-6.570996,-11.190404
quaterly,,mean,-0.080801,1.081326,0.670911,0.469488,-0.129583,0.018087
quaterly,,std,0.617651,0.570225,0.444895,1.562517,3.231765,5.922988
quaterly,,wse,-1.169454,0.462507,0.183757,-0.745792,-4.30697,-8.494166


In [101]:
path = 'D:/github/ST5188_StatisticalProject/results/regression'
# os.listdir() returns names in arbitrary order, but the labels in `names`
# are matched to files by position. A case-insensitive sort gives a stable
# order with lower- and upper-case file names interleaved alphabetically.
files = sorted(os.listdir(path), key=str.lower)
print(files)
names = ["benchmark","ensemble","svr"]
# Guard the positional pairing: each label must appear in the name of the
# file it is matched with, otherwise the table would be silently mislabelled.
for i, name in enumerate(names):
    assert name in files[i].lower(), f'label {name!r} does not match file {files[i]!r}'
pd.concat({name: add_file_index(read_file(f'{path}/{files[i]}'), '') for i, name in enumerate(names)})

['benchmark_100_port_test_monthly.txt', 'Ensemble_statistics.txt', 'SVR_statistics.txt']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Percentage Profit,Profit Factor,Percentage Drawdown,Recovery Factor,Sharpe Ratio,Sortino Ratio
Unnamed: 0_level_1,Method,Perfermance,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
benchmark,,mean,0.031206,1.037161,1.166719,0.149432,-0.53056,-0.464355
benchmark,,std,1.021714,0.630244,0.356855,1.01372,3.433618,5.793339
benchmark,,wse,-1.269941,0.319716,0.737387,-0.905379,-6.570996,-11.190404
ensemble,,mean,0.241888,1.415428,0.516062,0.791344,1.27211,2.138571
ensemble,,std,0.522592,0.72251,0.303332,1.421771,2.875217,5.229664
ensemble,,wse,-0.672929,0.546018,0.174274,-0.702557,-3.856029,-7.81581
svr,,mean,0.061986,1.356137,0.499683,0.963473,0.720369,2.365175
svr,,std,0.53124,0.992385,0.290066,2.080245,3.790671,8.01417
svr,,wse,-0.67304,0.509087,0.163617,-0.741259,-4.120684,-6.522475
