# Results analysis: portfolio strategies

Some of the sections below may be coded in a bad fashion, because it's been done in real-time. It does not really matter, since the purpose of this script is to display results. It's not a production script.

It may not seem logical to display results from the most precise level (single/best strategy) to the least precise one (all strategies included). But we need to describe what a strategy is (see class Strategy) before any analysis. And by the way, information about the best strategies is given in the Must-have section.

In [None]:
from bokeh.charts import TimeSeries, Bar
from bokeh.charts.attributes import cat, color
from bokeh.io import output_notebook, show, output_file, reset_output, gridplot, save
from bokeh.models import Span, HoverTool, Range1d, LinearAxis
from bokeh.models.sources import ColumnDataSource
from bokeh.charts.operations import blend
from bokeh.palettes import RdYlGn10, Spectral11, YlGn9
from bokeh.plotting import figure
from datetime import datetime
from pandas.tseries.offsets import *
from scipy import stats

import ast
import numpy as np
import os
import pandas as pd

In [None]:
import resultsAnalysis_utils as uti

In [None]:
from resultsAnalysis_dataLoading import *

## Must-have

In [None]:
# Path of the CSV file that the next cell reads into strategiesMeanStd.
strategiesMeanStdFile = '../results/dae/portfolios/performances_portfolios_final.csv'

In [None]:
# Load the per-strategy performance table (';'-separated, header on the first row).
strategiesMeanStd = pd.read_csv(filepath_or_buffer=strategiesMeanStdFile, sep=';', header=0)

# 'concatParams' reads "<ML parameters>__<strategy parameters>", each group being
# '_'-separated. Expose every parameter as its own column, ML group first.
paramGroups = strategiesMeanStd['concatParams'].str.split('__')
mlParamValues = paramGroups.str[0].str.split('_')
for position, paramName in enumerate(parametersSetML):
    strategiesMeanStd[paramName] = mlParamValues.str[position]
stratParamValues = paramGroups.str[1].str.split('_')
for position, paramName in enumerate(parametersSetStrat):
    strategiesMeanStd[paramName] = stratParamValues.str[position]

# The literal text 'None' becomes 0 so that numeric columns can be converted.
strategiesMeanStd = strategiesMeanStd.replace('None', 0)
keptAsText = ('loss', 'smoothness')
for column in numericML + numericStrat:
    if column in keptAsText:
        continue
    strategiesMeanStd[[column]] = strategiesMeanStd[[column]].apply(pd.to_numeric)

# Ratio of the 'mean' and 'std' columns, then index the table by configuration name.
strategiesMeanStd['sharpeRatio'] = strategiesMeanStd['mean'].div(strategiesMeanStd['std'])
strategiesMeanStd = strategiesMeanStd.set_index('concatParams')

In [None]:
# Show the 20 rows with the highest sharpeRatio (ascending sort, so the best row comes last).
strategiesMeanStd.sort_values(by='sharpeRatio').tail(20)

## Strategy definition

In [None]:
class Strategy(object):
    """Results of one back-tested portfolio strategy, loaded from CSV files.

    The constructor reads the strategy's tracker (two-level columns whose
    second level is named 'Values' and holds fields such as 'gain', 'valueIn'
    and 'weight'; the index is parsed as dates) and its aggregated gains, then
    derives per-action returns and the daily performance of the long and short
    legs. Drawdown, turnover and gain/loss contributions are only computed
    when the corresponding method is called.
    """

    def __init__(self,
                 name,
                 trackerFileDir='../results/dae/portfolios/trackers/',
                 bsLoad=False):
        """Load the strategy called ``name``.

        Args:
            name: strategy name, i.e. the file name without its extension.
            trackerFileDir: directory holding ``<name>.csv`` (the tracker).
            bsLoad: when True, also read the buyers/sellers log.
        """
        # Strategy name
        self.name = name
        self.specificName = None

        # Tracker loading
        self.tracker = pd.read_csv(trackerFileDir + self.name + '.csv',
                                   sep=';',
                                   header=[0,1],
                                   parse_dates=True,
                                   index_col=0)

        # Loadings:
        ## Buyers/Sellers loading (information available in tracker as well)
        self.bs = None
        if bsLoad: self.loadStrategyBS()
        ## Absolute, cumulative, and returns loading (information available in tracker as well)
        ### Aggregated level
        self.gains = None
        self.returnMean = None
        self.returnStd = None
        self.returnSharpe = None
        self.loadStrategyGains()
        ### Action level
        self.computeReturns()

        # Filled by computeGainLossContribution()
        self.dailyLoss = None
        self.dailyWins = None

        self.dailyPerformanceLong = None
        self.dailyPerformanceLongReturn = None
        self.dailyPerformanceShort = None
        self.dailyPerformanceShortReturn = None
        self.computeDailyLongShortPerformance()

        # Input of to_drawdown_series(); nothing in this class fills it, the
        # caller has to assign a Series/DataFrame before asking for drawdowns.
        self.pnl = None
        self.drawdown = None
        self.drawdownDetails = None

        # Filled by computeTurnover()
        self.turnover = None

    def computeDailyLongShortPerformance(self):
        """Compute the daily gain and return of the long and of the short leg.

        Positions with a positive 'weight' form the long leg, positions with a
        negative 'weight' the short leg. For each leg the daily gain is the sum
        of 'gain' and the daily return is that sum divided by the sum of
        'valueIn'.
        """
        # NOTE(review): masking the two-level tracker with a frame that has one
        # column per stock relies on how pandas aligns the mask -- confirm with
        # the pandas version in use.
        weights = self.tracker.xs('weight', level='Values', axis=1)

        longActions = self.tracker[weights > 0]
        longTotals = longActions.sum(axis=1, level=1)
        self.dailyPerformanceLong = longTotals['gain']
        self.dailyPerformanceLongReturn = self.dailyPerformanceLong / longTotals['valueIn']

        shortActions = self.tracker[weights < 0]
        shortTotals = shortActions.sum(axis=1, level=1)
        self.dailyPerformanceShort = shortTotals['gain']
        self.dailyPerformanceShortReturn = self.dailyPerformanceShort / shortTotals['valueIn']
        return True

    def computeGainLossContribution(self):
        '''
        How much does an action involved in trading contribute to the loss/gain?
        E.g: an action loses 10 euros out of a loss of 100 euros => stands for 10% of the loss.

        The result is stored in a 'contribution' field of the tracker: negative
        percentages for losing positions, positive ones for winning positions.
        '''
        gains = self.tracker.xs('gain', level='Values', axis=1)

        # Negative gains
        losingActions = self.tracker[gains < 0]
        # Sum over negative gains to compute global loss
        dailyLoss = losingActions.sum(axis=1, level=1)['gain']
        self.dailyLoss = dailyLoss
        # Compute loss contribution (minus sign: a loss contributes negatively)
        lossContribution = - losingActions.xs('gain', level='Values', axis=1).divide(dailyLoss, axis=0) * 100
        self._assignContribution(lossContribution, lambda gain: gain < 0)

        # Positive gains
        winningActions = self.tracker[gains > 0]
        # Sum over positive gains to compute global gain
        dailyWins = winningActions.sum(axis=1, level=1)['gain']
        self.dailyWins = dailyWins
        # Compute gain contribution
        winsContribution = winningActions.xs('gain', level='Values', axis=1).divide(dailyWins, axis=0) * 100
        self._assignContribution(winsContribution, lambda gain: gain > 0)

        self.tracker = self.tracker.sort_index(axis=1)

        return True

    def _assignContribution(self, contribution, isConcerned):
        """Copy ``contribution`` into the tracker where ``isConcerned(gain)`` holds.

        Cell-by-cell assignment so that the losing pass and the winning pass
        never overwrite each other's values.
        """
        for stock in contribution.columns.values:
            for day in contribution.index.values:
                algebricGain = self.tracker.loc[day, (stock, 'gain')]
                if not np.isnan(algebricGain) and isConcerned(algebricGain):
                    self.tracker.loc[day, (stock, 'contribution')] = contribution.loc[day, stock]

    def computeReturns(self):
        """Add a 'return' field (gain / valueIn) to every stock of the tracker."""
        returns = self.tracker.xs('gain', level='Values', axis=1) / self.tracker.xs('valueIn', level='Values', axis=1)
        for stock in self.tracker.columns.levels[0].values:
            self.tracker[stock, 'return'] = returns[stock]
        self.tracker = self.tracker.sort_index(axis=1)

        return True

    def computeTurnover(self):
        """Compute the daily turnover, in percent, into ``self.turnover``.

        For every day but the last one, the turnover is the share of traded
        stocks (non-NaN 'gain') that were not traded on the previous day. A day
        without any traded stock gets 0 instead of failing on a division by
        zero.
        """
        traded = self.tracker.xs('gain', level='Values', axis=1).notnull()
        stocks = traded.columns
        turnover = []
        previousDay = set()
        # The last row is left out, as in the original computation.
        for isTraded in traded.values[:-1]:
            dday = set(stocks[isTraded])
            if dday:
                # Stocks in dday, not in previousDay = new stocks involved in trading
                newStocks = dday.difference(previousDay)
                turnover.append(len(newStocks) * 1. / len(dday) * 100)
            else:
                turnover.append(0.)
            previousDay = dday
        self.turnover = turnover
        return True

    def loadStrategyBS(self):
        '''
        Load table of who was involved in trading each day, with its weight
        Positive weight: long leg
        Negative weight: short leg

        The log alternates a date line and a dict-literal line.
        '''
        refDir = '../results/dae/portfolios/bs/bs_'

        bs = {}
        date = None
        # `with` closes the file even if a line fails to parse.
        with open(refDir + self.name + '.log') as f:
            for n, line in enumerate(f):
                content = line.replace('\n', '')
                if n % 2 == 0:
                    # Keep only what precedes the first 'T' of the date line
                    date = pd.to_datetime(content.rsplit('T')[0])
                elif content == '{}':
                    # Nobody traded: placeholder column so the date keeps a row
                    bs[date] = {'DELBBB': np.nan}
                else:
                    bs[date] = ast.literal_eval(content)
        bs = pd.DataFrame.from_dict(bs, orient="index")
        bs = bs.reindex(columns=sorted(bs.columns))
        self.bs = bs
        return True

    def loadStrategyGains(self):
        """Load the aggregated gains and derive returns and cumulative returns.

        Sets ``self.gains`` (with 'return', 'return100' and 'cumulReturn'
        columns), ``self.returnMean``, ``self.returnStd`` and
        ``self.returnSharpe`` (mean / std of the daily returns).
        """
        refDir = '../results/dae/portfolios/values/'
        gains = pd.read_csv(filepath_or_buffer=refDir + self.name + '.csv',
                            sep=';',
                            header=0,
                            parse_dates=True)
        gains['Date'] = pd.to_datetime(gains['Date'])
        # Index by date and drop the first column of the file
        gains = gains.set_index('Date').drop(gains.columns[[0]], axis=1)
        # Daily return = absolute gain / total value engaged that day
        gains['return'] = gains['AbsGain'] / self.tracker.xs('valueIn', level='Values', axis=1).sum(axis=1)

        self.returnMean = gains['return'].mean(skipna=True)
        self.returnStd = gains['return'].std(skipna=True)
        self.returnSharpe = self.returnMean / self.returnStd

        gains['return100'] = gains['return'] + 1
        gains['cumulReturn'] = gains['return100']
        # Seed row: the cumulative product starts from a base of 100
        gains.loc[datetime(2014,1,1), 'cumulReturn'] = 100
        gains = gains.sort_index()
        gains['cumulReturn'] = gains['cumulReturn'].cumprod(axis=0)
        self.gains = gains
        return True

    def to_drawdown_series(self):
        """
        Calculates the drawdown series and stores it in self.drawdown.

        When self.pnl is at an all time high, the drawdown is 0. When it is
        below its high water mark (hwm), drawdown = current / hwm - 1.

        The max drawdown can be obtained by simply calling .min()
        on the result (since the drawdown series is negative)

        self.pnl (Series or DataFrame) must have been assigned by the caller;
        it is left untouched.

        Raises:
            AttributeError: if self.pnl has not been set.
        """
        pnl = getattr(self, 'pnl', None)
        if pnl is None:
            raise AttributeError("Strategy.pnl must be set before calling "
                                 "to_drawdown_series()")

        # The running maximum is the high water mark; the division builds a
        # new object, so the original data is not modified.
        drawdown = pnl / pnl.cummax() - 1

        # first row is 0 by definition
        if len(drawdown):
            drawdown.iloc[0] = 0

        self.drawdown = drawdown

        return True

    def drawdown_details(self):
        """
        Builds a data frame with start, end, days (duration) and
        drawdown for each drawdown of self.drawdown, and stores it in
        self.drawdownDetails.

        .. note::

            days are actual calendar days, not trading days

        Returns:
            * None when the series contains no drawdown at all (in which case
              self.drawdownDetails is left unchanged), True otherwise.

        """
        is_zero = self.drawdown == 0
        # find start dates (first day where dd is non-zero after a zero)
        start = ~is_zero & is_zero.shift(1)
        start = list(start[start == True].index)  # NOQA

        # find end dates (first day where dd is 0 after non-zero)
        end = is_zero & (~is_zero).shift(1)
        end = list(end[end == True].index)  # NOQA

        if not start:
            return None

        # drawdown has no end (end period in dd)
        if not end:
            end.append(self.drawdown.index[-1])

        # if the first drawdown start is larger than the first drawdown end it
        # means the drawdown series begins in a drawdown and therefore we must add
        # the first index to the start series
        if start[0] > end[0]:
            start.insert(0, self.drawdown.index[0])

        # if the last start is greater than the end then we must add the last index
        # to the end series since the drawdown series must finish with a drawdown
        if start[-1] > end[-1]:
            end.append(self.drawdown.index[-1])

        result = pd.DataFrame(columns=['start', 'end', 'days', 'drawdown'],
                              index=range(0, len(start)))

        for i in range(0, len(start)):
            dd = self.drawdown[start[i]:end[i]].min()
            result.loc[i] = (start[i], end[i], (end[i] - start[i]).days, dd)

        self.drawdownDetails = result

        return True

## Benchmark

### Loading

In [None]:
# Benchmark built from the 'uniform' result file. The '.csv' suffix is stripped
# because Strategy appends it itself when building the file paths.
benchmarkUniformFile = '10_75_False_None_adadelta_mse_200_10_tanh_linear__100000_1_True_False_True_151_None_None_None_uniform.csv'
benchmarkUniform = Strategy(name=benchmarkUniformFile.replace('.csv', ''), bsLoad=False)

In [None]:
# Same benchmark configuration, loaded from the 'riskContribution' result file
# (named ERC in the rest of the notebook).
benchmarkERCFile = '10_75_False_None_adadelta_mse_200_10_tanh_linear__100000_1_True_False_True_151_None_None_None_riskContribution.csv'
benchmarkERC = Strategy(name=benchmarkERCFile.replace('.csv', ''), bsLoad=False)

### Plot PNL

In [None]:
# Start from a clean bokeh output state and render inline in the notebook.
reset_output()
output_notebook()

fig = figure(x_axis_type="datetime")
fig.yaxis.axis_label = 'Cumulative returns'

# Both benchmarks are drawn against the dates of the ERC benchmark.
x = benchmarkERC.gains.index
for bench, lineColor, legendLabel in ((benchmarkERC, 'darkgreen', 'ERC'),
                                      (benchmarkUniform, 'darkred', 'uniform')):
    fig.line(x, bench.gains['cumulReturn'], color=lineColor, legend=legendLabel)
show(fig)

# Then switch the output to an HTML file and save the same figure there.
output_file('../results/dae/portfolios/resultsAnalysis/benchmarkPNL.html')
save(fig)

## Strategy level

### Loading

In [None]:
# Name (without extension) of the strategy analysed in the "Strategy level" sections.
strategyName = '10_75_False_None_adadelta_mse_200_10_tanh_linear__100000_1_True_True_False_None_None_0.4_2_riskContribution'

In [None]:
# Load that strategy; bsLoad keeps its default (False), so the buyers/sellers log is not read.
strategy = Strategy(name=strategyName)

### Residuals

Plot

In [None]:
# Histogram of the strategy's daily returns.
fig = figure(background_fill_color="#E8DDCB")
# [1:] leaves out the first row of gains; density=True normalises the 40 bins.
hist, edges = np.histogram(strategy.gains['return'][1:], density=True, bins=40)
fig.xaxis.axis_label = 'strategy returns'
# NOTE(review): presumably hides the y axis with the bokeh version in use -- confirm.
fig.yaxis.visible = None
fig.xaxis.axis_label_text_font_size = "12pt"
# One rectangle per bin: edges[:-1] are the left borders, edges[1:] the right ones.
fig.quad(top=hist,
        bottom=0,
        left=edges[:-1],
        right=edges[1:],
        fill_color="#036564",
        line_color="#033649")

# Show inline first, then switch the output to an HTML file and save.
reset_output()
output_notebook()
show(fig)
output_file('../results/dae/portfolios/resultsAnalysis/retsHist.html')
save(fig)

### Daily gain and loss repartition

#### Formatting

In [None]:
def computeStats(df, what):
    """Summarise every row of ``df`` by its extremes and its mean.

    Args:
        df: DataFrame whose rows are summarised (NaN cells are ignored).
        what: suffix appended to each output column name.

    Returns:
        DataFrame indexed like ``df`` with the columns 'min', 'mean', 'max',
        'idxmin' and 'idxmax' (each suffixed with ``what``). The idx* columns
        hold the label of the column where the extreme is reached, NaN for a
        row that contains no value. The three numeric columns keep a numeric
        dtype.
    """
    # Rows made only of NaN have no extreme column. They are left out of the
    # idxmin/idxmax call, which recent pandas versions reject for such rows.
    hasData = df.notnull().any(axis=1).values

    def extremeLabel(method):
        labels = pd.Series(np.nan, index=df.index, dtype=object)
        if hasData.any():
            labels[hasData] = getattr(df[hasData], method)(axis=1, skipna=True).values
        return labels

    # Column-wise assembly keeps one dtype per column, unlike stacking the
    # series as rows and transposing.
    res = pd.concat([df.min(axis=1),
                     df.mean(axis=1),
                     df.max(axis=1),
                     extremeLabel('idxmin'),
                     extremeLabel('idxmax')],
                    axis=1)
    res.columns = [stat + what for stat in ('min', 'mean', 'max', 'idxmin', 'idxmax')]

    return res

In [None]:
# Returns of the positions whose gain is negative / positive, one column per stock.
# NOTE(review): the mask has one column per stock while the tracker has two column
# levels -- this relies on how pandas aligns them; confirm with the version in use.
losingActions = strategy.tracker[strategy.tracker.xs('gain', level='Values', axis=1) < 0].xs('return', level='Values', axis=1)
winningActions = strategy.tracker[strategy.tracker.xs('gain', level='Values', axis=1) > 0].xs('return', level='Values', axis=1)

In [None]:
# Row-wise min/mean/max (and where the extremes are reached) for losing and winning
# positions; the output columns are suffixed with 'loss' and 'gain' respectively.
lossStats = computeStats(losingActions, 'loss')
winStats = computeStats(winningActions, 'gain')

In [None]:
# Put the loss and gain statistics side by side.
statss = pd.concat([lossStats, winStats], axis=1)
# Turn the index into a regular column (the plot below reads a 'Date' column,
# so the index is presumably named 'Date').
statss = statss.reset_index()

#### Plot

In [None]:
# Start from a clean bokeh output state and render inline in the notebook.
reset_output()
output_notebook()

fig = figure(x_axis_type="datetime", plot_width=1300)
fig.yaxis.axis_label = 'returns'
fig.yaxis.axis_label_text_font_size = "12pt"

# All lines below read their columns from the statss table.
source = ColumnDataSource(statss)

# Best positive return of the day; hovering shows which stock achieved it.
maxGain = fig.line('Date', 'maxgain', source=source, legend='max positive return', color='darkgreen')
fig.add_tools(HoverTool(renderers=[maxGain], tooltips=[('stock', '@idxmaxgain')]))
fig.line('Date', 'meangain', source=source, legend='average positive return', color='green')
# fig.line('Date', 'mingain', source=source, legend='minGain', color='lightgreen')

# fig.line('Date', 'maxloss', source=source, legend='minLoss', color='lightcoral')
fig.line('Date', 'meanloss', source=source, legend='average negative return', color='red')
# Worst negative return of the day; hovering shows which stock suffered it.
minLoss = fig.line('Date', 'minloss', source=source, legend='max negative return', color='darkred')
fig.add_tools(HoverTool(renderers=[minLoss], tooltips=[('stock', '@idxminloss')]))

fig.legend.orientation = "top_right"

# Show inline, then switch the output to an HTML file and save.
show(fig)
output_file('../results/dae/portfolios/resultsAnalysis/' + strategyName + '_dailyPnLEvolution.html')
save(fig)

### Daily long short performance

#### Leg correction

Correct long/short legs performances with the market/benchmark

In [None]:
# Long leg return minus the ERC benchmark return. [1:] skips the first benchmark
# row (presumably the 2014-01-01 seed row, which has no 'return') -- TODO confirm.
dailyPerformanceLongReturnCorrected = strategy.dailyPerformanceLongReturn - benchmarkERC.gains[1:]['return']

In [None]:
# Short leg return plus the ERC benchmark return (opposite sign to the long leg correction).
dailyPerformanceShortReturnCorrected = strategy.dailyPerformanceShortReturn + benchmarkERC.gains[1:]['return']

#### Plot

In [None]:
# Start from a clean bokeh output state and render inline in the notebook.
reset_output()
output_notebook()

fig = figure(x_axis_type="datetime",
             width = 1200)
fig.yaxis.axis_label = 'Returns'

# Benchmark-corrected returns of each leg, drawn against the tracker dates.
fig.line(strategy.tracker.index.values, dailyPerformanceLongReturnCorrected, legend='long', color='green')
fig.line(strategy.tracker.index.values, dailyPerformanceShortReturnCorrected, legend='short', color='red')

fig.legend.orientation = "top_left"

# Show inline, then switch the output to an HTML file and save.
show(fig)
output_file('../results/dae/portfolios/resultsAnalysis/' + strategyName + '_dailyLongShortPerf.html')
save(fig)

### Turnover

#### Computation

In [None]:
# Fills strategy.turnover: for each day but the last, the percentage of traded
# stocks that were not traded the day before.
strategy.computeTurnover()

In [None]:
# Average turnover. The first day is included: it has no previous day, so every
# stock traded on it counts as new.
np.mean(strategy.turnover)

#### Plot

In [None]:
# Start from a clean bokeh output state and render inline in the notebook.
reset_output()
output_notebook()

p = figure(x_axis_type="datetime")
p.yaxis.axis_label = 'turnover (%)'
# strategy.turnover[k] belongs to the k-th tracker date and has no entry for the
# last date. The first day is skipped on both axes; bounding the dates by
# len(turnover) keeps x and y the same length.
turnoverDays = strategy.tracker.index.values[1:len(strategy.turnover)]
p.line(x=turnoverDays, y=strategy.turnover[1:], color='green')
p.legend.orientation = "top_left"
show(p)

# Then switch the output to an HTML file and save the same figure there.
output_file('../results/dae/portfolios/resultsAnalysis/' + strategyName + '_turnover.html')
save(p)

### Actions contribution

#### Computation

In [None]:
# Adds a 'contribution' field to the tracker: share (in %) of each position in the
# day's total loss (negative values) or total gain (positive values).
strategy.computeGainLossContribution()

In [None]:
# Contributions only, selected from the 'Values' column level.
contribs = strategy.tracker.xs('contribution', level='Values', axis=1)

In [None]:
# Per stock, percentage of days on which it has a contribution at all, a positive
# one (gain) and a negative one (loss).
frames = []
frames.append(contribs.count() / contribs.shape[0] * 100)
frames.append(contribs[contribs > 0].count() / contribs.shape[0] * 100)
frames.append(contribs[contribs < 0].count() / contribs.shape[0] * 100)
df = pd.DataFrame(frames).T
df.columns = ['Total', 'Gain', 'Loss']
df = df.reset_index()
# Most frequent losers first. sort_values is the same API already used for
# strategiesMeanStd; DataFrame.sort no longer exists in current pandas.
df = df.sort_values("Loss", ascending=False)

#### Plot

In [None]:
# Start from a clean bokeh output state and render inline in the notebook.
reset_output()
output_notebook()
# Stacked bars: one bar per 'Action' value, built from the 'Loss' and 'Gain' columns.
# sort=False keeps the row order of df.
bar = Bar(df,
          values=blend('Loss', 'Gain', name='pnls', labels_name='pnl'),
          label=cat(columns='Action', sort=False),
          stack=cat(columns='pnl', sort=False),
          color=color(columns='pnl', palette=list(reversed(['green', 'red'])), sort=False),
          legend='top_right',
          title='Participation repartition (%)', tooltips=[('Action', '@Action')], width=2300)
show(bar)
# Then switch the output to an HTML file and save the same chart there.
output_file('../results/dae/portfolios/resultsAnalysis/' + strategyName + '_actionsContribution.html')
save(bar)

### Strategy VS Benchmark

How does the strategy behave compared to benchmark?
The goal is just to plot daily returns from benchmark in function of daily returns from

In [None]:
def plotStratVSBenchmark(strat, stratName):
    """Scatter the daily returns of ``strat`` against those of the ERC benchmark.

    The figure is shown in the notebook and saved under the results analysis
    directory as ``<stratName>.html``.

    Args:
        strat: object whose ``gains['return']`` gives the x coordinates.
        stratName: base name of the HTML file to write.

    Returns:
        True.
    """
    reset_output()

    scatter = figure()
    for axis, axisLabel in ((scatter.xaxis, 'strategy returns'),
                            (scatter.yaxis, 'benchmark returns')):
        axis.axis_label = axisLabel
        axis.axis_label_text_font_size = "12pt"
    # The benchmark is the notebook-level benchmarkERC
    scatter.circle(strat.gains['return'], benchmarkERC.gains['return'])

    # Inline display first, then the HTML file
    output_notebook()
    show(scatter)

    output_file('../results/dae/portfolios/resultsAnalysis/' + stratName + '.html')
    save(scatter)

    return True

First with best strategy

In [None]:
# Scatter for the strategy loaded above; written to best.html.
plotStratVSBenchmark(strategy, 'best')

Second with 1 long 1 short, which is the purest and least noisy strategy

In [None]:
# Strategy name of the 1 long / 1 short configuration (no extension: Strategy adds '.csv').
long1Short1File = '10_75_False_None_adadelta_mse_200_10_tanh_linear__100000_1_True_True_True_1_1_None_None_uniform'
long1Short1 = Strategy(name=long1Short1File)

In [None]:
# Same scatter for the 1 long / 1 short strategy; written to long1Short1.html.
plotStratVSBenchmark(long1Short1, 'long1Short1')

## ML class of model level

In [None]:
# Machine learning part of the configuration names (what precedes '__' in a strategy name).
mlModel = '10_75_False_None_adadelta_mse_200_10_tanh_linear'

### Loading

In [None]:
# Directory holding the prediction files 'test_<model>.csv' and 'train_<model>.csv'.
mlPredictionsDir = '../results/dae/neuralNetwork/predictions/'

In [None]:
# Test-set predictions of the model, indexed by the parsed 'Date' column.
predictions_test = pd.read_csv(filepath_or_buffer=mlPredictionsDir + 'test_' + mlModel + '.csv',
                               sep=';',
                               header=0,
                               index_col='Date',
                               parse_dates=True)

In [None]:
# Train-set predictions of the model, indexed by the parsed 'Date' column.
predictions_train = pd.read_csv(filepath_or_buffer=mlPredictionsDir + 'train_' + mlModel + '.csv',
                                sep=';',
                                header=0,
                                index_col='Date',
                                parse_dates=True)

In [None]:
def loadStrategiesValues(mlModel, condition='True_True_False_', vals='CumulGain',
                         refDir='../results/dae/portfolios/values/'):
    """Gather one column of every matching strategy value file into one table.

    A file of ``refDir`` matches when its name starts with ``mlModel`` and
    contains ``condition``.

    Args:
        mlModel: prefix identifying the machine learning model.
        condition: substring the file name must contain; it is also removed
            from the name to build the column label.
        vals: name of the column to extract from each file.
        refDir: directory to scan (defaults to the portfolio values directory).

    Returns:
        DataFrame indexed by 'Date' (taken from the first matching file, in
        alphabetical order) with one column per matching file, labelled with
        what remains of the strategy part of the file name.

    Raises:
        ValueError: if no file of ``refDir`` matches.
    """
    stratValues = {}
    # Sorted listing: os.listdir gives no ordering guarantee
    for fileName in sorted(os.listdir(refDir)):
        if not (fileName.startswith(mlModel) and condition in fileName):
            continue
        # Keep what distinguishes the strategy: drop the fixed prefix/suffix
        # of the part that follows '__'
        stratConf = (fileName.rsplit('__')[1]
                     .replace('_2_riskContribution.csv', '')
                     .replace('100000_1_' + condition, '')
                     .replace('None_None_', ''))
        data = pd.read_csv(filepath_or_buffer=os.path.join(refDir, fileName),
                           sep=';',
                           header=0,
                           parse_dates=True)
        if 'Date' not in stratValues:
            stratValues['Date'] = pd.to_datetime(data['Date'])
        stratValues[stratConf] = data[vals].tolist()

    if not stratValues:
        raise ValueError("no file in %r starts with %r and contains %r"
                         % (refDir, mlModel, condition))

    stratValues = pd.DataFrame.from_dict(stratValues, orient='columns')
    stratValues = stratValues.set_index('Date')
    return stratValues

### Absolute gain and returns

This is a little bit overkill. Previous methods may have been used instead.

In [None]:
# 'CumulGain' column (the default) of every matching strategy of the model.
stratValuesCumul =  loadStrategiesValues(mlModel=mlModel)
stratValuesCumul.head()

In [None]:
# Same strategies, 'AbsGain' column this time.
stratValuesAbs =  loadStrategiesValues(mlModel=mlModel, vals='AbsGain')
stratValuesAbs.head()

In [None]:
# Turn absolute gains into growth factors.
# NOTE(review): 200000 looks like the invested capital -- confirm.
stratValuesAbs1 = stratValuesAbs / 200000 + 1
# Scale the 2014-01-02 row by 100 so that the cumulative product below is on a
# base-100 scale (assumes it is the first row -- TODO confirm).
stratValuesAbs1.loc['2014-1-2'] = stratValuesAbs1.loc['2014-1-2'] * 100
# Compound the factors over time.
stratValuesAbs2 = stratValuesAbs1.cumprod(axis=0)
stratValuesAbs2.head()

In [None]:
# Plot through the project helper (defined in resultsAnalysis_utils, not shown here).
uti.plotCombinedPortfoliosPerf(stratValuesAbs2, mlModel, kind='Cumulative returns', benchmark=benchmarkERC)

## Equally weighted strategies study

In [None]:
# Directory of the tracker files (same path as the default trackerFileDir of Strategy).
trackerDir = '../results/dae/portfolios/trackers/'

### Matrix of mean / std / sharpe ratio of long, short, long short portfolios
For a given machine learning model and long/short portfolios

#### Computation

In [None]:
# Compute return mean / std / sharpe for every equally weighted ('uniform')
# strategy of the model, leaving out the names containing '151'.
# Note: this rebinds the notebook-level `strategy`.
for trackerFile in os.listdir(trackerDir):
    if mlModel in trackerFile and 'uniform' in trackerFile and '151' not in trackerFile:
        strategy = Strategy(name=trackerFile.replace('.csv', ''), bsLoad=False)
        # Label-based assignment: creates the columns on first use
        strategiesMeanStd.loc[strategy.name, 'returnMean'] = strategy.returnMean
        strategiesMeanStd.loc[strategy.name, 'returnStd'] = strategy.returnStd
        strategiesMeanStd.loc[strategy.name, 'returnSharpe'] = strategy.returnSharpe

In [None]:
# Move 'concatParams' back from the index to a regular column...
strategiesMeanStd = strategiesMeanStd.reset_index() 
# ...and keep the rows of this model that use uniform weights.
ewstrategiesMeanStd = strategiesMeanStd[(strategiesMeanStd['concatParams'].str.contains(mlModel)) &
                                        (strategiesMeanStd['concatParams'].str.contains('uniform'))]

In [None]:
# Drop the incomplete rows of the filtered table. Starting again from
# strategiesMeanStd would throw away the model/'uniform' filter applied above.
ewstrategiesMeanStd = ewstrategiesMeanStd.dropna()

In [None]:
def indexCorrespondance(legSize):
    """Pair each distinct value of a column of ewstrategiesMeanStd with its rank.

    Args:
        legSize: name of the column to read.

    Returns:
        List of (value, position) tuples, values in ascending order.
    """
    distinctSizes = sorted(ewstrategiesMeanStd[legSize].unique())
    return [(size, rank) for rank, size in enumerate(distinctSizes)]

def matrixGeneration(metric='mean'):
    """Lay out one metric of ewstrategiesMeanStd on a long-size x short-size grid.

    Args:
        metric: name of the column to read.

    Returns:
        DataFrame with one row per distinct 'legSizeLong' (labelled 'l_<size>')
        and one column per distinct 'legSizeShort' (labelled 's_<size>'). A
        cell stays at 0 unless exactly one row matches the pair of sizes.
    """
    longSizes = sorted(ewstrategiesMeanStd['legSizeLong'].unique())
    shortSizes = sorted(ewstrategiesMeanStd['legSizeShort'].unique())

    grid = np.zeros(shape=(len(longSizes), len(shortSizes)))
    for row, longSize in enumerate(longSizes):
        for col, shortSize in enumerate(shortSizes):
            samePair = ((ewstrategiesMeanStd['legSizeLong'] == longSize) &
                        (ewstrategiesMeanStd['legSizeShort'] == shortSize))
            found = ewstrategiesMeanStd[samePair][metric].values
            if len(found) == 1:
                grid[row, col] = found[0]

    matrix = pd.DataFrame(grid, index=longSizes, columns=shortSizes)
    matrix.index = matrix.index.map(lambda size: 'l_' + str(size))
    matrix.columns = matrix.columns.map(lambda size: 's_' + str(size))
    return matrix

In [None]:
# One long-size x short-size grid per return metric.
matrixMean = matrixGeneration('returnMean')
matrixStd = matrixGeneration('returnStd')
matrixSharpe = matrixGeneration('returnSharpe')

#### Results saving

In [None]:
# Save the three grids as ';'-separated CSV files.
matrixMean.to_csv('../results/dae/portfolios/resultsAnalysis/return_mean_longshort.csv',
                  sep=';')
matrixStd.to_csv('../results/dae/portfolios/resultsAnalysis/return_std_longshort.csv',
                  sep=';')
matrixSharpe.to_csv('../results/dae/portfolios/resultsAnalysis/return_sharpe_longshort.csv',
                  sep=';')


### Correlations between strategies

For long and short portfolios corrected for the market

#### Computation

In [None]:
# Name fragment selecting the strategies loaded here as long strategies.
what = '_1_True_False'
# One Strategy per tracker file of the model containing that fragment, '151' names excluded.
longStrategies = [Strategy(name=file.replace('.csv', ''),
                           bsLoad=False)
                  for file in os.listdir(trackerDir) if (mlModel in file and what in file and '151' not in file)]
# Correct returns with market/benchmark returns
for longStrategy in longStrategies:
    longStrategy.gains['returnCorrected'] = longStrategy.gains['return'] - benchmarkUniform.gains['return']

In [None]:
# Name fragment selecting the strategies loaded here as short strategies.
what = '_1_False_True'
# One Strategy per tracker file of the model containing that fragment, '151' names excluded.
shortStrategies = [Strategy(name=file.replace('.csv', ''),
                            bsLoad=False)
                  for file in os.listdir(trackerDir) if (mlModel in file and what in file and '151' not in file)]
# Correct returns with market/benchmark returns
# (the benchmark is added here, whereas it is subtracted for the long strategies)
for shortStrategy in shortStrategies:
    shortStrategy.gains['returnCorrected'] = shortStrategy.gains['return'] + benchmarkUniform.gains['return']

Change specific name for correlations purpose (flexible parameter)

In [None]:
# Keep only the part of the name between '100000_1_' and '_None_None_uniform'.
for strat in longStrategies + shortStrategies:
    strat.specificName = strat.name.split('100000_1_')[1].split('_None_None_uniform')[0]

In [None]:
def computeStrategiesCorrelations(set1, columnsName, set2, indexName):
    """Correlate the corrected returns of every strategy of set1 with set2's.

    Each strategy must expose ``gains['returnCorrected']`` and a
    ``specificName`` made of '_'-separated fields: the second to last field
    is read as an integer for set1, the last one for set2.

    Args:
        set1: strategies laid out on the rows of the result.
        columnsName: prefix of the labels built for set1.
        set2: strategies laid out on the columns of the result.
        indexName: prefix of the labels built for set2.

    Returns:
        DataFrame of correlations with rows '<columnsName>_<n>' (set1) and
        columns '<indexName>_<n>' (set2), both sorted by the integer n.
    """
    # One column per set1 strategy, one row per set2 strategy (transposed at the end)
    table = {}
    for first in set1:
        ownReturns = first.gains['returnCorrected']
        table[first.specificName] = [ownReturns.corr(second.gains['returnCorrected'])
                                     for second in set2]
    table[indexName] = [second.specificName for second in set2]

    frame = pd.DataFrame.from_dict(table).set_index(indexName)

    # Sort numerically on the integer field found in each name...
    frame.index = frame.index.map(lambda label: int(label.split('_')[-1]))
    frame.columns = frame.columns.map(lambda label: int(label.split('_')[-2]))
    frame = frame.sort_index().sort_index(axis=1)

    # ...then build the final prefixed labels
    frame.index = frame.index.map(lambda size: indexName + '_' + str(size))
    frame.columns = frame.columns.map(lambda size: columnsName + '_' + str(size))
    return frame.T

#### Results saving

In [None]:
# Long strategies on the rows ('l_<n>'), short strategies on the columns ('s_<n>');
# saved as a ';'-separated CSV file.
corrsMatrix_long_short = computeStrategiesCorrelations(longStrategies, 'l', shortStrategies, 's')
corrsMatrix_long_short.to_csv('../results/dae/portfolios/resultsAnalysis/strategiesCorrelations_long_short.csv',
                              sep=';')

## All strategies level

Represent all metrics on a single graph to get an overview of strategies performances

###  Computation

In [None]:
# Keep the long/short strategies weighted by risk contribution whose
# involvedProportion is not 0. The flags are compared with the text 'True'
# (presumably because the parameters were split out of 'concatParams' as strings).
strategiesMeanStdRiskContrib = strategiesMeanStd.loc[(strategiesMeanStd['isLong'] == 'True') &
                                                     (strategiesMeanStd['isShort'] == 'True') &
                                                     (strategiesMeanStd['weight_type'] == 'riskContribution') &
                                                     (strategiesMeanStd['involvedProportion'] != 0)]

In [None]:
# Compute return mean / std / sharpe for each selected strategy.
# NOTE(review): the index values are used as strategy names, so this cell needs
# strategiesMeanStdRiskContrib to be indexed by 'concatParams'. That is not the
# case if strategiesMeanStd went through reset_index() beforehand -- confirm the
# execution order of the notebook.
# Note: this rebinds the notebook-level `strategy`.
for config in strategiesMeanStdRiskContrib.index.values:
    strategy = Strategy(name=config)
    # Report the configurations whose mean return could not be computed
    if np.isnan(strategy.returnMean): print(config)
    strategiesMeanStdRiskContrib.loc[strategy.name, 'returnMean'] = strategy.returnMean
    strategiesMeanStdRiskContrib.loc[strategy.name, 'returnStd'] = strategy.returnStd
    strategiesMeanStdRiskContrib.loc[strategy.name, 'returnSharpe'] = strategy.returnSharpe

In [None]:
# Sort by 'returnMean' through the project helper rawSort (defined in
# resultsAnalysis_utils, not shown here).
sortedReturnMean = uti.rawSort(results=strategiesMeanStdRiskContrib, metricToSortBy='returnMean')

In [None]:
# Expose the index as a column: the plot below reads dataa['index'] as x values.
dataa = sortedReturnMean
dataa = dataa.reset_index()

### Plot

In [None]:
# Single figure with three y scales: mean (main axis), std and sharpe (extra axes),
# one x position per strategy.
reset_output()
p = figure(tools="pan,wheel_zoom,box_zoom,reset,resize,hover,save",
           x_range=(0, dataa.shape[0] - 1),
           y_range=(dataa['returnMean'].min(), dataa['returnMean'].max()))
p.yaxis.major_label_text_color = "darkred"
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
x = dataa['index']

# Degree-10 polynomial fit of each metric against x, evaluated at every x.
z0 = np.polyfit(x, dataa['returnMean'], 10)
pol0 = np.poly1d(z0)
vals0 = [pol0(i) for i in x]

z1 = np.polyfit(x, dataa['returnStd'], 10)
pol1 = np.poly1d(z1)
vals1 = [pol1(i) for i in x]

z2 = np.polyfit(x, dataa['returnSharpe'], 10)
pol2 = np.poly1d(z2)
vals2 = [pol2(i) for i in x]

# Raw metrics, their fitted curves and the configuration name shown on hover.
source = ColumnDataSource(
        data=dict(
        x = x,
        mean = dataa['returnMean'],
        meanpol = vals0,
        std = dataa['returnStd'],
        stdpol = vals1,
        sharpe = dataa['returnSharpe'],
        sharpepol = vals2,
        conf = dataa['concatParams']
        )
    )
# p.line('x','mean', legend='mean (€/day)', color='red', source=source, line_width=1.5)
p.line('x','meanpol', legend='mean', color='darkred', source=source, line_width=1.5)
# Thin line at y = 0 on the mean axis.
hline = Span(location=0, dimension='width', line_color='darkred', line_width=0.5)
p.renderers.extend([hline])

meanMin = dataa['returnMean'].min()
meanMax = dataa['returnMean'].max()

stdMin = dataa['returnStd'].min()
stdMax = dataa['returnStd'].max()

sharpeMin = dataa['returnSharpe'].min()
sharpeMax = dataa['returnSharpe'].max()

# Extra y ranges so that std and sharpe each get their own scale.
p.extra_y_ranges["stdy"] = Range1d(start=stdMin, end=stdMax)
p.extra_y_ranges["sharpey"] = Range1d(start=sharpeMin, end=sharpeMax)

p.add_layout(LinearAxis(y_range_name='stdy', axis_label_text_color='green', major_label_text_color='green'), 'left')
p.add_layout(LinearAxis(y_range_name='sharpey', axis_label_text_color='orange', major_label_text_color='orange'), 'right')

# Raw series drawn faintly, fitted curves drawn fully and listed in the legend.
p.line('x', 'std', y_range_name='stdy', color='green', line_alpha=0.25, source=source)
p.line('x', 'sharpe', y_range_name='sharpey', color='orange', line_alpha=0.25, source=source)
p.line('x', 'stdpol', legend='std', y_range_name='stdy', line_color='green', source=source)
p.line('x', 'sharpepol', legend='sharpe', y_range_name='sharpey', line_color='orange', source=source)
p.select_one(HoverTool).tooltips = [('conf','@conf')]
p.xaxis.axis_label = 'Strategies'
p.xaxis.axis_label_text_font_size = "12pt"

p.legend.orientation = "top_left"
# Show inline, then switch the output to an HTML file and save.
output_notebook()
show(p)

output_file('../results/dae/portfolios/resultsAnalysis/combinedMetrics.html')
save(p)