In [1]:
import pandas as pd
import numpy as np
import sympy as sp

## References

J. Moody and M. Saffell, "Learning to trade via direct reinforcement," in IEEE Transactions on Neural Networks, vol. 12, no. 4, pp. 875-889, July 2001, doi: 10.1109/72.935097.

## Overview

Given a trading system model $F_{t}(\theta)$, the goal is to adjust the parameters $\theta$ in order to maximise the wealth function $U_{T}$.

$$ \frac{dU_{T}(\theta)}{d\theta} = 
\sum \limits _{t=1} ^ {T} 
\frac{dU_{T}}{dR_{t}} 
\{ \frac{dR_{t}}{dF_{t}} \frac{dF_{t}}{d\theta} + \frac{dR_{t}}{dF_{t-1}} \frac{dF_{t-1}}{d\theta}  \}$$

Where:
* $R_{t}$ = returns  
* $F_{t}$ = position  
* $\theta$ = model weights

## Sterling Ratio wealth function

The *Sterling Ratio* is used as a wealth function:

$$ \text{Sterling Ratio} = \frac{\text{Annualized Average Return}}{\text{Maximum Drawdown}}$$
  
  
Here the Sterling Ratio is approximated by the Downside Deviation Ratio (DDR):

$$ DDR_{T} = \frac{Average(R_{t})}{DD_{T}} $$

Where $$DD_{T} = \left(\frac{1}{T}\sum \limits _{t=1} ^ {T} \min \{R_{t},0\}^2\right)^\frac{1}{2} $$

In [10]:
import sympy as sp

# Symbolic placeholders. Judging by their names they stand for the previous
# downside deviation, the current return and the previous moving average of
# returns -- presumably matching DD_{t-1}, R_t and A_{t-1} in the text above.
dd_t_1, rt, A_t_1 = sp.symbols('dd_t_-1, rt, A_t_-1')

# NOTE(review): the cited paper's D_t for R_t <= 0 appears to use DD_{t-1}^2
# in the first numerator term and DD_{t-1}^3 in the denominator; this
# expression uses the first and second powers. Confirm which is intended
# (e.g. whether dd_t_-1 is meant to be DD or DD^2) before relying on it.
Dt_numerator = dd_t_1 * (rt - 0.5*A_t_1) - 0.5*A_t_1 * rt**2
Dt = Dt_numerator / dd_t_1**2

display('Differential downside deviation ratio (Dt):')
display(Dt)

# Differentiate symbolically with respect to the current return only.
dDdR = sp.diff(Dt, rt)
display('Partial derivative of Dt with respect to Rt:')
display(dDdR)

'Differential downside deviation ratio (Dt):'

(-0.5*A_t_-1*rt**2 + dd_t_-1*(-0.5*A_t_-1 + rt))/dd_t_-1**2

'Partial derivative of Dt with respect to Rt:'

(-1.0*A_t_-1*rt + dd_t_-1)/dd_t_-1**2

Exponential moving average of returns:
$$A_{t} = A_{t-1} + n(R_{t} - A_{t-1})$$

In [11]:
def A(ret, n=None, start=None):
    '''
    Exponential moving average of returns:
        A[t] = A[t-1] + n * (ret[t] - A[t-1])

    ret   : sequence of returns, read as ret[t] for each updated index t.
    n     : smoothing rate. When omitted, the module-level N is used.
    start : first index to update. When omitted, the module-level
            NUM_RETURNS is used. Entries before it stay at zero.

    Returns a numpy array with one entry per element of ret.
    '''
    if n is None:
        n = N
    if start is None:
        start = NUM_RETURNS

    T = len(ret)
    ema = np.zeros(T)

    for t in range(start, T):
        updated = ema[t-1] + (n * (ret[t] - ema[t-1]))
        # Sanitise NaN/inf so one bad return does not poison all later values.
        ema[t] = np.nan_to_num(updated)

    return ema

Squared downside deviation (exponential moving estimate):
$$DD_{t}^2 = DD_{t-1}^2 + n(min(R_{t},0)^2 - DD_{t-1}^2)$$

In [12]:
def DD(ret, n=None, start=None):
    '''
    Exponential moving estimate of the squared downside deviation:
        DD2[t] = DD2[t-1] + n * (min(ret[t], 0)**2 - DD2[t-1])

    Note that the returned values are the squared quantity, not its root.

    ret   : sequence of returns, read as ret[t] for each updated index t.
    n     : smoothing rate. When omitted, the module-level N is used.
    start : first index to update. When omitted, the module-level
            NUM_RETURNS is used. Entries before it stay at zero.

    Returns a numpy array with one entry per element of ret.
    '''
    if n is None:
        n = N
    if start is None:
        start = NUM_RETURNS

    T = len(ret)
    dd_sq = np.zeros(T)
    for t in range(start, T):
        # Only negative returns contribute; positive ones are clipped to zero.
        downside_sq = min(ret[t], 0)**2
        updated = dd_sq[t-1] + (n * (downside_sq - dd_sq[t-1]))
        # Sanitise NaN/inf so one bad return does not poison all later values.
        dd_sq[t] = np.nan_to_num(updated)

    return dd_sq

## Global params


In [13]:
# Smoothing rate used by the moving-average updates in A() and DD().
N = 0.01
# Look-back window used by positions(); A() and DD() also start updating at
# this index. Both names are bound to the same value.
M = 15
NUM_RETURNS = M

## Functions to calc positions, returns

State takes the form: $[1, R_{t-M}, ..., R_{t-1}, F_{t-1}]$ (a bias term, the $M$ most recent returns before $t$ in chronological order, and the previous position).

In [14]:
def positions(rets, weights, m=None):
    '''
    Apply weights to state to determine positions.

    For each t >= m the state vector is [1, rets[t-m], ..., rets[t-1], pos[t-1]]
    and the position is tanh(weights . state), so it lies in [-1, 1].

    rets    : array of returns.
    weights : weight vector; must have m + 2 entries to match the state.
    m       : number of past returns in the state. When omitted, the
              module-level M is used.

    Returns a numpy array with one position per element of rets; the first
    m entries are left at zero.
    '''
    if m is None:
        m = M

    T = len(rets)
    pos = np.zeros(T)
    for t in range(m, T):
        # Use the `rets` argument here: the original read an undefined `ret`.
        state = np.concatenate([ [1], rets[t - m:t], [pos[t-1]] ])
        pos[t] = np.tanh(np.dot(weights, state))

    return pos

In [15]:
def returns(positions, rets, fee):
    '''
    Realised per-step returns of a position series, net of trading fees.

    Each step earns the previously held position times the current market
    return, less `fee` times the absolute change in position. A leading 0 is
    prepended, so the result has one entry per element of `rets`.
    '''
    T = len(rets)
    held = positions[0:T-1]
    target = positions[1:T]

    gross = held * rets[1:T]
    trading_cost = fee * np.abs(target - held)

    return np.concatenate([[0], gross - trading_cost])

## Input data 

In [16]:
# Read the CSV with explicit column names, parse the first column as epoch
# seconds into a datetime index, and drop the raw timestamp column.
df = (
    pd.read_csv('~/Downloads/coinbaseGBP.csv', names=['_dt', 'price', 'size'])
    .assign(dt=lambda frame: pd.to_datetime(frame['_dt'], unit='s'))
    .set_index('dt')
    .drop(columns=['_dt'])
)

# Percentage price changes: skip the first entry (it has no predecessor, so
# pct_change yields NaN) and keep the next 250 values.
rets = df['price'].pct_change().values[1:251]
rets.shape

(250,)