In [23]:
# def data_process(prcAll):
#     """
#     Convert raw price data into closeprice columns 
#     """
#     closePrice = []
#     for stock in range(50):
#         closePrice.extend(prcAll[stock])

#     dates = []
#     for stock in range(50):
#         for day in range(prcAll.shape[1]):
#             dates.append(day)

#     stocks = []
#     for stock in range(50):
#         stocks.extend([stock]*prcAll.shape[1])
#     full_data = pd.DataFrame({'date': dates, 'stock': stocks, 'closePrice': closePrice})

#     return full_data

In [3]:
def range_so_far(data):
    amp = []
    for j in range(50):
        single_stock_data = data[j]

        base_range = np.max(single_stock_data[-AMP_WINDOW:]) -  np.min(single_stock_data[-AMP_WINDOW:])
        amp.append(base_range)
    return amp

In [15]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


SHORT_TERM = 2              # Short term average
LONG_TERM = 15              # Long term average
PRICE_RANGE = 5             # The period to calculate price difference
AMP_WINDOW = 75            # The period to get stock amplitude

CHANGE_HOLDING = 500            # Price to change holding
AMP_LO_THRESHOLD = 12.5       # The threshold when price movement is considered low
AMP_HI_THRESHOLD = 0.5
PRICE_CHANGE_THRESHOLD = 0.025
MSE_THRESHOLD_2 = 0.02
SLOPE_THRESHOLD_2 = 2     # The threshold when price movement is considered high
MSE_THRESHOLD_1 = 0.25       # The threshold to control price volatility
SLOPE_THRESHOLD_1 = 0.2       # The threshold to control LR slope

nInst = 50
currentPos = np.zeros(nInst)
yesterday_sign = np.zeros(nInst)

def getMyPosition(prcSoFar):
	
    global currentPos, yesterday_sign

    currentPrices = prcSoFar[:,-1] # price of last day

    amp = range_so_far(prcSoFar)
	
    # Get long term and short term average prices
    for stock in range(50):
        single_stock_data = prcSoFar[stock]

        # Use short term and long term average to determine sign
        long_mean = single_stock_data[-LONG_TERM:].mean()
        short_mean = single_stock_data[-SHORT_TERM:].mean()
        today_sign = np.sign(short_mean - long_mean)

        # Use a price window to make decision
        n_day_diff = single_stock_data[-PRICE_RANGE] - single_stock_data[-1]
        n_day_range = np.max(single_stock_data[-PRICE_RANGE:]) - np.min(single_stock_data[-PRICE_RANGE:])
        # two_day_diff = single_stock_data.loc[day-3, 'closePrice'] - single_stock_data.loc[day-1, 'closePrice']

        # Calculate the MSE of price movement during the range
        n_day_gap = np.diff(single_stock_data[-PRICE_RANGE:])
        LR = LinearRegression(n_jobs=-1).fit(np.array(range(PRICE_RANGE-1)).reshape(-1, 1), n_day_gap.reshape(-1,1))
        n_day_mse = mean_squared_error(n_day_gap, LR.predict(np.array(range(PRICE_RANGE-1)).reshape(-1, 1)))
        
        if currentPos[stock] * n_day_range * np.sign(n_day_diff) > np.abs(currentPos[stock] * currentPrices[stock]) * PRICE_CHANGE_THRESHOLD and n_day_mse > np.abs(n_day_diff*MSE_THRESHOLD_2) or ((LR.coef_ * currentPos[stock] < 0)[0][0] and (np.abs(LR.coef_) > SLOPE_THRESHOLD_2)[0][0]):
            currentPos[stock] = 0

        elif np.abs(n_day_diff) <= amp[stock]/AMP_LO_THRESHOLD or (n_day_mse > np.abs(n_day_diff*MSE_THRESHOLD_1) and (np.abs(LR.coef_) < SLOPE_THRESHOLD_1)[0][0]):
            pass
            
        elif np.abs(n_day_diff) >= amp[stock]/AMP_HI_THRESHOLD:
            value = today_sign * CHANGE_HOLDING
            currentPos[stock] -= value//currentPrices[stock]
    
        else:
            value = today_sign * CHANGE_HOLDING
            currentPos[stock] += value//currentPrices[stock]
        
        yesterday_sign[stock] = today_sign
	
    return currentPos

In [16]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
# from main import getMyPosition as getPosition

nInst = 0
nt = 0
commRate = 0.0010
dlrPosLimit = 10000

def loadPrices(fn):
    global nt, nInst
    #df=pd.read_csv(fn, sep='\s+', names=cols, header=None, index_col=0)
    df=pd.read_csv(fn, sep='\s+', header=None, index_col=None)
    nt, nInst = df.values.shape
    return (df.values).T

pricesFile="./data/prices.txt"
prcAll = loadPrices(pricesFile)
print ("Loaded %d instruments for %d days" % (nInst, nt))

currentPos = np.zeros(nInst)

def calcPL(prcHist):
    cash = 0
    curPos = np.zeros(nInst)
    totDVolume = 0
    totDVolumeSignal = 0
    totDVolumeRandom = 0
    value = 0
    todayPLL = []
    (_,nt) = prcHist.shape
    for t in range(500,750): 
        prcHistSoFar = prcHist[:,:t]
        newPosOrig = getMyPosition(prcHistSoFar)
        curPrices = prcHistSoFar[:,-1] #prcHist[:,t-1]
        posLimits = np.array([int(x) for x in dlrPosLimit / curPrices])
        clipPos = np.clip(newPosOrig, -posLimits, posLimits)
        newPos = np.array([np.trunc(x) for x in clipPos])
        deltaPos = newPos - curPos
        dvolumes = curPrices * np.abs(deltaPos)
        dvolume = np.sum(dvolumes)
        totDVolume += dvolume
        comm = dvolume * commRate
        cash -= curPrices.dot(deltaPos) + comm
        curPos = np.array(newPos)
        posValue = curPos.dot(curPrices)
        todayPL = cash + posValue - value
        todayPLL.append(todayPL)
        value = cash + posValue
        ret = 0.0
        if (totDVolume > 0):
            ret = value / totDVolume
        print ("Day %d value: %.2lf todayPL: $%.2lf $-traded: %.0lf return: %.5lf" % (t,value, todayPL, totDVolume, ret))

    pll = np.array(todayPLL)
    (plmu,plstd) = (np.mean(pll), np.std(pll))
    annSharpe = 0.0
    if (plstd > 0):
        annSharpe = np.sqrt(250) * plmu / plstd
    return plmu - 0.1*plstd

calcPL(prcAll)

Loaded 50 instruments for 750 days
Day 500 value: -13.08 todayPL: $-13.08 $-traded: 13081 return: -0.00100
Day 501 value: -27.71 todayPL: $-14.63 $-traded: 26139 return: -0.00106
Day 502 value: -59.88 todayPL: $-32.17 $-traded: 41719 return: -0.00144
Day 503 value: -79.68 todayPL: $-19.80 $-traded: 57638 return: -0.00138
Day 504 value: -109.01 todayPL: $-29.33 $-traded: 70046 return: -0.00156
Day 505 value: -90.12 todayPL: $18.89 $-traded: 83670 return: -0.00108
Day 506 value: -58.09 todayPL: $32.03 $-traded: 99715 return: -0.00058
Day 507 value: 29.41 todayPL: $87.49 $-traded: 112784 return: 0.00026
Day 508 value: -153.97 todayPL: $-183.37 $-traded: 127028 return: -0.00121
Day 509 value: -182.87 todayPL: $-28.90 $-traded: 138639 return: -0.00132
Day 510 value: -275.42 todayPL: $-92.56 $-traded: 159495 return: -0.00173
Day 511 value: -211.55 todayPL: $63.88 $-traded: 174485 return: -0.00121
Day 512 value: -292.46 todayPL: $-80.91 $-traded: 191269 return: -0.00153
Day 513 value: -373.05

44.32518321704107