In [1]:
import numpy as np 
import pandas as pd
import parameters as pa
import matplotlib.pyplot as plt
import time
import math
import gc
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
import feather

In [2]:
import tensorflow as tf 
import tensorflow.keras as k
import tensorflow.keras.backend as tfb
from tcn import TCN

#tf.enable_eager_execution()

In [3]:
import os
# Configure TensorFlow through environment variables: log level '2' and the
# ROCm fusion flag set to '1' (presumably enables ROCm op fusion - confirm).
# NOTE(review): this cell is executed after the cell that imports tensorflow.
# These variables are normally expected to be set before the import; confirm
# they still take effect here, otherwise move this cell above the TF import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['TF_ROCM_FUSION_ENABLE'] = '1'


In [4]:
# Load the raw price table. Column 0 holds date strings (their "/" separators
# are replaced with "-" before casting to datetime64); every remaining column
# is cast to float64.
# NOTE(review): the path is relative to the user's home directory - consider a
# configurable data directory.
mpf_bct_raw = np.array(pd.read_csv("~/MPF/BCT/BCT.csv"))
daily_r, daily_c = mpf_bct_raw.shape
daily = np.array([s.replace("/", "-") for s in mpf_bct_raw[:,0]]).astype("datetime64")
mpf_bct = mpf_bct_raw[:,1:].astype("float64")
# One Python list per column of the float table (transpose, then tolist).
mpf_bct_list = mpf_bct.transpose().tolist()
# Tunables. Of these, the cells visible in this notebook only read
# `time_step` (window length passed to x_roll), `minimum` (rows with index
# <= minimum get a prediction of 0) and `top` (used as monthly_c - top when
# pruning weights). The training cell hard-codes its own learning rate and
# batch size, so `lr` and `batch_size` below are not what the model uses.
lstm_size = 128
time_step = 60
batch_size = 512
output_size = 1
lr = 0.0013
shift = 1
minimum = 6
top = 3

# Column labels for the returns DataFrame, in the same order as the data
# columns of the CSV (19 entries).
mpf_bct_funds_name = ["MPF.BCT.AE", "MPF.BCT.CA", "MPF.BCT.CEHK",
                      "MPF.BCT.CT", "MPF.BCT.EU", "MPF.BCT.E3",
                      "MPF.BCT.E5", "MPF.BCT.E7", "MPF.BCT.E9", 
                      "MPF.BCT.FM", "MPF.BCT.GB", "MPF.BCT.GE", 
                      "MPF.BCT.GT", "MPF.BCT.HKB", "MPF.BCT.HSIT",
                      "MPF.BCT.MPFC", "MPF.BCT.RMBB", "MPF.BCT.SFP",
                      "MPF.BCT.SE2040"]

# One model is built and trained per seed; the per-seed predictions are
# averaged later (np.mean over the last axis of mpf_bct_w_all).
seeds = [71880, 65940, 13014, 95690, 55070,
         88714, 75294, 42252, 75969, 95359]

In [5]:
# Daily log returns per fund: log(P[t+1] / P[t]).
# A return is only defined when both prices are strictly positive; anything
# else (zero, negative or NaN price on either day) becomes NaN. The previous
# version only guarded the earlier price, so a zero/negative later price made
# math.log raise "math domain error".
mpf_bct_daily_returns = np.array(
    [[math.log(later / earlier) if earlier > 0 and later > 0 else np.nan
      for earlier, later in zip(col[:-1], col[1:])]
     for col in mpf_bct_list],
    dtype='float64').transpose()
mpf_bct_daily_returns = pd.DataFrame(mpf_bct_daily_returns, index=daily[1:],
                                     columns=mpf_bct_funds_name, dtype='float64')

# Monthly return = sum of the daily log returns in each calendar month.
# Missing days are filled with -999 before summing so that any month that
# contains a missing day ends up far below -100 and is blanked to NaN
# afterwards (groupby().sum() would otherwise silently skip the NaNs).
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M'; 'M' is
# kept here for compatibility with the pandas version this notebook targets.
mpf_bct_returns = mpf_bct_daily_returns.fillna(-999).groupby(pd.Grouper(freq='M')).sum()
mpf_bct_returns[mpf_bct_returns < -100] = np.nan

# Per-fund cost figures, divided by 12 (presumably annual rates converted to
# monthly - confirm). They are applied positionally, one entry per column.
mpf_bct_original_cost = np.array([0.0182, 0.0085, 0.0166, 0.0115, 0.0165,
                                  0.0163, 0.0163, 0.0162, 0.0152, 0.0136,
                                  0.0150, 0.0167, 0.0100, 0.0112, 0.0084,
                                  0.0094, 0.0126, 0.0082, 0.0150]) / 12.0
mpf_bct_cs_cost = np.array([0.0069, 0.0062, 0.0069, 0.0060, 0.0069,
                            0.0062, 0.0062, 0.0062, 0.0062, 0.0062,
                            0.0055, 0.0069, 0.0060, 0.0055, 0.0050,
                            0.0040, 0.0055, 0.0062, 0.0062]) / 12.0

# Swap one cost schedule for the other: subtract the "cs" cost and add back
# the "original" cost on every monthly return.
mpf_bct_returns = mpf_bct_returns - mpf_bct_cs_cost + mpf_bct_original_cost
monthly_r, monthly_c = mpf_bct_returns.shape
monthly = mpf_bct_returns.index

In [6]:
# Global Keras numeric settings: default float type float32 and fuzz factor
# (epsilon) 1e-7. The commented-out lines are disabled threading / device
# placement options kept for reference.
#tf.config.threading.set_inter_op_parallelism_threads(0)
#tf.config.threading.set_intra_op_parallelism_threads(0)
#tf.config.set_soft_device_placement(True)
k.backend.set_floatx('float32')
k.backend.set_epsilon(1e-7)

In [None]:
# Prediction store: one (month, fund) slice per seed.
mpf_bct_w_all = np.zeros((monthly_r, monthly_c, len(seeds)), dtype='float32')
mpf_bct_period = mpf_bct_w_all.shape[0]

# Per-fund min-max scaling of the monthly returns into [-1, 1].
# (The former np.zeros pre-allocation of mpf_bct_returns_normalized was a dead
# store - it was overwritten by this expression on the very next line.)
# NOTE(review): min/max are taken over the full history of each fund, so the
# scaling applied to early months already reflects later observations.
# NOTE(review): a fund whose max equals its min would divide by zero here.
max_return, min_return = mpf_bct_returns.max(axis=0), mpf_bct_returns.min(axis=0)
mpf_bct_returns_normalized = ((mpf_bct_returns - min_return) / (max_return - min_return)) * 2 - 1

def x_roll(x, window_size):
    """Build trailing windows over a single-column series.

    Row ``i`` of the result holds the ``window_size`` most recent values up to
    and including position ``i`` of ``x``, left-padded with zeros while fewer
    than ``window_size`` observations are available.

    Parameters
    ----------
    x : pandas.DataFrame or pandas.Series
        One value per row (a one-column DataFrame or a Series); accessed
        through ``.iloc``.
    window_size : int
        Number of columns in the output, i.e. the look-back length.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(len(x), window_size)``.

    Notes
    -----
    The earlier implementation took ``x.iloc[-window_size:]`` (the tail of the
    *whole* input) for every row once enough history existed, so all those
    rows contained the same, final window; it also reshaped to a hard-coded
    length of 60. Both are fixed here: each row uses its own window and any
    ``window_size`` works.
    """
    n_rows = len(x)
    na = np.zeros((n_rows, window_size), dtype='float32')
    for i in range(n_rows):
        start = max(0, i + 1 - window_size)
        history = np.asarray(x.iloc[start:i + 1], dtype='float32').reshape(-1)
        # Right-align the available history; leading cells stay zero.
        na[i, window_size - len(history):] = history
    return na

def reset_keras():
    """Clear the Keras session, drop the global ``classifier`` and run the GC.

    Prints the number returned by ``gc.collect()``.

    The previous body ran ``del classifier`` inside the function, which makes
    ``classifier`` a local name: the statement always raised
    ``UnboundLocalError`` and a bare ``except`` hid it, so the global object
    was never released. The global is now removed explicitly (and only if it
    exists) - change the name as needed.
    """
    k.backend.clear_session()
    globals().pop('classifier', None)

    print(gc.collect()) # if it's done something you should see a number being outputted

def array_generator(x,y):
    """Generator that produces the single tuple ``(x, y)`` and then finishes."""
    pair = (x, y)
    yield pair
        
# Walk-forward training: for every seed build one TCN model, then for every
# fund and every month refit on all history up to that month and predict the
# following month's (normalised) return. The prediction is stored in
# mpf_bct_w_all[month, fund, seed index].
# Note that the model is created once per seed, so its weights keep being
# updated across all months and all funds of that seed.
for pas, seed in enumerate(seeds):
    print("seed: " + str(seed))
    tf.random.set_seed(seed)

    model = k.models.Sequential([
        TCN(64, return_sequences=True),
        TCN(16, return_sequences=False),
        k.layers.Dense(1),
    ])
    adam = k.optimizers.Adam(0.008, epsilon=1e-7)
    model.compile(optimizer=adam, loss='mse')

    for col in range(0, len(max_return)):
        print("col: " + str(col))
        fund_series = mpf_bct_returns_normalized.iloc[:, col].dropna()
        # X holds every available month; y is the same series shifted by one,
        # so y row j is the value that follows X row j.
        X = pd.DataFrame(fund_series.iloc[0:], dtype='float32')
        # Assumes the missing months are all at the start of the series, so
        # the remaining rows line up with the last len(X) calendar months.
        X_month = monthly[-len(X):]
        y = pd.DataFrame(fund_series.iloc[1:], dtype='float32')

        for i in range(0, len(X) - 1):
            print("row: " + str(i))
            if i > minimum:
                X_train = x_roll(X[0:i], time_step)
                # Use time_step rather than a hard-coded 60 so the window
                # length is controlled from a single place.
                X_train = X_train.reshape((X_train.shape[0], time_step, 1)).astype('float32')

                X_test = x_roll(X[0:i+1], time_step)[-1]
                X_test = X_test.reshape((1, time_step, 1)).astype('float32')

                y_train = y[0:i].to_numpy().reshape((i, 1)).astype('float32')

                # use_multiprocessing was dropped: it only applies to
                # generator / Sequence inputs and is not accepted by newer
                # Keras versions.
                model.fit(X_train, y_train, epochs=36, batch_size=256, verbose=0)
                # predict returns a (1, 1) array; store a plain scalar.
                prediction = float(model.predict(X_test, batch_size=256)[0, 0])
                del X_train, y_train, X_test
            else:
                # Not enough history yet: leave a zero prediction.
                prediction = 0
            idx = np.where(mpf_bct_returns.index.values == np.datetime64(X_month[i+1]))[0].tolist()
            mpf_bct_w_all[idx[0], col, pas] = prediction
            gc.collect()
        del X, y, X_month
        gc.collect()

seed: 71880
col: 0
row: 0
row: 1
row: 2
row: 3
row: 4
row: 5
row: 6
row: 7
row: 8
row: 9
row: 10
row: 11
row: 12
row: 13
row: 14
row: 15
row: 16
row: 17
row: 18
row: 19
row: 20
row: 21
row: 22
row: 23
row: 24
row: 25
row: 26
row: 27
row: 28
row: 29
row: 30
row: 31
row: 32
row: 33
row: 34
row: 35
row: 36
row: 37
row: 38
row: 39
row: 40
row: 41
row: 42
row: 43
row: 44
row: 45
row: 46
row: 47
row: 48
row: 49
row: 50
row: 51
row: 52
row: 53
row: 54
row: 55
row: 56
row: 57
row: 58
row: 59
row: 60
row: 61
row: 62
row: 63
row: 64
row: 65
row: 66
row: 67
row: 68
row: 69
row: 70
row: 71
row: 72
row: 73
row: 74
row: 75
row: 76
row: 77
row: 78
row: 79
row: 80
row: 81
row: 82
row: 83
row: 84
row: 85
row: 86
row: 87
row: 88
row: 89
row: 90
row: 91
row: 92
row: 93
row: 94
row: 95
row: 96
row: 97
row: 98
row: 99
row: 100
row: 101
row: 102
row: 103
row: 104
row: 105
row: 106
row: 107
row: 108
row: 109
row: 110
row: 111
row: 112
row: 113
row: 114
row: 115
row: 116
row: 117
row: 118
row: 119
row: 120
ro

In [8]:
# Restore a previously saved interpreter session from 'mpf_bct_tcn.db'
# instead of re-running the training cell. The commented lines are the
# matching save step.
# NOTE(review): dill/pickle files can execute arbitrary code when loaded -
# only load a session file you created yourself.
# NOTE(review): presumably this replaces the kernel's current globals with
# the saved ones, so later cells depend on the contents of that file -
# confirm before relying on a fresh Restart & Run All.
import dill
#del model
#dill.dump_session('mpf_bct_tcn.db')
dill.load_session('mpf_bct_tcn.db')


In [None]:


# Write one feather file per seed slice of the prediction array. File names
# are the fixed prefix followed by a 1-based index. The loop bound follows
# the actual size of the seed axis instead of a hard-coded 10, so changing
# the number of seeds neither raises IndexError nor drops slices.
# NOTE(review): the output directory is an absolute, user-specific path.
for i in range(1, mpf_bct_w_all.shape[2] + 1):
    feather.write_dataframe(pd.DataFrame(mpf_bct_w_all[:,:,i-1]), "/home/jchan/MPF-ANN/temp/mpf_bct_tcn_w_all.feather"+str(i))

In [3]:
# Ensemble prediction: average the per-seed predictions.
mpf_bct_w = np.mean(mpf_bct_w_all, axis = 2)
mpf_bct_p = np.zeros((monthly_r, monthly_c), dtype='float32')

mpf_bct_stock_return = np.zeros(monthly_r, dtype='float32')
mpf_portf_return = np.zeros(monthly_r, dtype='float32')

mpf_portf_weight = np.zeros((monthly_r, monthly_c), dtype='float32')
mpf_portf_weight_all = np.zeros((monthly_r, monthly_c), dtype='float32')

# State flags updated month by month in the allocation loop.
hedge = False
up = True

# Undo the [-1, 1] min-max scaling fund by fund. max_return / min_return are
# Series labelled by fund name, so they are indexed by position with .iloc;
# plain integer keys on a label-indexed Series rely on a deprecated
# positional fallback.
# NOTE: months that never received a prediction hold 0 in mpf_bct_w_all and
# therefore map to the midpoint of the fund's return range here.
for col in range(0, monthly_c):
    mpf_bct_w[:,col] = ((mpf_bct_w[:,col] + 1) / 2 * (max_return.iloc[col] - min_return.iloc[col]) + min_return.iloc[col])

    
def round_percent(x):
    """Round portfolio weights to whole percentage points (largest remainder).

    Each weight is floored to a whole percent; if the floored values add up to
    less than 100, the missing points are handed out one at a time to the
    entries with the largest fractional remainder. Only entries with a
    strictly positive weight can receive a point, so a fund with zero weight
    stays at zero.

    Parameters
    ----------
    x : array-like of float
        Weights expressed as fractions (1.0 == 100%).

    Returns
    -------
    numpy.ndarray
        ``float64`` array of the same length, each entry a multiple of 0.01.
        If the input contains NaN, or the floored values already reach 100 or
        more, the floored values are returned unchanged (divided by 100).
        An all-zero input is returned as all zeros.

    Notes
    -----
    The previous loop used ``next`` (a no-op expression) where ``continue``
    was intended and never used the sorted remainders, so it handed points to
    entries by position - including zero-weight ones - and could index past
    the end of the array.
    """
    scaled = np.asarray(x, dtype='float64') * 100
    result = np.floor(scaled)
    remain = scaled - result
    shortfall = 100 - np.sum(result)
    # A NaN sum makes this comparison False, leaving the values untouched.
    if shortfall > 0:
        candidates = np.flatnonzero(scaled > 0)
        if candidates.size > 0:
            # Largest remainder first; the stable sort keeps ties in index order.
            ranked = candidates[np.argsort(-remain[candidates], kind='stable')]
            # Normally shortfall < number of candidates; divmod also covers
            # inputs whose weights sum to well below 1.
            whole, extra = divmod(int(shortfall), ranked.size)
            result[ranked] += whole
            result[ranked[:extra]] += 1
    return np.array(result / 100)

    
    
# Month-by-month allocation.
# For each month: (1) average the realised return of the "stock" columns,
# (2) clean and prune the predicted returns to the `top` best funds,
# (3) update the hedge/up state flags, (4) turn the surviving predictions
# into weights, falling back to fixed positions in columns 10 and 15 in
# defensive situations, (5) round the weights to whole percents.

# Columns left out of the stock average (presumably the non-equity funds -
# confirm against the fund list).
non_stock_cols = (1, 5, 6, 10, 13, 15, 16, 17)

for row in range(0, monthly_r):
    mpf_bct_stock_mean = 0
    i = 0

    for col in range(0, monthly_c):
        if col not in non_stock_cols:
            if not np.isnan(mpf_bct_returns.iloc[row, col]):
                mpf_bct_stock_mean += mpf_bct_returns.iloc[row, col]
            # NOTE(review): the divisor counts every stock column, including
            # those whose return is NaN this month - confirm this is intended.
            i += 1
            if mpf_bct_w[row, col] < 1e-6:
                mpf_bct_w[row, col] = 0
        else:
            if mpf_bct_w[row, col] < 0:
                mpf_bct_w[row, col] = 0

    mpf_bct_stock_return[row] = mpf_bct_stock_mean / i

    # Keep only the `top` highest predictions of the month and zero the rest.
    # The previous code sorted the row but never used the result and zeroed
    # the first (monthly_c - top) columns by position, which meant only the
    # last `top` columns could ever be held.
    last = max(monthly_c - top, 0)
    order = np.argsort(mpf_bct_w[row, :])
    mpf_bct_w[row, order[:last]] = 0

    # NOTE(review): the quantiles below are taken over the whole
    # mpf_bct_stock_return array, whose entries for months after `row` are
    # still zero at this point - confirm this is the intended reference set.
    if row > 8 and mpf_bct_stock_return[row] < np.quantile(mpf_bct_stock_return, 0.35) and \
        mpf_bct_stock_return[row - 3] < np.quantile(mpf_bct_stock_return, 0.45):
        up = False
    if row > 8 and hedge and mpf_bct_stock_return[row] > np.quantile(mpf_bct_stock_return, 0.35) and \
        mpf_bct_stock_return[row - 3] > np.quantile(mpf_bct_stock_return, 0.45):
        up = True
        hedge = False
    if row > 8 and (mpf_bct_stock_return[row] < 0 and \
                    mpf_bct_stock_return[row - 1] > np.quantile(mpf_bct_stock_return, 0.75)):
        hedge = True
    mpf_bct_w_sum = np.sum(mpf_bct_w[row, :])

    if row <= 12 or mpf_bct_w_sum == mpf_bct_w[row, 16] or \
       mpf_bct_w_sum < 1e-6 or hedge:
        # Fully defensive: nothing usable was predicted, only column 16
        # survived, the warm-up period is not over, or the hedge flag is set.
        if row >= 24:
            mpf_bct_p[row, 10] = 0.3
            mpf_bct_p[row, 15] = 0.7
        else:
            mpf_bct_p[row, 15] = 1
    elif np.count_nonzero(mpf_bct_w[row,]) == 1 or \
        np.min(mpf_bct_stock_return[(row-3):row]) < -0.07:
        # Partly defensive: one third follows the predictions, the remainder
        # goes to the fixed columns. The fixed shares are added on top of any
        # predicted share. The previous `p[c] += p[c] + k` / `p[15] += p[16]
        # + k` forms counted the predicted share twice (or took it from the
        # wrong column) and could push the total well above 100%.
        mpf_bct_p[row, :] = mpf_bct_w[row, :] / mpf_bct_w_sum / 3
        if row >= 24:
            mpf_bct_p[row, 10] += 0.33
            mpf_bct_p[row, 15] += 0.34
        else:
            mpf_bct_p[row, 15] += 0.67
    else:
        # Normal case: weights proportional to the surviving predictions.
        mpf_bct_p[row, :] = mpf_bct_w[row, :] / mpf_bct_w_sum

    mpf_portf_weight[row, :] = round_percent(mpf_bct_p[row, :])
    

Collecting feather
  Downloading https://files.pythonhosted.org/packages/77/d1/073c848713d9987f48d0bc8415646760a069ef3ca80e9b45fdb6b4422133/feather-0.9.1dev.tar.gz
[31m    ERROR: Command errored out with exit status 1:
     command: /home/jchan/tensorflow_v2/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-e8_mirzz/feather/setup.py'"'"'; __file__='"'"'/tmp/pip-install-e8_mirzz/feather/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-install-e8_mirzz/feather/pip-egg-info
         cwd: /tmp/pip-install-e8_mirzz/feather/
    Complete output (31 lines):
    Downloading http://pypi.python.org/packages/source/d/distribute/distribute-0.6.14.tar.gz
    Traceback (most recent call last):
      File "/tmp/pip-install-e8_mirzz/feather/distribute_setup.py", line 143, in use_setuptools
        raise ImportErr