For online testing, the data files must be structured in the same way as the provided data.

This means that the file paths and the column names must match those of the previously used data exactly.

Apart from the data files, we also require a config file that specifies which file paths and which columns to load.

Example: predict the Aluminium spot price using the open interest of copper on the Shanghai Futures Exchange (SHFE).

The config file, which lists the file paths and the columns to load, should look similar to:

[

    {
    
        ".../LME/.../LMAHDY.csv":["LMAHDY"],
        
        ".../SHFE/.../CU.csv":["Open Interest"]
        
    }
    
]

Notes:
Assets in different exchanges should be under folders of their respective exchange names.

Spot prices in LME should follow the LM__DY naming pattern (e.g. LMAHDY) for both file names and column names.

In [113]:
import joblib
import json
from copy import copy
import importlib
import os
import sys
import numpy as np
import rpy2.robjects as robjects
import pandas as pd
# Source the Alphien R functions into the embedded R session; the helpers used
# later (getSecurity, getGenOHLCV, index) are looked up from that session.
robjects.r('.sourceAlfunction()')
print(sys.path[0])

# Make the project's utility modules importable.
# The directory is resolved against the working directory instead of
# sys.path[0]: once this cell has run, sys.path[0] *is* the utils directory,
# so re-running the cell used to build a nested, non-existent path
# (".../code/utils/NEXT/4EBaseMetal/code/utils").
# NOTE(review): assumes the notebook is started from the directory that
# contains NEXT/ -- confirm for other launch locations.
utils_dir = os.path.abspath(os.path.join(os.getcwd(), 'NEXT/4EBaseMetal/code/utils'))
if utils_dir not in sys.path:
    sys.path.insert(0, utils_dir)

# The helper modules live in utils_dir, so they can only be imported after the
# path tweak above.
read_data = importlib.import_module("read_data")
construct_data = importlib.import_module("construct_data")
transform_data = importlib.import_module("transform_data")
normalize_feature = importlib.import_module("normalize_feature")

[1]
 "Already loaded SinglePackageAl version * 19.04.6 * "


[1]
 "Detaching it"


[1]
 "Also unloading with devtools:"


 

[1]
 "SinglePackageAl loaded. Attaching version 19.04.6"


[1]
 "Loaded AlphienData"


[1]
 "Loaded AlphienTechnicalAnalysis"


[1]
 "Loaded AlphienFundamental"


[1]
 "Loaded AlphienBaseMetals"


[1]
 "Loaded AlphienOption"


send2Log: Your working directory is: /home/chanmingwei
 


/home/chanmingwei/NEXT/4EBaseMetal/code/utils/NEXT/4EBaseMetal/code/utils


In [114]:
from datetime import datetime
# Number of seconds in one day; used to move an auxiliary series forward by a day.
_SECONDS_PER_DAY = 86400

# Per-target settings for h1_function, keyed by the ground-truth ticker.
#   metal     - two-letter code used in the spot column name and model file name
#   label     - metal name used in the results CSV file name
#   lag       - number of past time steps passed to construct_data.construct
#   extra_col - column name given to the auxiliary (extra) series
#   r_expr    - R expression template evaluated to fetch the auxiliary series,
#               or None when it is fetched through getSecurity(security)
#   security  - securityName passed to getSecurity when r_expr is None
#   shift_day - whether the auxiliary timestamps are moved forward by one day
# NOTE(review): the aluminium target is paired with a column named SHFE_Le_OI
# taken from "PBLcl"; the notebook text talks about copper open interest --
# confirm which series the stored model was trained on.
_H1_TARGETS = {
    "LMAHDY Comdty": {
        "metal": "Al", "label": "Aluminium", "lag": 40,
        "extra_col": "SHFE_Le_OI",
        "r_expr": 'getGenOHLCV("PBLcl",zoom = "{zoom}")[,6]',
        "security": None, "shift_day": False,
    },
    "LMPBDY Comdty": {
        "metal": "Le", "label": "Lead", "lag": 40,
        "extra_col": "SHFE_Le_OI",
        "r_expr": 'getGenOHLCV("PBLcl",zoom = "{zoom}")[,6]',
        "security": None, "shift_day": False,
    },
    "LMNIDY Comdty": {
        "metal": "Ni", "label": "Nickel", "lag": 5,
        "extra_col": "LME_Al_Spot",
        "r_expr": None,
        "security": "LMAHDY Comdty", "shift_day": False,
    },
    "LMZSDY Comdty": {
        "metal": "Zi", "label": "Zinc", "lag": 10,
        "extra_col": "UKX",
        "r_expr": None,
        "security": "UKX Index", "shift_day": True,
    },
    "LMSNDY Comdty": {
        "metal": "Ti", "label": "Tin", "lag": 40,
        "extra_col": "COMEX_SI_lag1_Close",
        "r_expr": 'getGenOHLCV("SI", zoom ="{zoom}")[,4]',
        "security": None, "shift_day": True,
    },
    "LMCADY Comdty": {
        "metal": "Co", "label": "Copper", "lag": 5,
        "extra_col": "SHFE_Le_Open",
        "r_expr": 'getGenOHLCV("PBLcl",zoom = "{zoom}")[,1]',
        "security": None, "shift_day": False,
    },
}

# Normalisation variants handed to construct_data.normalize; identical for
# every supported target.
_H1_NORM_VERSION = "v1"


def _r_to_list(r_vector):
    """Copy an R vector into a Python list, element by element."""
    # Index-based access is kept on purpose: it is the access pattern the
    # original code relied on for these rpy2 objects.
    return [r_vector[i] for i in range(len(r_vector))]


def h1_function(date_range,ground_truth):
    """Evaluate the stored one-step-ahead (h=1) model for one LME spot series.

    The function fetches the ground-truth series and one auxiliary series from
    the R session, merges them on their timestamps, runs the project's
    preprocessing helpers, loads the model stored under
    ``NEXT/LME_<metal>_Spot_h1_n1.joblib`` and compares its predictions with
    the sign of the next step's ``log_1d_return`` of the spot column.

    Side effects: prints progress and summary statistics, and writes one
    ``Prediction,True Value`` row per sample to ``NEXT/<metal name> h1.csv``.

    Args:
        date_range: zoom string passed to the R data functions,
            e.g. ``"2015-01-06::2016-01-05"``.
        ground_truth: ticker of the series to predict; must be a key of
            ``_H1_TARGETS`` (e.g. ``"LMNIDY Comdty"``).

    Returns:
        The model predictions reshaped to ``(n_samples, 1)``, or ``None`` when
        ``ground_truth`` is not a supported ticker.
    """
    # Reject unknown tickers before touching R or the file system.
    settings = _H1_TARGETS.get(ground_truth)
    if settings is None:
        print("ground truth val out of bounds!")
        return None

    h = 1
    lag = settings["lag"]
    metal = settings["metal"]
    spot_col = "LME_" + metal + "_Spot"

    rgetSecurity = robjects.r('getSecurity')
    rindex = robjects.r('index')

    # Fetch the auxiliary series first, then the ground truth.
    if settings["r_expr"] is not None:
        extra_data = robjects.r(settings["r_expr"].format(zoom=date_range))
    else:
        extra_data = rgetSecurity(securityName=settings["security"], zoom=date_range)
    extra_index = rindex(extra_data)
    gt_data = rgetSecurity(securityName=ground_truth, zoom=date_range)
    gt_index = rindex(gt_data)

    gt_times = _r_to_list(gt_index)
    gt_values = _r_to_list(gt_data)
    extra_times = _r_to_list(extra_index)
    extra_values = _r_to_list(extra_data)
    if settings["shift_day"]:
        # Stamp each auxiliary observation one day later so it lines up with
        # the following day's ground-truth row (a one-day lag).
        extra_times = np.asarray(extra_times) + _SECONDS_PER_DAY * np.ones(len(extra_times))

    gt_dataframe = pd.DataFrame({'time': gt_times, spot_col: gt_values})
    extra_dataframe = pd.DataFrame({'time': extra_times, settings["extra_col"]: extra_values})

    # The outer join keeps timestamps present in only one series. The time
    # column is then dropped, so downstream helpers see a positional index.
    time_series = gt_dataframe.merge(extra_dataframe, how='outer', on='time')
    time_series = time_series.drop('time', axis=1)

    # Processing data
    time_series = read_data.process_missing_value_v3(time_series, np.min([lag, 10]))
    org_cols = time_series.columns.values.tolist()
    print("Normalizing")

    # Normalize and generate technical indications
    norm_params = construct_data.normalize(
        time_series,
        vol_norm=_H1_NORM_VERSION,
        spot_spread_norm=_H1_NORM_VERSION,
        ex_spread_norm=_H1_NORM_VERSION,
    )
    time_series = copy(norm_params.pop("val"))
    time_series = construct_data.technical_indication(time_series)

    # Drop the raw volume / open-interest / FX columns from the feature set.
    # Filtering org_cols (instead of list.remove) cannot raise when a dropped
    # column was never an original column.
    dropped = [
        col for col in time_series.columns.values.tolist()
        if "_Volume" in col or "_OI" in col or "CNYUSD" in col
    ]
    for col in dropped:
        time_series = time_series.drop(col, axis=1)
    org_cols = [col for col in org_cols if col not in dropped]

    norm_data = copy(normalize_feature.log_1d_return(time_series, org_cols))
    norm_data = read_data.process_missing_value_v3(norm_data, 10)

    # Label for row t: whether the spot column of row t+1 is positive.
    to_be_predicted = norm_data[spot_col]
    gt = (to_be_predicted > 0).shift(-1)

    start_index = max(0, lag - 1)
    end_index = len(norm_data) - 1
    assert end_index >= lag - 1

    X_te, y_te = construct_data.construct(norm_data, gt, start_index, end_index, lag, "log_1d_return")
    X_te = transform_data.flatten(X_te)
    # Map the labels from {0, 1} to {-1, +1}; the statistics below rely on it.
    y_te = y_te*2 - 1
    print(len(y_te))

    # load model
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    model = joblib.load("NEXT/LME_{}_Spot_h{}_n1.joblib".format(metal, h))

    prediction = model.predict(X_te).reshape(X_te.shape[0], 1)
    truth = np.asarray(y_te, dtype=float).reshape(-1, 1)

    # One results file per metal. Previously every target was written to
    # "NEXT/Nickel h1.csv", so each run overwrote the previous one.
    with open("NEXT/{} h{}.csv".format(settings["label"], h), "w") as out:
        out.write("Prediction,True Value\n")
        # Each row states whether an increase in spot price was predicted and
        # whether one actually happened.
        out.writelines(
            "%s,%s\n" % (bool(pred == 1), bool(actual == 1))
            for pred, actual in zip(prediction.reshape(-1), truth.reshape(-1))
        )

    total_no = prediction.shape[0]
    no_true = float(np.sum(np.equal(prediction, truth)))
    # With both arrays in {-1, +1} each product below is 4 (or -4) exactly for
    # the matching (prediction, truth) combination and 0 otherwise.
    no_TT = float(np.sum(np.multiply(prediction + 1, truth + 1))) / 4
    no_FF = float(np.sum(np.multiply(prediction - 1, truth - 1))) / 4
    no_TF = -float(np.sum(np.multiply(prediction + 1, truth - 1))) / 4
    no_FT = -float(np.sum(np.multiply(prediction - 1, truth + 1))) / 4
    accuracy = no_true / total_no if total_no else float("nan")

    # some basic statistics, to identify which type of error the model is
    # more prone to (first letter: prediction, second letter: true value).
    print("Overall Accuracy:%.4f" % accuracy)
    print("TT:%.0f" % no_TT)
    print("TF:%.0f" % no_TF)
    print("FT:%.0f" % no_FT)
    print("FF:%.0f" % no_FF)
    return prediction



In [115]:
# Run h1_function for "LMNIDY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMNIDY = h1_function("2015-01-06::2016-01-05","LMNIDY Comdty")

Normalizing
248
Overall Accuracy:%d [0.61290323]
TT:%d [47.]
TF:%d [29.]
FT:%d [67.]
FF:%d [105.]




In [123]:
# Run h1_function for "LMNIDY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMNIDY_test = h1_function("2016-01-06::2017-01-05","LMNIDY Comdty")

Normalizing
249
Overall Accuracy:%d [0.53012048]
TT:%d [38.]
TF:%d [16.]
FT:%d [101.]
FF:%d [94.]




In [124]:
# Run h1_function for "LMNIDY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMNIDY_holdout = h1_function("2017-01-06::","LMNIDY Comdty")

Normalizing
603
Overall Accuracy:%d [0.51409619]
TT:%d [72.]
TF:%d [60.]
FT:%d [233.]
FF:%d [238.]




In [116]:
# Run h1_function for "LMCADY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMCADY = h1_function("2015-01-06::2016-01-05","LMCADY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
234
Overall Accuracy:%d [0.59401709]
TT:%d [59.]
TF:%d [40.]
FT:%d [55.]
FF:%d [80.]




In [129]:
# Run h1_function for "LMCADY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMCADY_test = h1_function("2016-01-06::2017-01-05","LMCADY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
234
Overall Accuracy:%d [0.47435897]
TT:%d [44.]
TF:%d [44.]
FT:%d [79.]
FF:%d [67.]




In [130]:
# Run h1_function for "LMCADY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMCADY_holdout = h1_function("2017-01-06::","LMCADY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
564
Overall Accuracy:%d [0.5106383]
TT:%d [104.]
TF:%d [97.]
FT:%d [179.]
FF:%d [184.]




In [117]:
# Run h1_function for "LMAHDY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMAHDY = h1_function("2015-01-06::2016-01-05","LMAHDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
198
Overall Accuracy:%d [0.63636364]
TT:%d [47.]
TF:%d [36.]
FT:%d [36.]
FF:%d [79.]




In [118]:
# Run h1_function for "LMAHDY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMAHDY_test = h1_function("2016-01-06::2017-01-05","LMAHDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
198
Overall Accuracy:%d [0.51010101]
TT:%d [40.]
TF:%d [35.]
FT:%d [62.]
FF:%d [61.]




In [119]:
# Run h1_function for "LMAHDY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMAHDY_holdout = h1_function("2017-01-06::","LMAHDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
528
Overall Accuracy:%d [0.49431818]
TT:%d [100.]
TF:%d [117.]
FT:%d [150.]
FF:%d [161.]




In [120]:
# Run h1_function for "LMPBDY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMPBDY = h1_function("2015-01-06::2016-01-05","LMPBDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
198
Overall Accuracy:%d [0.57070707]
TT:%d [32.]
TF:%d [24.]
FT:%d [61.]
FF:%d [81.]




In [121]:
# Run h1_function for "LMPBDY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMPBDY_test = h1_function("2016-01-06::2017-01-05","LMPBDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
198
Overall Accuracy:%d [0.44949495]
TT:%d [25.]
TF:%d [30.]
FT:%d [79.]
FF:%d [64.]




In [122]:
# Run h1_function for "LMPBDY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMPBDY_holdout = h1_function("2017-01-06::","LMPBDY Comdty")

send2Log: Fetching generic for PBL1C
 


Normalizing
Normalizing OI:SHFE_Le_OI=>SHFE_Le_nOI
528
Overall Accuracy:%d [0.47348485]
TT:%d [77.]
TF:%d [84.]
FT:%d [194.]
FF:%d [173.]




In [7]:
# Run h1_function for "LMZSDY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMZSDY = h1_function("2015-01-06::2016-01-05","LMZSDY Comdty")

Normalizing
190
Overall Accuracy:%d [0.53157895]
TT:%d [40.]
TF:%d [46.]
FT:%d [43.]
FF:%d [61.]




In [125]:
# Run h1_function for "LMZSDY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMZSDY_test = h1_function("2016-01-06::2017-01-05","LMZSDY Comdty")

Normalizing
191
Overall Accuracy:%d [0.5026178]
TT:%d [49.]
TF:%d [31.]
FT:%d [64.]
FF:%d [47.]




In [126]:
# Run h1_function for "LMZSDY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMZSDY_holdout = h1_function("2017-01-06::","LMZSDY Comdty")

Normalizing
469
Overall Accuracy:%d [0.4989339]
TT:%d [101.]
TF:%d [95.]
FT:%d [140.]
FF:%d [133.]




In [8]:
# Run h1_function for "LMSNDY Comdty" over 2015-01-06 .. 2016-01-05 and keep the returned predictions.
LMSNDY = h1_function("2015-01-06::2016-01-05","LMSNDY Comdty")

send2Log: Fetching generic for SI1S
 


Normalizing
160
Overall Accuracy:%d [0.4875]
TT:%d [34.]
TF:%d [48.]
FT:%d [34.]
FF:%d [44.]




In [127]:
# Run h1_function for "LMSNDY Comdty" over 2016-01-06 .. 2017-01-05 and keep the returned predictions.
LMSNDY_test = h1_function("2016-01-06::2017-01-05","LMSNDY Comdty")

send2Log: Fetching generic for SI1S
 


Normalizing
165
Overall Accuracy:%d [0.53333333]
TT:%d [64.]
TF:%d [43.]
FT:%d [34.]
FF:%d [24.]




In [128]:
# Run h1_function for "LMSNDY Comdty" from 2017-01-06 with no end date in the zoom string
# (presumably "up to the latest available data" -- confirm against getSecurity's zoom semantics).
LMSNDY_holdout = h1_function("2017-01-06::","LMSNDY Comdty")

send2Log: Fetching generic for SI1S
 


Normalizing
454
Overall Accuracy:%d [0.46475771]
TT:%d [134.]
TF:%d [141.]
FT:%d [102.]
FF:%d [77.]


