All the given data are parsed, pre-processed and saved (here only BTC) and then loaded again, because this notebook aims to analyse the performance of basic deep-learning models.

In [22]:
import pandas as pd

'''
Basically the training_dataset contains ~3.8 years of data points for each asset
The one I was considering at first (For BTC alone) contains ~25 days of data (out of which model learns by looking at past 12 hours and predict at a delay of 15 mins into the future)    
'''


# Read the four competition tables and report their shapes. The two large
# training files are cut down to their final 5,000,000 rows after loading.
df_asset_details = pd.read_csv(
    '../input/c/g-research-crypto-forecasting/asset_details.csv'
)
print('df_asset_details: ', df_asset_details.shape)

df_sub_sample = pd.read_csv(
    '../input/c/g-research-crypto-forecasting/example_sample_submission.csv'
)
print('sub_sample: ', df_sub_sample.shape)

df_sup_train = pd.read_csv(
    '../input/c/g-research-crypto-forecasting/supplemental_train.csv'
)
df_sup_train = df_sup_train.iloc[-5000000:]
print('sup_train: ', df_sup_train.shape)

df_train = pd.read_csv('../input/c/g-research-crypto-forecasting/train.csv')
df_train = df_train.iloc[-5000000:]
print('train_shape: ', df_train.shape)

In [23]:
# Show the first and last timestamp (in row order) of both training tables.
train_ts = df_train['timestamp']
print('Train_set_time_entry_range: ', train_ts.iloc[0], ' - ', train_ts.iloc[-1])
print()
sup_ts = df_sup_train['timestamp']
print('Supplemetary_train_set_time_entry_range: ', sup_ts.iloc[0], ' - ', sup_ts.iloc[-1])

In [24]:
# Stack both tables row-wise, then drop rows that are exact duplicates,
# printing the shape before and after.
df_temp = pd.concat((df_train, df_sup_train))
print('df_temp_before: ', df_temp.shape)
df_temp = df_temp.drop_duplicates(keep='first')
print('df_temp_after: ', df_temp.shape)

In [25]:
# sort_values returns a new frame, so the result has to be rebound; the
# original discarded it and printed the range of the unsorted concatenation.
# A stable sort keeps the existing relative order of rows that share a timestamp.
df_temp = df_temp.sort_values('timestamp', kind='stable')
print('Train/Sup_set_time_entry_range: ',df_temp['timestamp'].iloc[0], ' - ', df_temp['timestamp'].iloc[-1])
print()
# Number of rows recorded for each timestamp.
print(df_temp['timestamp'].value_counts())

### Visualization (~ Raw)

In [26]:
import seaborn as sns
import matplotlib.pyplot as plt

# Density histogram of how many rows each Asset_ID contributes to df_temp.
sns.set_style('darkgrid')

fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(16, 12), dpi=80)

sns.histplot(
    data=df_temp['Asset_ID'],
    stat='density',
    discrete=True,
    kde=False,
    color='black',
    ax=axs,
)
plt.legend()

plt.tight_layout(pad=2)
plt.show()

In [27]:
# Column glossary for the training tables, kept as a bare string literal
# (it is evaluated and discarded; nothing is printed from it).
'''
timestamp - A timestamp for the minute covered by the row.
Asset_ID - An ID code for the cryptoasset.
Count - The number of trades that took place this minute.
Open - The USD price at the beginning of the minute.
High - The highest USD price during the minute.
Low - The lowest USD price during the minute.
Close - The USD price at the end of the minute.
Volume - The number of cryptoasset units traded during the minute.
VWAP - The volume weighted average price for the minute.
Target - Residual log-return over 15 minute time-data. See the 'Prediction and Evaluation' section of this notebook for details of how the target is calculated.
'''
print('Done!')

In [28]:
from IPython.display import display
# Show the head of every loaded table under its own caption.
for caption, frame in (
    ('Asset_Details: ', df_asset_details),
    ('Sample_Submission: ', df_sub_sample),
    ('Supplementary_train_samples: ', df_sup_train),
    ('Train_samples: ', df_train),
):
    print(caption)
    print()
    display(frame.head())

# Rows per asset in the supplemental table, then dtype and null summaries
# for the main training table.
print('--'*20)
asset_info = df_sup_train.groupby('Asset_ID')['timestamp'].count()
print(asset_info)
print('--'*20)
print(df_train.info())
print('--'*20)
print(df_train.isnull().sum())

In [29]:
import gc
# Drop the old training frame and continue with the combined table.
del df_train
gc.collect()
df_train = df_temp[:]

# Timestamps grouped per asset: print their counts and keep the unique
# values of every asset for the plots that follow.
asset_info = df_train.groupby('Asset_ID')['timestamp']
print(asset_info.value_counts())
asset_info_time = asset_info.unique()


In [30]:
from pprint import pprint
# Asset_ID -> Asset_Name lookup taken from the asset details table.
inv_asset_dict = dict(zip(df_asset_details['Asset_ID'], df_asset_details['Asset_Name']))
pprint(inv_asset_dict)

# The dict keys (asset ids) as a list.
asset_name = [asset_id for asset_id in inv_asset_dict]
print('num_assets: ', len(asset_name))


In [31]:
# Gather, for every Asset_ID, the full list of each OHLC price column.
by_asset = df_train.groupby('Asset_ID')
asset_info_open = by_asset['Open'].agg(list)
asset_info_close = by_asset['Close'].agg(list)
asset_info_high = by_asset['High'].agg(list)
asset_info_low = by_asset['Low'].agg(list)

## Indexed by Asset_ID
print(asset_info_open.head())
for price_lists in (asset_info_close, asset_info_high, asset_info_low):
    print()
    print(price_lists.head())


In [32]:
#'''## EDA
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter the first 1000 Open/Close values of every asset on a 7x2 grid.
# Panels are visited column by column, so asset ids 0-6 fill the left column
# and 7-13 the right one.
print('**'*20,'1000 mins | 16.66 hrs data_represented', '**'*20)
print()
sns.set_style('darkgrid')
fig, axs = plt.subplots(7,2, figsize=(16,14))
for step, panel in enumerate(axs.T.ravel()):
    times = asset_info_time[step][:1000]
    panel.set_title(inv_asset_dict.get(step))
    panel.scatter(times, asset_info_open[step][:1000], color='red', marker='.', label='Open')
    panel.scatter(times, asset_info_close[step][:1000], color='yellow', alpha=0.2, marker='.', label='Close')
    panel.legend()
    panel.set_xlabel('time')
    panel.set_ylabel('Open/Close Price')

plt.tight_layout(pad=2)
print('Done!')

In [33]:
# Plot the first 1000 High/Low values of every asset on a 7x2 grid, walking
# the panels column by column so asset ids 0-6 land in the left column.
print('**'*20,'1000 mins | 16.66 hrs data_represented', '**'*20)
print()
sns.set_style('darkgrid')
fig, axs = plt.subplots(7,2, figsize=(16,14))
for step, panel in enumerate(axs.T.ravel()):
    times = asset_info_time[step][:1000]
    panel.set_title(inv_asset_dict.get(step))
    panel.plot(times, asset_info_high[step][:1000], ',b', label='High')
    panel.plot(times, asset_info_low[step][:1000], ',r', label='Low')
    panel.legend()
plt.tight_layout(pad=2)

print('Done!')

In [34]:
# Per-asset lists of trade counts and traded volume.
by_asset = df_train.groupby('Asset_ID')
asset_info_count = by_asset['Count'].agg(list)
asset_info_vol = by_asset['Volume'].agg(list)

print(asset_info_count.head())
print()
print(asset_info_vol.head())

In [35]:
# Two 7x2 bar-chart grids over the first 1000 rows of every asset: trade
# counts first, traded volume second. Panels are visited column by column.
sns.set_style('darkgrid')
for banner, per_asset_values, y_label in (
    ('|| Trades/Transactions per minute ||', asset_info_count, 'Trade_Count'),
    ('|| Volume per minute ||', asset_info_vol, 'Volume'),
):
    print()
    print('**'*20, banner, '**'*20)
    print()
    fig, axs = plt.subplots(7,2, figsize=(20,18))
    for step, panel in enumerate(axs.T.ravel()):
        panel.set_title(inv_asset_dict.get(step))
        panel.bar(asset_info_time[step][:1000], per_asset_values[step][:1000], edgecolor='black')
        panel.set_xlabel('timestamps')
        panel.set_ylabel(y_label)
    plt.tight_layout(pad=2)
    plt.show()
print('Done!')

In [36]:
# Per-asset list of Target values.
asset_info_targ = df_train.groupby('Asset_ID')['Target'].agg(list)

print(asset_info_targ.head())

# Reverse lookup: Asset_Name -> Asset_ID.
asset_dict = {}
for asset_id, asset_label in inv_asset_dict.items():
    asset_dict[asset_label] = asset_id

In [37]:
#'''
import random
# Overlay the first 10000 Target values of three selected coins on one axis.
print()
print('**'*20,'|| Residualised Return per minute ||', '**'*20)
print()

# One colour per asset id (14 entries), indexed by the id itself.
colors = [
    'cyan', 'green', 'red', 'blue', 'gold', 'red', 'black',
    'orange', 'magenta', 'deeppink', 'lime', 'slategray', 'yellow', 'darkviolet',
]
name_plot = ['Bitcoin', 'Ethereum', 'Dogecoin']

fig, axs = plt.subplots(1,1, figsize=(18,14))
for name in name_plot:
    step = asset_dict.get(name)
    times = asset_info_time[step][:10000]
    returns = asset_info_targ[step][:10000]
    axs.plot(times, returns, c=colors[step], label=inv_asset_dict.get(step))
axs.set_xlabel('Timestamps')
axs.set_ylabel('Res. Returns')
axs.legend()
plt.show()
print('Done!')

In [38]:
# Report dtypes and null counts of the combined table, then forward-fill gaps.
print(df_temp.info())
print('**'*40)
print(df_temp.isnull().sum())
print('**'*40)
# Fill within each asset only. The table interleaves rows of every asset, so
# a plain frame-wide forward fill would copy the previous row's values from a
# *different* asset into the gap. `fillna(method='ffill')` is also deprecated
# in current pandas in favour of `ffill()`.
value_cols = [col for col in df_temp.columns if col != 'Asset_ID']
df_temp_2 = df_temp.groupby('Asset_ID')[value_cols].ffill()
# Re-attach the grouping column positionally (the index may contain repeated
# labels after the concat, so label alignment is avoided on purpose).
df_temp_2['Asset_ID'] = df_temp['Asset_ID'].to_numpy()
df_temp_2 = df_temp_2[list(df_temp.columns)]  # restore the original column order
print(df_temp_2.isnull().sum())

### Better-Visualization (~preprocessed)

In [39]:
import numpy as np
import gc

# Split the filled table into one frame per asset, indexed by timestamp and
# padded onto a gap-free 60-second grid. Results go into df_sep_dict keyed by
# asset name.
df_sep_dict = {}

print('df_temp_2_shape : ', df_temp_2.shape)

# Name -> id lookup; it is loop-invariant, so build it once up front instead
# of on every iteration.
asset_dict = {value:key for key, value in inv_asset_dict.items()}

for key, mid in asset_dict.items():
    # set_index returns a new frame (no assignment onto a slice of df_temp_2).
    df_mid = df_temp_2[df_temp_2['Asset_ID'] == mid].set_index('timestamp')
    # reindex(method='pad') needs a monotonic, duplicate-free index: keep the
    # last-seen row for any repeated timestamp, then sort by time.
    df_mid = df_mid[~df_mid.index.duplicated(keep='last')].sort_index()
    print(key+' _before : ')
    # Distribution of the gaps between consecutive timestamps.
    print((df_mid.index[1:] - df_mid.index[:-1]).value_counts().head())
    # Fill missing minutes by carrying the previous row forward.
    full_grid = range(df_mid.index[0], df_mid.index[-1]+60, 60)
    df_mid = df_mid.reindex(full_grid, method = 'pad')
    print()
    print(key+' _after : ')
    print((df_mid.index[1:] - df_mid.index[:-1]).value_counts().head())
    df_sep_dict[key] = df_mid
    print('**'*20)


# The per-asset frames replace the big intermediate tables from here on.
del df_train
del df_temp
del df_temp_2
gc.collect()


In [40]:
#'''
req_name = ['Bitcoin', 'Ethereum','Dogecoin']


def _plot_asset_grid(column, fmt, ylabel):
    """Plot `column` of every per-asset frame in df_sep_dict on a 7x2 grid.

    Assets are taken in df_asset_details order; the first seven fill the left
    column and the rest the right column. Returns the (figure, axes) pair
    after the figure has been shown.
    """
    fig, axs = plt.subplots(7,2, figsize=(24,18))
    for step, name in enumerate(df_asset_details['Asset_Name'].tolist()):
        col, row = divmod(step, 7)
        panel = axs[row, col]
        panel.plot(df_sep_dict[name].index, df_sep_dict[name][column], fmt)
        panel.set_title(name + ' (Complete_train_space) ')
        panel.set_xlabel('Time_Axis')
        panel.set_ylabel(ylabel)
    plt.tight_layout(pad=2)
    plt.show()
    return fig, axs


fig, axs = _plot_asset_grid('Close', ',k', 'Closing_Price')

print()
print('**'*20, 'VWAP_plot (on_complete_trian_space)', "**"*20)
print()

# The y-axis of this figure shows VWAP; it was mislabelled 'Closing_Price'.
fig, axs = _plot_asset_grid('VWAP', 'b', 'VWAP')
print('Done!')

In [41]:
import tensorflow as tf
import warnings 
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Connect to the TPU cluster and build a distribution strategy on top of it.
# NOTE(review): nothing in the visible cells opens `tpu_strat.scope()` when
# building models, so the strategy may not actually be used - confirm.
tpu  = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strat = tf.distribute.TPUStrategy(tpu)

print("Number of accelerators: ", tpu_strat.num_replicas_in_sync)

In [42]:
import pickle

''' Dumping the Data '''
# How the pickle below was produced (kept for reference, not executed):
# dict_file = open('BTC_dict_data(last_btc_520k).pkl', 'wb')
# pickle.dump(df_sep_dict['Bitcoin'], dict_file)
# dict_file.close()

# Load the pre-processed Bitcoin frame. The `with` block closes the file even
# when unpickling raises.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('../input/520k-btc-only/BTC_dict_data(last_btc_520k).pkl', 'rb') as dict_file:
    df_btc = pickle.load(dict_file)

In [43]:
import pandas as pd

# Replace the index with synthetic one-minute timestamps starting 2020-12-28.
# NOTE(review): assumes the rows are contiguous minutes from that date - confirm.
df_btc.index = pd.date_range('2020-12-28', periods = len(df_btc), freq='min') # use freq = 'min' to get minute data

In [44]:
# Plot the Close column over the entire loaded BTC frame.
df_btc.Close.plot(figsize=(16,12)) ## They daily update the data to recent one | last year data

In [45]:
import matplotlib.pyplot as plt
# Close from 2021-11-01 onwards, with the last 50,000 rows overlaid in
# translucent red (the slice that is used for modelling further down).
df_btc.loc['2021-11-1':].Close.plot(figsize=(16,12)) #localising recent past 
df_btc.iloc[-50000:].Close.plot(figsize=(16,12), c='r', alpha=0.5)
plt.show()

In [46]:
# Column order for modelling: Close is placed last because the batching
# helpers read the label from the final column (index -1).
new_col_order = ['Count','Open','High','Low','Volume','VWAP','Target','Close']


# Show the frame before and after applying the new column order. reindex
# keeps only the listed columns, so any column not in the list is dropped.
display(df_btc.head())
df_btc = df_btc.reindex(columns = new_col_order)
display(df_btc.head())


In [47]:
# Close over the last 50,000 rows only.
df_btc.iloc[-50000:].Close.plot(figsize=(10,8))

In [48]:
import tensorflow as tf
import numpy as np
import gc
import math as mt

# Seed NumPy's global RNG; the shuffle branch of the batching helpers draws
# its row indices from it.
seed = 28
np.random.seed(seed)


def Data_func(data, past, delay, minn, maxx = None, shuffle=False, batch_size=128, rate=6):
    """Build look-back windows and their labels from a 2-D array.

    For every selected row ``r`` the sample is ``data[r - past : r : rate]``
    and the label is the last column of ``data[r + delay - 1]``.

    Parameters
    ----------
    data : 2-D numpy array, rows ordered in time, label in the last column.
    past : number of rows to look back from each selected row.
    delay : offset (in rows) of the label relative to the selected row.
    minn : first row of the region to draw from.
    maxx : exclusive upper row bound; defaults to ``len(data) - delay - 1``.
    shuffle : if True, each batch uses ``batch_size`` random rows drawn with
        ``np.random.randint`` from ``[minn + past, maxx)``; otherwise rows
        are taken consecutively.
    batch_size : number of rows processed per step.
    rate : stride inside the look-back window.

    Returns
    -------
    (samples, targets) with shapes ``(n, ceil(past / rate), n_features)``
    and ``(n,)``.

    Raises
    ------
    ValueError
        If the region is too small to produce even one batch (the original
        code failed here with an UnboundLocalError).
    """
    if maxx is None:
        maxx = len(data) - delay -1

    # Window offsets relative to the selected row. Taking them from the same
    # range the window uses keeps the sample shape correct even when `past`
    # is not a multiple of `rate`.
    offsets = np.arange(-past, 0, rate)

    cursor = minn + past
    sample_batches = []
    target_batches = []

    while True:
        if shuffle:
            rows = np.random.randint(minn + past, maxx, size = batch_size)
        else:
            rows = np.arange(cursor, min(cursor + batch_size, maxx))

        cursor += len(rows)
        # Stop once another full batch no longer fits; the rows drawn in
        # this iteration are discarded, as in the original loop.
        if cursor + batch_size > maxx:
            break

        # Broadcasting (n, 1) + (window,) gathers every window in one step.
        samples = np.asarray(data[rows[:, None] + offsets], dtype=np.float64)
        targets = np.asarray(data[rows + delay - 1, -1], dtype=np.float64)

        if not sample_batches:
            print('Ini_samples: ', samples.shape, 'Ini_targets: ', targets.shape)
        sample_batches.append(samples)
        target_batches.append(targets)

    if not sample_batches:
        raise ValueError(
            'Data_func produced no batches: the region [minn + past, maxx) is '
            'too small for the chosen past/batch_size; supply more data or '
            'reduce them.'
        )

    # A single concatenation at the end avoids re-copying the growing result
    # on every batch.
    return np.concatenate(sample_batches, axis=0), np.concatenate(target_batches, axis=0)

def Baseline_func(data, past, delay, minn, maxx = None , shuffle=False, batch_size=128, rate=6):
    """Collect the naive baseline prediction for every selected row.

    The baseline for row ``r`` is simply the last column of ``data[r]``. Rows
    are selected exactly as in Data_func (same bounds, same batching and stop
    rule). ``rate`` is accepted only to keep the two signatures parallel.

    Parameters
    ----------
    data : 2-D numpy array, rows ordered in time, label in the last column.
    past : number of leading rows skipped after ``minn``.
    delay : only used for the default ``maxx``.
    minn : first row of the region to draw from.
    maxx : exclusive upper row bound; defaults to ``len(data) - delay - 1``.
    shuffle : if True, rows are drawn with ``np.random.randint``. Each call
        draws its own rows, so the result is not aligned with a separate
        shuffled Data_func call.
    batch_size : number of rows processed per step.
    rate : unused.

    Returns
    -------
    1-D array of baseline predictions.

    Raises
    ------
    ValueError
        If the region is too small to produce even one batch (the original
        code failed here with an UnboundLocalError).
    """
    if maxx is None:
        maxx = len(data) - delay -1

    cursor = minn + past
    pred_batches = []

    while True:
        if shuffle:
            rows = np.random.randint(minn + past, maxx, size = batch_size)
        else:
            rows = np.arange(cursor, min(cursor + batch_size, maxx))

        cursor += len(rows)
        # Stop once another full batch no longer fits; the rows drawn in
        # this iteration are discarded, as in the original loop.
        if cursor + batch_size > maxx:
            break

        cbase_preds = np.asarray(data[rows, -1], dtype=np.float64)
        if not pred_batches:
            print('Ini_cbase_pred: ', cbase_preds.shape)
        pred_batches.append(cbase_preds)

    if not pred_batches:
        raise ValueError(
            'Baseline_func produced no batches: the region [minn + past, maxx) '
            'is too small for the chosen past/batch_size.'
        )

    # Concatenate once instead of growing the array on every batch.
    return np.concatenate(pred_batches, axis=0)



#'''    
# Model only the most recent 50,000 rows of df_btc. The copy keeps the
# in-place normalisation below from touching df_btc.
x_train = df_btc.iloc[-50000:].copy() ## Localising training => prediction space

print()
x_train = x_train.to_numpy()

print('BTC_train: ',x_train.shape)

past = 360 ## look-back window in rows (360 one-minute rows = 6 hours)

''' The problem statement involves being able to predict next 15 mins (very much possible!) residualized returns | NO DELAY '''
''' Target 15 datapoints from current point'''
# NOTE(review): with delay = 0, Data_func reads the label at row - 1
# (data[row + delay - 1][-1]), so no look-ahead is applied at all.
delay = 0 ## Targeting after 15 mins | NOT a correct implementation

# Chronological split: 60 % train; the remaining 40 % is divided into
# 25 % test and 75 % validation (i.e. about 30 % / 10 % of all rows).
train_split, test_split = 0.60, 0.25
train_max = round(train_split * x_train.shape[0])
val_max = (1-train_split)*x_train.shape[0]
test_max = round(test_split*val_max)
val_max = round(val_max - test_max)

''' use batch_size = 15 in accordance to the problem statement'''
bs = 15

# Standardise every column using statistics of the training rows only.
# The mean is removed first, then std is taken from the centred training rows.
mean = x_train[:train_max].mean(axis = 0)
x_train -= mean
std = x_train[:train_max].std(axis = 0)
x_train /= std

# Both expressions reduce to (split size - 1 - past).
# NOTE(review): these are row counts, not batch counts, yet val_steps is later
# passed to fit(validation_steps=...) on datasets batched by `bs` - confirm.
val_steps =  (train_max+val_max) - (train_max+1) - past ## rows of the split minus the look-back window
test_steps = (train_max+val_max+test_max)-(train_max+val_max+1) - past

print('Train_Max: ', mt.floor(train_max),', Val_Max: ', mt.floor(val_max),', Test_Max: ', mt.floor(test_max))
print()
#'''        
# Training region: rows [0, train_max - 1), sampled at random.
# NOTE(review): with shuffle=True each helper call draws its own random rows,
# so cbase_train_data is not aligned row-for-row with the labels in train_data.
train_data = Data_func(x_train, past, delay, minn = 0, maxx = train_max - 1, batch_size=bs, shuffle=True, rate=5) ## sampling data every 5 minutes
cbase_train_data = Baseline_func(x_train, past, delay, minn = 0, maxx = train_max - 1, batch_size=bs, shuffle=True, rate=5) ## sampling data every 5 minutes

temp, _ = train_data
print('--'*20)
print('Done - Train_data: ', temp.shape)
print('Cbase_data: ', cbase_train_data.shape)
print('--'*20)
# Validation region: the rows right after the training region, in order.
val_data = Data_func(x_train, past, delay, minn = train_max+1, maxx = train_max + val_max - 1, batch_size=bs, rate=5)
cbase_val_data = Baseline_func(x_train, past, delay, minn = train_max+1, maxx = train_max + val_max - 1, batch_size= bs, rate=5)

temp, _ = val_data 
print('--'*20)
print('Done - Val_data: ', temp.shape)
print('Cbase_data: ', cbase_val_data.shape)
print('--'*20)
#'''
# Test region: everything after validation; maxx is left at its default
# (len(data) - delay - 1).
test_data = Data_func(x_train, past, delay, minn = train_max+val_max+1, batch_size=bs, rate = 5)
cbase_test_data = Baseline_func(x_train, past, delay, minn = train_max+val_max+1, batch_size= bs, rate = 5)

temp, _ = test_data
print('--'*20)
print('Done - Test_data: ', temp.shape)
print('Cbase_data: ', cbase_test_data.shape)
print('--'*20)

# Unpack (samples, labels) for each split.
train_sample, train_label = train_data ## sample contains return 
val_sample, val_label = val_data
test_sample, test_label = test_data

# Drop the tuple references; the unpacked arrays stay alive.
del temp
del train_data
del val_data
del test_data
gc.collect()

In [49]:
import numpy as np
import tensorflow as tf
import gc

# Wrap each split in a tf.data pipeline: repeat indefinitely, then batch by bs.
# Because the datasets never end, callers have to bound them with explicit
# step counts.
print('train_sample: ', train_sample.shape, 'train_label: ', train_label.shape)
tr_dataset = tf.data.Dataset.from_tensor_slices((train_sample, train_label)).repeat().batch(bs) #.cache().prefetch(tf.data.AUTOTUNE) ## use repeat() before batching while using TPU
val_dataset = tf.data.Dataset.from_tensor_slices((val_sample, val_label)).repeat().batch(bs) #.cache().prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((test_sample, test_label)).repeat().batch(bs) #.cache().prefetch(tf.data.AUTOTUNE)
# Only the training arrays are released; the val/test arrays are still used
# by the baseline cells below.
del train_sample 
del train_label
gc.collect()

In [50]:
### CommonSense Model
import numpy as np
from statsmodels.tools.eval_measures import meanabs, rmse

target_std = np.std(val_label)

# Naive baseline: always predict the current value as the value 15 min later.
print('CommonSense - BaseLines - MAE -----')
print()
for split_tag, naive_pred, truth in (
    ('__Val__', cbase_val_data, val_label),
    ('__Test__', cbase_test_data, test_label),
):
    print(split_tag)
    ## the latest observed value stands in for the prediction after 'delay' mins
    print('MAE: ',meanabs(naive_pred, truth))
    print('RMSE: ', rmse(naive_pred, truth))
    print()

In [51]:
import matplotlib.pyplot as plt
import seaborn as sns

# Compare the naive baseline with the real labels on the start of the test set.
sns.set_style('darkgrid')

fig, axs = plt.subplots(1,1, figsize = (15,8))

# Show at most 1500 points, clamped to what is available: the fixed 1500
# made plot() fail with a length mismatch on shorter test sets.
n_points = min(1500, len(cbase_test_data), len(test_label))
x_axis = np.arange(0, n_points)

axs.plot(x_axis, cbase_test_data[:n_points], label = 'Common_Baseline')
axs.plot(x_axis, test_label[:n_points], label = 'Real_Data')
axs.legend()
plt.show()

### USE DNN and 1D_CNN + RNN

In [None]:
# Dense network applied to every timestep, averaged over time, then regressed
# to one value. Input shape (72, 8): 72 = past // rate (360 // 5) timesteps
# and 8 = the columns listed in new_col_order.
dnn_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(72,8)),
    tf.keras.layers.Dense(128, activation = 'linear'),
    tf.keras.layers.Dense(256, activation='relu'),
    # Collapse the time axis by averaging.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(128, activation = 'relu'),
    tf.keras.layers.Dense(1)
    
])
# Early stopping watches val_loss with a patience of 3 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
dnn_model.compile(optimizer='Adam', loss='mse', metrics=['mae'])
# The datasets repeat forever, so each epoch is bounded by steps_per_epoch
# (128 batches) and validation by validation_steps.
# NOTE(review): val_steps was computed as a row count, not a batch count - confirm.
dnn_model.fit(tr_dataset, epochs=50, validation_data = val_dataset, validation_steps = val_steps, steps_per_epoch =128, callbacks=[es])
print('Training_Done ... ')

In [52]:
# Load a previously saved DNN from the input directory, rebinding dnn_model.
dnn_model = tf.keras.models.load_model('../input/all-models/dnn_model.h5')

In [53]:
# Evaluate on 128 batches of each (endlessly repeating) dataset; with the
# compile settings used in this notebook the values are [mse loss, mae].
print(dnn_model.evaluate(val_dataset, steps=128))
print(dnn_model.evaluate(test_dataset, steps=128))

In [54]:
# Take the first test batch (bs samples) and compare prediction with truth.
test_pred_data = list(test_dataset.take(1))
test_sample, test_target = test_pred_data[0]
x_lin = np.arange(0, test_target.shape[0])


def inv_norm(test_target):
    """Undo the standardisation of the label column (last column)."""
    return np.ravel(test_target) * std[-1] + mean[-1]


test_pred = dnn_model.predict(test_sample)
inv_test_pred = inv_norm(test_pred)
inv_test_target = inv_norm(test_target)

# Truth in black, model output in red, both back in original units.
fig = plt.figure(figsize=(15,8))
for curve, line_style, tag in (
    (inv_test_target, 'k', 'test'),
    (inv_test_pred, 'r', 'dnn_model'),
):
    plt.plot(x_lin, curve, line_style, label=tag)
plt.legend()
plt.show()

In [55]:
from statsmodels.tools.eval_measures import meanabs

# Absolute error, mean label and their ratio in percent for the plotted batch.
mae_value = meanabs(inv_test_target, inv_test_pred)
mean_target = np.mean(inv_test_target)
print('MAE: ', mae_value)
print('Mean_target: ', mean_target)
print('Mean_P_Error: ', (mae_value / mean_target) * 100, '%')

In [None]:
# Persist the current DNN to the working directory in HDF5 format.
dnn_model.save('./dnn_model.h5')

In [None]:
# 1-D convolution followed by a per-timestep dense layer, two stacked GRUs
# and a dense regression head. Input shape (72, 8): 72 = past // rate
# (360 // 5) timesteps and 8 = the columns listed in new_col_order.
cnn_rnn_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(72,8)),
    tf.keras.layers.Conv1D(128, 5),
    tf.keras.layers.Dense(264, activation='relu'),
    # The first GRU returns the full sequence so the second GRU can consume it.
    tf.keras.layers.GRU(264, return_sequences=True),
    tf.keras.layers.GRU(264),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1)
]) 

cnn_rnn_model.compile(optimizer='Adam', loss='mse', metrics=['mae'])
cnn_rnn_model.summary()

# Early stopping with a patience of 5 epochs; the monitored quantity is left
# at the Keras default.
es = tf.keras.callbacks.EarlyStopping(patience=5)
# The datasets repeat forever, so each epoch is bounded by steps_per_epoch
# (128 batches) and validation by validation_steps.
# NOTE(review): val_steps was computed as a row count, not a batch count - confirm.
cnn_rnn_model.fit(tr_dataset, epochs=20, validation_data = val_dataset, validation_steps = val_steps, steps_per_epoch =128, callbacks=[es])
print('Training_Done...')

In [56]:
# Load a previously saved CNN+GRU model from the input directory, rebinding
# cnn_rnn_model.
cnn_rnn_model = tf.keras.models.load_model('../input/all-models/cnn_rnn_model.h5')

In [57]:
# Evaluate on 128 batches of each (endlessly repeating) dataset; with the
# compile settings used in this notebook the values are [mse loss, mae].
print(cnn_rnn_model.evaluate(val_dataset, steps=128))
print(cnn_rnn_model.evaluate(test_dataset, steps=128))

In [58]:
# Persist the current CNN+GRU model to the working directory in HDF5 format.
cnn_rnn_model.save('./cnn_rnn_model.h5')
print('saved cnn_rnn_model')

In [59]:
# Take the first test batch (bs samples) and compare the CNN+GRU prediction
# with the truth.
test_pred_data = [(sample, target) for sample, target in test_dataset.take(1)]
test_sample, test_target = test_pred_data[0]
x_lin = np.arange(0,test_target.shape[0]) 
def inv_norm(test_target):
    """Undo the standardisation of the label column (last column)."""
    test_target = np.ravel(test_target)
    test_target = test_target * std[-1]
    test_target = test_target + mean[-1]
    return test_target

test_pred = cnn_rnn_model.predict(test_sample)
inv_test_pred = inv_norm(test_pred)
inv_test_target = inv_norm(test_target)

fig = plt.figure(figsize=(15,8))
plt.plot(x_lin, inv_test_target, 'k', label = 'test')
# This curve comes from cnn_rnn_model; the legend previously read 'dnn_model'
# (copied from the DNN cell).
plt.plot(x_lin, inv_test_pred, 'r',  label = 'cnn_rnn_model')
plt.legend()
plt.show()

In [60]:
# Absolute error, mean label and their ratio in percent for the plotted batch.
mae_value = meanabs(inv_test_target, inv_test_pred)
mean_target = np.mean(inv_test_target)
print('MAE: $ ' + str(mae_value))
print('Mean_target: $ ' + str(mean_target))
print('Mean_P_Error: ', (mae_value / mean_target) * 100, '%')

#### Extended Prediction | For DNN Model

In [61]:
# Pull three consecutive test batches and stitch their samples and labels
# together along the batch axis.
test_pred_data = list(test_dataset.take(3))

sample_parts = [batch_samples for batch_samples, _ in test_pred_data]
target_parts = [batch_targets for _, batch_targets in test_pred_data]

test_list = tf.concat(sample_parts, axis=0)
target_list = tf.concat(target_parts, axis=0)



In [62]:
# Predict the stitched test batches with the DNN and plot against the truth.
x_lin = np.arange(0, target_list.shape[0])
test_pred = dnn_model.predict(test_list)

print('Prediction_Shape: ', test_pred.shape,'Target_Shape: ',target_list.shape)


def inv_norm(test_target):
    """Undo the standardisation of the label column (last column)."""
    return np.ravel(test_target) * std[-1] + mean[-1]


inv_target_list = inv_norm(target_list)
inv_test_pred = inv_norm(test_pred)

# Truth in black, model output in red, both back in original units.
fig = plt.figure(figsize=(15,8))
for curve, line_style, tag in (
    (inv_target_list, 'k', 'test'),
    (inv_test_pred, 'r', 'dnn_model'),
):
    plt.plot(x_lin, curve, line_style, label=tag)
plt.legend()
plt.show()

In [63]:
from statsmodels.tools.eval_measures import rmse

# Error summary for the extended prediction: MAE, mean label, percentage
# error and RMSE.
mae_value = meanabs(inv_target_list, inv_test_pred)
mean_target = np.mean(inv_target_list)
print('MAE: $ ' + str(mae_value))
print('Mean_target: $ ' + str(mean_target))
print('Mean_P_Error: ', (mae_value / mean_target) * 100, '%')

rmse_value = rmse(inv_target_list, inv_test_pred)
print('RMSE: $ ' + str(rmse_value))

SESSION STOPPER

In [None]:
import time
# Keep the session busy: print a counter, then sleep 30 minutes, 50 times
# over (25 hours in total).
for i in range(50):
    print('Time: ', i)
    time.sleep(30*60)

In [None]:
# Disabled submission loop, kept as a bare string literal so it is never run.
# As written it would submit a constant Target of 0 for every test batch.
'''import gresearch_crypto as gs
env = gs.make_env()
iter_test = env.iter_test()
for (test_df, _) in iter_test:
    sample_prediction_df['Target'] = 0  
    env.predict(sample_prediction_df)'''