In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import metrics
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tcn import TCN
import os
import random
import tensorflow as tf

In [2]:
# resetting the seeds for reproducibility
def reset_random_seeds(seed=1):
    """Seed every random number generator this notebook relies on.

    Parameters
    ----------
    seed : int, optional
        Value used for all generators. Defaults to 1, the value this
        notebook has always used, so calling with no argument is unchanged.
    """
    # Exported for any child processes; the already-running interpreter read
    # PYTHONHASHSEED at start-up, so this line does not change its hashing.
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)   # TensorFlow global seed
    np.random.seed(seed)       # NumPy legacy global generator
    random.seed(seed)          # Python stdlib generator

# Seed the random number generators before any data or model work.
reset_random_seeds()

# import data
# `df` provides the model inputs; `df1` provides the closing price used as target.
df = pd.read_csv('Boruta_onchain_data.csv')
df1 = pd.read_csv('all_data.csv')

# Keep only rows from START_DATE onwards.
# NOTE(review): the comparison is done on the raw column values; this presumes
# 'timestamp' holds ISO-formatted date strings -- confirm.
START_DATE = '2013-03-11'
df = df[df['timestamp'] >= START_DATE].reset_index(drop=True)

# Inputs (df) and prices (df1) are paired by row position further down, so the
# same window has to be applied to both frames or they drift out of step.
if 'timestamp' in df1.columns:
    df1 = df1[df1['timestamp'] >= START_DATE].reset_index(drop=True)

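# Dataset names that appear to be alternatives for the input file loaded into `df`
# (presumably each corresponds to '<name>.csv' -- confirm):
#   onchain_data_new
#   TA_data
#   all_data
#   Boruta_data
#   Boruta_TA_data
#   Boruta_onchain_data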

In [3]:
# separate the inputs and target
X = df.drop('timestamp', axis=1)

# Regression target: the closing price of the following day.
price = pd.DataFrame()
price['today'] = df1['price-ohlc-usd-c']
price['next day'] = price['today'].shift(-1)

# X comes from `df` and the target from `df1`; they are matched purely by row
# position, so both frames must contain the same number of rows.
if len(X) != len(price):
    raise ValueError(
        f"input rows ({len(X)}) and price rows ({len(price)}) differ; "
        "df and df1 must cover the same dates"
    )

# The last row has no next-day price after the shift: drop exactly that row
# from both X and y so they stay aligned.
X = X[:-1]
y = price['next day'].iloc[:-1]
if y.isna().any():
    raise ValueError("price column contains missing values; cannot build the target")

# separate training data from testing data (chronological split, no shuffling)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [4]:
# Standardise the features: the scaler is fitted on the training split only
# and the fitted transform is then reused for the test split.
scaler = StandardScaler()

# A 1-D input is promoted to a single-column 2-D array; anything else is
# passed through untouched.
X_train = X_train.to_numpy().reshape(-1, 1) if X_train.ndim == 1 else X_train
X_test = X_test.to_numpy().reshape(-1, 1) if X_test.ndim == 1 else X_test

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# reshape the input data for the TCN: (samples, timesteps, features)
def create_sequences(data, timesteps):
    """Stack every run of `timesteps` consecutive rows of `data`.

    Window ``i`` holds rows ``i`` through ``i + timesteps - 1``.

    Parameters
    ----------
    data : array-like
        Rows ordered in time; the first axis is the one that is windowed.
    timesteps : int
        Number of consecutive rows per window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data) - timesteps + 1, timesteps, ...)``. When
        `data` has fewer than `timesteps` rows the result has zero windows
        but keeps the trailing dimensions, so ``.shape[1:]`` is still valid.

    Raises
    ------
    ValueError
        If `timesteps` is smaller than 1.
    """
    if timesteps < 1:
        raise ValueError("timesteps must be a positive integer")

    data = np.asarray(data)
    n_windows = len(data) - timesteps + 1
    if n_windows <= 0:
        # Not enough rows for a single window: return an empty array of the right rank.
        return np.empty((0, timesteps) + data.shape[1:], dtype=data.dtype)

    return np.stack([data[start:start + timesteps] for start in range(n_windows)])

# Window length: every sample holds this many consecutive rows.
timesteps = 5

# Build the windowed inputs for both splits.
X_train_reshaped, X_test_reshaped = (
    create_sequences(split, timesteps) for split in (X_train_scaled, X_test_scaled)
)

# Each window is paired with the target of its last row, so the first
# (timesteps - 1) targets have no complete window and are dropped.
first_label = timesteps - 1
y_train = y_train[first_label:]
y_test = y_test[first_label:]

In [5]:
# define the TCN model
n_features = X_train_reshaped.shape[2]
model = Sequential()

# Declare the input shape with an explicit Input layer instead of passing
# `input_shape=` to the first layer, which recent Keras versions warn about.
model.add(tf.keras.Input(shape=(timesteps, n_features)))

# First TCN layer
model.add(TCN(nb_filters=64,
              kernel_size=3,
              dilations=[1, 2, 4, 8],
              activation='relu',
              return_sequences=True))  # Ensures 3D output for the next TCN layer
model.add(Dropout(0.5))

# Second TCN layer
model.add(TCN(nb_filters=64,
              kernel_size=3,
              dilations=[1, 2, 4, 8, 16],
              activation='relu',
              return_sequences=False))  # Returns 2D output for Dense layer
model.add(Dropout(0.5))

# Dense output layer: a single unit producing the predicted price.
# NOTE(review): ReLU clamps predictions at zero and passes no gradient while
# the unit is inactive; a linear output is the usual choice for regression.
# Left unchanged here so results stay comparable -- confirm intent.
model.add(Dense(1, activation='relu'))

# compile the model: optimise mean absolute error and also report MSE
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_absolute_error', metrics=['mean_absolute_error', 'mean_squared_error'])

  super(TCN, self).__init__(**kwargs)





In [6]:
# Write the model to "tcn_r_model.h5".
# NOTE(review): this cell runs before the training cell that follows it, so the
# file written here holds the freshly initialised (untrained) weights.
model.save("tcn_r_model.h5")



In [7]:
# train with the training dataset
# Stop once the validation loss has failed to improve for 50 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=50)

# validation_split=0.1 reserves a tenth of the training samples for validation.
history = model.fit(X_train_reshaped, y_train, epochs=100, batch_size=50, validation_split=0.1, callbacks=[early_stopping])

# Persist the fitted model. The only other save in this notebook happens before
# training, so without this the file on disk would contain untrained weights.
model.save("tcn_r_model.h5")

Epoch 1/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 3012.1135 - mean_absolute_error: 3012.1135 - mean_squared_error: 20978066.0000 - val_loss: 6591.0708 - val_mean_absolute_error: 6591.0708 - val_mean_squared_error: 84246096.0000
Epoch 2/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 1004.7516 - mean_absolute_error: 1004.7516 - mean_squared_error: 3028024.5000 - val_loss: 4165.7529 - val_mean_absolute_error: 4165.7529 - val_mean_squared_error: 36618900.0000
Epoch 3/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 769.3553 - mean_absolute_error: 769.3553 - mean_squared_error: 1837991.0000 - val_loss: 3122.6692 - val_mean_absolute_error: 3122.6692 - val_mean_squared_error: 21400754.0000
Epoch 4/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 668.0773 - mean_absolute_error: 668.0773 - mean_squared_error: 1416267.3750 - val_loss: 2677.7

In [None]:
# Run the model over the windowed test inputs to obtain its predictions.
y_pred = model.predict(x=X_test_reshaped)

In [None]:
# Score the predictions against the held-out targets.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report each metric on its own line.
for label, score in (
    ("Root Mean Squared Error:", rmse),
    ("Mean Absolute Error:", mae),
    ("R^2 Score:", r2),
):
    print(label, score)

In [None]:
# Number of evaluated samples; every exported column must have this length.
n_eval = len(y_test)

# Each target is the next-day price of the last row in its window, and the
# test split is the tail of the data, so the dates being predicted are the
# final n_eval timestamps.
# NOTE(review): relies on df and df1 describing the same dates row for row -- confirm.
time = df['timestamp']
time_test = time.iloc[-n_eval:]

# Flatten y_pred to be a 1-dimensional array
y_pred_flat = y_pred.flatten()

# Build the result table positionally (plain arrays) so that differing pandas
# indexes cannot misalign the columns or introduce NaN rows.
# NOTE(review): 'value' is the last n_eval entries of price['today'], which by
# construction equals 'actual'. If the previous day's close was intended,
# use price['today'].iloc[-n_eval - 1:-1] instead.
pred_res = pd.DataFrame({
    'date': time_test.to_numpy(),
    'actual': y_test.to_numpy(),
    'prediction': y_pred_flat,
    'value': price['today'].iloc[-n_eval:].to_numpy(),
})

# Make sure the output folder exists before writing.
os.makedirs('pred2', exist_ok=True)
pred_res.to_csv('pred2/tcn_uni_data.csv', index=False)