Surrogate modeling

1. This script integrates Pyro for MCMC and TensorFlow for the neural network.
2. Given the large number of sequences and the time it might take for MCMC to sample for each sequence, this script can be time-consuming. Depending on the size of your data, you might want to consider using a subset or parallelizing the MCMC sampling process.
3. This script does not utilize GPU acceleration for Pyro or TensorFlow, but both support it. Leveraging a GPU can improve performance.

In [1]:
import tensorflow as tf

: 

: 

In [1]:
import logging
import os

import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import pyro

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pyro.distributions as dist

In [3]:
from pyro.infer import MCMC, NUTS

In [None]:
# 1. Load Data
df = pd.read_csv('Binance_1INCHBTC_d.csv', skiprows=1)
df.head()

Unnamed: 0,Unix,Date,Symbol,Open,High,Low,Close,Volume 1INCH,Volume BTC,tradecount
0,1690243200000,2023-07-25,1INCHBTC,1e-05,1.1e-05,1e-05,1e-05,399437.3,4.161505,1429
1,1690156800000,2023-07-24,1INCHBTC,1.1e-05,1.1e-05,1e-05,1e-05,1888337.1,19.60979,4082
2,1690070400000,2023-07-23,1INCHBTC,1.1e-05,1.1e-05,1.1e-05,1.1e-05,503804.1,5.51554,1344
3,1689984000000,2023-07-22,1INCHBTC,1.1e-05,1.1e-05,1.1e-05,1.1e-05,771830.4,8.620545,2201
4,1689897600000,2023-07-21,1INCHBTC,1.1e-05,1.2e-05,1.1e-05,1.1e-05,1217603.6,13.674184,3080


In [5]:
# 1. Load the data 
close_prices = df['Close'].values
close_prices[0:10]

array([1.035e-05, 1.043e-05, 1.088e-05, 1.101e-05, 1.121e-05, 1.101e-05,
       1.122e-05, 1.204e-05, 1.336e-05, 1.427e-05])

In [6]:
# 2. Prepare sequences
window_size = 10
sequences = [close_prices[i: i + window_size] for i in range(len(close_prices) - window_size + 1)]

In [7]:
# 3. Markov Chain using Pyro

def markov_model(data):
    mu = pyro.sample('mu', dist.Normal(0, 10))
    sigma = pyro.sample('sigma', dist.HalfNormal(10))
    for i, value in enumerate(data):
        pyro.sample(f"data_{i}", dist.Normal(mu, sigma), obs=value)

def get_mcmc_samples(data, num_samples=1000, warmup_steps=200):
    nuts_kernel = NUTS(markov_model)
    mcmc = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps)
    mcmc.run(torch.tensor(data))
    return mcmc.get_samples()

def predict_next_value(data):
    samples = get_mcmc_samples(data[:-1])
    predicted_mu = samples['mu'].mean().item()
    return predicted_mu

In [8]:
# 4. Create training data using MCMC predictions
X = [seq[:9] for seq in sequences]
y = [predict_next_value(seq) for seq in sequences]

Sample:  59%|█████▉    | 710/1200 [02:03,  3.54it/s, step size=8.39e-04, acc. prob=0.670]

In [None]:
# 5. Surrogate modeling using Neural Network

model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(9,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(np.array(X), np.array(y), epochs=50, batch_size=32)

In [None]:
# 6. Evaluate the model

# Splitting data into train and test
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# Make predictions on test set
y_pred = model.predict(X_test)

# Calculate MSE and MAE
mse = tf.keras.losses.MeanSquaredError()
mae = tf.keras.losses.MeanAbsoluteError()

mse_value = mse(y_test, y_pred).numpy()
mae_value = mae(y_test, y_pred).numpy()

print(f"MSE: {mse_value}, MAE: {mae_value}")