In [1]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
import tensorflow
from tensorflow import keras
from sklearn.metrics import mean_squared_error

### Importing data

In [2]:
file_path="C:/Users/naujo/Documents/STUDIJOS/Bachelor's Thesis Project/Bitcoin Historical Data/btcusd_1-min_data.csv"

df = pd.read_csv(file_path)

### Cutting data

In [3]:
cut_ts=	1514764800
df=df.loc[df['Timestamp'] >= cut_ts].reset_index(drop=True)

df['Datetime'] = pd.to_datetime(df['Timestamp'], unit='s', utc=True)
df = df.set_index('Datetime')

daily_close = df['Close'].resample('1D').last().dropna()

### Define indicator functions

In [None]:
def rsi(series: pd.Series, period: int = 14):
    delta = series.diff()
    up    = delta.clip(lower=0)
    down  = -delta.clip(upper=0)

    roll_up   = up.rolling(period).mean()
    roll_down = down.rolling(period).mean()
    rs = roll_up / roll_down
    return 100 - (100 / (1 + rs))

def ema(series: pd.Series, span: int):
    return series.ewm(span=span, adjust=False).mean()

def ew_corr(x: pd.Series, y: pd.Series, span: int = 7):
    return x.ewm(span=span, adjust=False).corr(y)

### Log Returns and EWC of Log Returns and Volume, 7-day and 14-day EMA.

In [5]:
df['Log_Returns']=np.log(df['Close']/df['Close'].shift(1))

daily = (df.resample('1D').agg({'Close': 'last','Volume': 'sum'}).dropna())

daily['RSI14']=rsi(daily['Close'], 14)

daily['Log_Returns'] = np.log(daily['Close'] / daily['Close'].shift(1))

daily['Log_Vol_Diff']=np.log(daily['Volume'] / daily['Volume'].shift(1))

daily['EW_Corr7'] = (daily['Log_Returns'].ewm(span=7, adjust=False).corr(daily['Log_Vol_Diff']))

daily['EMA_7']  = daily['Close'].ewm(span=7,  adjust=False).mean()
daily['EMA_14'] = daily['Close'].ewm(span=14, adjust=False).mean()

### Defining datasets

In [None]:
target_col = 'Log_Returns'

lookback = 14

data = daily.dropna().copy()

cols=[]
for i in range(lookback, 0, -1):
    col = f'{target_col}_t-{i}'
    data[col] = data[target_col].shift(i)
    cols.append(col)

data['Log_Vol_Diff'] = data['Log_Vol_Diff']
supervised = data.dropna()

X = supervised[cols + ['EMA_7', 'EMA_14', 'EW_Corr7', 'Log_Vol_Diff', 'RSI14']].values
y = supervised[target_col].values
dates = supervised.index 

### Creating training and test datasets

In [None]:

rewind = 31


split_idx   = len(X) - rewind
X_train, X_test       = X[:split_idx],  X[split_idx:]
y_train, y_test       = y[:split_idx],  y[split_idx:]
dates_train, dates_test = dates[:split_idx], dates[split_idx:]

print(f"Train shape: {X_train.shape},  Test shape: {X_test.shape}")
assert len(X_test) == rewind, "Hold-out length doesn’t match 'rewind'!"

from sklearn.preprocessing import StandardScaler

scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train.reshape(-1, 1))

X_train_s = scaler_X.transform(X_train)
X_test_s  = scaler_X.transform(X_test)
y_train_s = scaler_y.transform(y_train.reshape(-1, 1)).ravel()


Train shape: (2634, 19),  Test shape: (31, 19)


### Model fitting

In [8]:
X_train_lstm = X_train_s.reshape(X_train_s.shape[0], 1, X_train_s.shape[1])
X_test_lstm  = X_test_s.reshape (X_test_s.shape[0],  1, X_test_s.shape[1])

from keras.models import Sequential
from keras.layers import LSTM, Dense

regressor = Sequential([
    LSTM(50, activation='tanh', return_sequences=True, input_shape=(1, 19)),
    LSTM(25, activation='tanh', ),
    Dense(32, activation='relu'),
    Dense(1)
])

regressor.compile(optimizer='adam', loss='mean_squared_error')

regressor.fit(X_train_lstm, y_train_s, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

pred_s = regressor.predict(X_test_lstm, verbose=0)
pred_lstm   = scaler_y.inverse_transform(pred_s).ravel()

rmse = np.sqrt(mean_squared_error(y_test, pred_lstm))
print(f"Test RMSE: {rmse:.5f}")

  super().__init__(**kwargs)


Epoch 1/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 1.0841 - val_loss: 0.5677
Epoch 2/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.8429 - val_loss: 0.4322
Epoch 3/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6592 - val_loss: 0.3324
Epoch 4/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5507 - val_loss: 0.3231
Epoch 5/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5524 - val_loss: 0.2993
Epoch 6/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4838 - val_loss: 0.2909
Epoch 7/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4697 - val_loss: 0.3030
Epoch 8/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4410 - val_loss: 0.2864
Epoch 9/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
k        = 30 
n_sims   = 1_000
rng      = np.random.default_rng(42)

rewind=rewind
last_obs   = supervised.iloc[-rewind].copy()
start_px   = daily["Close"].iloc[-rewind]

train_pred = scaler_y.inverse_transform(
                regressor.predict(X_train_lstm, verbose=0)
            ).ravel()
residuals  = y_train - train_pred

def make_feature_tensor(row_like_series: pd.Series) -> np.ndarray:
    feats   = row_like_series[cols +['EMA_7', 'EMA_14', 'Volume', 'Log_Vol_Diff', 'RSI14']].values
    X_scaled = scaler_X.transform(feats.reshape(1, -1))
    return X_scaled.reshape(1, 1, -1)

In [11]:
sim_log_ret = np.zeros((n_sims, k), dtype=float)

for p in range(n_sims):
    row = last_obs.copy()
    ema7, ema14 = row['EMA_7'], row['EMA_14']

    for t in range(k):
        mu_t = scaler_y.inverse_transform(
                   regressor.predict(make_feature_tensor(row), verbose=0)
               ).item()

       
        eps_t = rng.choice(residuals)
        r_t   = mu_t + eps_t
        sim_log_ret[p, t] = r_t

       
        for i in range(lookback, 1, -1):
            row[f'{target_col}_t-{i}'] = row[f'{target_col}_t-{i-1}']
        row[f'{target_col}_t-1'] = r_t
        row[target_col]          = r_t

        ema7   = (r_t + ema7*6)  / 7
        ema14  = (r_t + ema14*13)/14
        row['EMA_7'], row['EMA_14'] = ema7, ema14


In [None]:
import matplotlib.dates as mdates


anchor_dt = daily.index[-rewind] 
anchor_ix = daily.index.get_loc(anchor_dt)
forecast_dates = daily.index[anchor_ix+1 : anchor_ix+1+k]
assert len(forecast_dates) == k, "daily index too short for k!"

price_paths_df = pd.DataFrame(price_paths, columns=forecast_dates)

median_path = price_paths_df.median()
quantiles = price_paths_df.quantile([0.025, 0.975])
p_low = quantiles.loc[0.025]
p_high = quantiles.loc[0.975]

realised = daily.loc[forecast_dates, "Close"]

fig, ax = plt.subplots(figsize=(10,5))

ax.plot(price_paths_df.T, color="grey", alpha=0.03)
ax.plot(forecast_dates, median_path, lw=2, label="median")
ax.fill_between(forecast_dates, p_low, p_high,
                alpha=0.22, label="95 % CI")
ax.plot(forecast_dates, realised, "o-", lw=1.5, label="realised", color='green')


ax.set_xlim(forecast_dates[0], forecast_dates[-1])
ax.set_title(f"Monte-Carlo fan: {k} business days after {anchor_dt.date()}")
ax.set_ylabel("Price")
ax.legend(frameon=False)
ax.grid(True)

ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
fig.autofmt_xdate()
plt.tight_layout(); plt.show()

