In [None]:
import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import yfinance as yf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

## Import data

In [None]:
# Date window for the BTC-USD price history request.
start_date = datetime.datetime(year=2017, month=10, day=31)
end_date = datetime.datetime(year=2024, month=1, day=1)

# Ticker handle used to query the data through yfinance.
btc_info = yf.Ticker("BTC-USD")

# Fetch the price history between the two dates defined above.
df = btc_info.history(
    start=start_date,
    end=end_date,
)

In [None]:
# Preview the first five rows of the downloaded frame.
df.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-10-31 00:00:00+00:00,6132.02002,6470.430176,6103.330078,6468.399902,2311379968,0.0,0.0
2017-11-01 00:00:00+00:00,6440.970215,6767.310059,6377.879883,6767.310059,2870320128,0.0,0.0
2017-11-02 00:00:00+00:00,6777.77002,7367.330078,6758.720215,7078.5,4653770240,0.0,0.0
2017-11-03 00:00:00+00:00,7087.529785,7461.290039,7002.939941,7207.759766,3369860096,0.0,0.0
2017-11-04 00:00:00+00:00,7164.47998,7492.859863,7031.279785,7379.950195,2483800064,0.0,0.0


In [None]:
# Remove the 'Dividends' and 'Stock Splits' columns; they are not used later.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')

In [None]:
# Rename by column name rather than by position. A positional
# `df.columns = [...]` raises a length-mismatch error as soon as the frame
# has a different number of columns (e.g. when this cell is re-run after
# feature columns were added), whereas `rename` is then simply a no-op.
df = df.rename(columns={
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'vol',
})

In [None]:
# Summarize the frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2253 entries, 2017-10-31 00:00:00+00:00 to 2023-12-31 00:00:00+00:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    2253 non-null   float64
 1   high    2253 non-null   float64
 2   low     2253 non-null   float64
 3   close   2253 non-null   float64
 4   vol     2253 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 105.6 KB


### Feature engineering

In [None]:
# Moving Average function
def moving_average(data, period):
    """Return the simple moving average of `data` over `period` observations.

    Parameters
    ----------
    data : pandas Series (or any object exposing ``.rolling``)
        Values to smooth.
    period : int
        Size of the rolling window.

    Returns
    -------
    Same type as ``data.rolling(...).mean()``; positions that do not yet have
    a full window of observations are NaN.
    """
    windowed = data.rolling(period)
    return windowed.mean()

# Calculate RSI function
def calculate_rsi(data, period=14):
    """Relative Strength Index based on simple rolling means of gains/losses.

    Parameters
    ----------
    data : pandas Series
        Price series.
    period : int, default 14
        Number of consecutive price changes averaged per RSI value.

    Returns
    -------
    pandas Series
        RSI in [0, 100], aligned with `data`. The first `period` positions are
        NaN because `period` price changes (i.e. `period + 1` prices) are
        needed. A window with no losses yields 100; a window with neither
        gains nor losses yields NaN (0 / 0).

    Notes
    -----
    Averages are plain (unweighted) rolling means, not exponentially smoothed.
    """
    delta = data.diff(1)

    # `clip` keeps the leading NaN produced by `diff`, so the undefined first
    # change is not counted as a zero move inside the first rolling window.
    gain = delta.clip(lower=0).rolling(window=period).mean()
    loss = (-delta).clip(lower=0).rolling(window=period).mean()

    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

In [None]:
# Append the technical indicators derived from the closing price:
# 10- and 30-observation moving averages plus a 14-observation RSI.
df = df.assign(
    MA10=moving_average(df['close'], 10),
    MA30=moving_average(df['close'], 30),
    RSI=calculate_rsi(df['close'], 14),
)


In [None]:
# Keep only rows whose index is strictly later than '2018-01-01'; the
# earlier rows include the NaN lead-in of the rolling features.
df = df.loc[df.index > '2018-01-01']

In [None]:
# Prepare the volume and price differences, normalize volume
BTC_vol = df["vol"].values

# First differences of every column; rows containing NaN are discarded.
df_diff = df.diff().dropna()

# Replace the differenced volume with log(1 + previous row's raw volume).
# The lookup is aligned on the index instead of assigning `BTC_vol[:-1]` by
# position, so it stays correct (and does not raise a length-mismatch error)
# if `dropna()` removes more than just the first row.
# NOTE(review): each row carries the *previous* row's volume while the other
# columns are differences ending at the current row — confirm the lag is intended.
df_diff["vol"] = np.log(1 + df["vol"].shift(1)).loc[df_diff.index]

In [None]:
# Undifferenced rows restricted to the dates that survived differencing,
# so raw values can be looked up alongside df_diff.
df_aligned = df.loc[df_diff.index, :]

### Train, Test split

In [None]:
# Train data
# Window: rows dated from 2018-01-01 up to, but not including, 2023-01-01.
mask_train = (df_diff.index < "2023-01-01") & (df_diff.index >= "2018-01-01")
df_train = df_diff[mask_train].copy()

# Undifferenced closing prices for the same rows, expressed relative to the
# first training close.
train_close = df_aligned["close"].to_numpy()[mask_train]
df_train = df_train.assign(Relative_Close=train_close / train_close[0])

In [None]:
# Test data
# Window: rows dated from 2023-01-01 up to, but not including, 2024-01-01.
mask_test = (df_diff.index < "2024-01-01") & (df_diff.index >= "2023-01-01")  # calendar year 2023
df_test = df_diff[mask_test].copy()

# Undifferenced test closes, scaled by the first *training* close so that
# train and test share the same reference price.
test_close = df_aligned["close"].to_numpy()[mask_test]
df_test = df_test.assign(Relative_Close=test_close / train_close[0])


In [None]:
# Preview the first five training rows (differenced features + Relative_Close).
df_train.head(n=5)

Unnamed: 0_level_0,open,high,low,close,vol,MA10,MA30,RSI,Relative_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-03 00:00:00+00:00,1353.200195,128.200195,1680.900391,218.900391,23.547415,127.52002,118.12666,4.522984,1.0
2018-01-04 00:00:00+00:00,292.5,166.900391,-322.299805,398.200195,23.548915,157.260059,122.75,4.629498,1.026196
2018-01-05 00:00:00+00:00,206.5,1965.499023,680.599609,1830.299805,23.804405,132.97002,104.6,15.151098,1.146602
2018-01-06 00:00:00+00:00,1984.899414,7.201172,1561.799805,97.5,23.894668,168.85,-12.423307,-2.332564,1.153016
2018-01-07 00:00:00+00:00,65.201172,-132.800781,-676.899414,-1049.400391,23.630964,187.109961,-3.060026,-1.421186,1.083981


In [None]:
# Preview the first five test rows (differenced features + Relative_Close).
df_test.head(n=5)

Unnamed: 0_level_0,open,high,low,close,vol,MA10,MA30,RSI,Relative_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-01 00:00:00+00:00,-55.759766,1.453125,3.714844,77.583984,23.142672,-20.526172,-15.452669,3.587225,1.093683
2023-01-02 00:00:00+00:00,77.595703,128.904297,50.994141,63.390625,22.94728,-10.848242,-7.325521,12.652816,1.097853
2023-01-03 00:00:00+00:00,63.337891,1.103516,50.142578,-8.613281,23.216287,-16.789844,-15.020964,-20.369304,1.097287
2023-01-04 00:00:00+00:00,-8.642578,204.138672,45.392578,183.380859,23.355376,2.125195,-3.719596,13.807905,1.109351
2023-01-05 00:00:00+00:00,183.267578,-80.564453,122.519531,-26.501953,23.636798,-8.306836,-8.425586,-1.848586,1.107607


In [None]:
# Generate dataset function
def generate_dataset(df, seq_len):
    """Build sliding-window samples and next-step targets from `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame; must contain a "close" column, which supplies the target.
    seq_len : int
        Number of consecutive rows in each input window.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, seq_len, n_features)
        Window ``i`` holds rows ``i .. i + seq_len - 1`` of `df` (all columns).
    y : numpy.ndarray, shape (n_samples,)
        ``df["close"]`` at row ``i + seq_len`` for each window.

    ``n_samples`` is ``len(df) - seq_len``. When `df` has no more than
    `seq_len` rows, correctly shaped empty arrays are returned so that
    ``X.shape[1]`` / ``X.shape[2]`` remain usable by callers.
    """
    # Convert once up front instead of slicing the DataFrame on every iteration.
    values = df.to_numpy()
    targets = df["close"].to_numpy()

    n_samples = max(len(df) - seq_len, 0)
    if n_samples == 0:
        empty_X = np.empty((0, seq_len, values.shape[1]), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=targets.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + seq_len] for start in range(n_samples)])
    y = targets[seq_len:seq_len + n_samples].copy()
    return X, y

In [None]:
# Number of consecutive past rows in each input window; passed as `seq_len`
# to generate_dataset and used to prepend training rows to the test frame.
LAG = 1

In [None]:
# Prepare training and test datasets
X_train, y_train = generate_dataset(df_train, LAG)
X_test, y_test = generate_dataset(pd.concat((df_train.iloc[-LAG:], df_test)), LAG)

In [None]:
# num_samples, num_timesteps, num_features = X_train.shape
# X_train_reshaped = X_train.reshape(-1, num_features)
# scaler = MinMaxScaler(feature_range=(0, 1))
# X_train_scaled = scaler.fit_transform(X_train_reshaped)
# X_train_scaled = X_train_scaled.reshape(num_samples, num_timesteps, num_features)

In [None]:
# num_samples_test, num_timesteps, num_features = X_test.shape
# X_test_reshaped = X_test.reshape(-1, num_features)
# X_test_scaled = scaler.transform(X_test_reshaped)
# X_test_scaled = X_test_scaled.reshape(num_samples_test, num_timesteps, num_features)

In [None]:
# Model architecture
# Fix the random seed so weight initialisation is repeatable.
tf.keras.utils.set_random_seed(4002)

# One LSTM layer feeding a single-unit dense output; the input shape is
# (timesteps, features), read from the training array.
model = Sequential()
model.add(
    LSTM(
        units=50,
        activation='relu',
        input_shape=(X_train.shape[1], X_train.shape[2]),
    )
)
model.add(Dense(units=1))

model.compile(loss='mse', optimizer='adam')

In [None]:
# Fit for 20 epochs, holding out 20% of the training samples for validation.
model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7f26dac45cf0>

In [None]:
# Run the trained model on the test windows.
predictions = model.predict(x=X_test)



In [None]:
# Evaluate
# Mean squared error between the true next-step targets and the model output.
# `mean_squared_error` is imported in the notebook's import cell rather than
# here, so all dependencies are declared in one place.
mse = mean_squared_error(y_test, predictions)
print(f'Test MSE: {mse}')

Test MSE: 443687.1770404504
