In [None]:
import pandas as pd
import numpy as np
# Load the raw candle data from Google Drive.
data = pd.read_csv('/content/drive/MyDrive/data.csv')

# Convert 'Timestamp' column from milliseconds to whole seconds (the integer
# division discards any sub-second remainder) and parse it as a datetime.
data['datetime'] = pd.to_datetime(data['Timestamp'] // 1000, unit='s')

data = (
    data
    # Set the 'datetime' column as the index
    .set_index('datetime')
    # Drop the columns that are not used downstream
    .drop(columns=['Volume BTC', 'Timestamp', 'Symbol', 'Date'])
    # Rename columns to match Backtrader's expectations
    .rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume USD': 'volume'})
    .dropna()
    # The rolling/EMA indicators, the sliding windows and the chronological
    # train/test split computed later all assume rows in ascending time order,
    # and read_csv keeps whatever order the file has. A stable sort leaves an
    # already-ascending file untouched and keeps duplicate timestamps in file order.
    .sort_index(kind='mergesort')
)

In [None]:
# Install TA-Lib from prebuilt conda-forge archives: first the native library,
# then the Python wrapper package.
# NOTE(review): both archives and the target directory are pinned to linux-64 /
# Python 3.10 paths; on a runtime with a different Python version the wrapper
# would presumably land in a directory the interpreter does not search — confirm
# against the current runtime before relying on this cell.
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
# Stream the archive and unpack only its lib/ entries (leading path component
# stripped) into the system library directory.
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
# Unpack only the talib package directory (three leading path components
# stripped) into dist-packages.
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3
# Importing right away makes this cell fail if either archive did not unpack correctly.
import talib

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, Dense, LSTM, Conv1D, Dropout, MultiHeadAttention, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
import tensorflow as tf

import os
import talib

# Directory Setup
# Checkpoint directory: holds the per-epoch weights file and a small text file
# recording the last completed epoch.
checkpoint_dir = "/content/drive/MyDrive/checkpoint20"
saved_model_dir = "/content/drive/MyDrive/saved_mode20"

# Create both directories up front; exist_ok makes re-running the cell harmless.
os.makedirs(name=checkpoint_dir, exist_ok=True)
os.makedirs(name=saved_model_dir, exist_ok=True)

# Files that live inside the checkpoint directory.
epoch_file_path = os.path.join(checkpoint_dir, "last_epoch.txt")
checkpoint_path = os.path.join(checkpoint_dir, "model_checkpoint.weights.h5")
# Function to load the last epoch
def load_last_epoch(path=None):
    """Return the last completed epoch recorded on disk, or 0 if none is usable.

    Args:
        path: File to read. Defaults to the module-level ``epoch_file_path``.

    Returns:
        The non-negative integer stored in the file. Returns 0 when the file
        does not exist, is empty, or does not contain an integer (for example
        after an interrupted write), so that a damaged marker file restarts the
        epoch counter instead of aborting the run.
    """
    if path is None:
        path = epoch_file_path

    try:
        with open(path, "r") as f:
            raw = f.read().strip()
    except FileNotFoundError:
        return 0  # No epoch file found, i.e., start from scratch

    try:
        epoch = int(raw)
    except ValueError:
        print(f"Ignoring unreadable epoch file {path!r} (content: {raw!r}); starting from epoch 0.")
        return 0

    # A negative value cannot be a completed-epoch count; treat it as "no progress".
    return max(epoch, 0)

# Function to save the current epoch
def save_last_epoch(epoch, path=None):
    """Record ``epoch`` as the last completed epoch.

    Args:
        epoch: Epoch count to store. It is coerced with ``int()`` so the file
            always holds plain integer text that ``int()`` can parse back
            (a float such as ``5.0`` is written as ``5``).
        path: File to write. Defaults to the module-level ``epoch_file_path``.

    Raises:
        ValueError / TypeError: if ``epoch`` cannot be converted to an integer.
    """
    if path is None:
        path = epoch_file_path

    # Convert before opening so a bad value does not truncate the existing file.
    text = str(int(epoch))
    with open(path, "w") as f:
        f.write(text)

# Define 30-minute horizon for prediction
# Both values are row counts passed to create_sequences.
# NOTE(review): "30-minute" presumably assumes one row per minute — confirm
# against the dataset's candle interval.
# horizon: number of rows between the last input row and the target row.
horizon = 30
# sequence_length: number of consecutive rows in each input window.
sequence_length = 30

# Data Preprocessing
def preprocess_data(data):
    """Add technical-indicator columns to ``data`` and return min-max scaled features.

    Side effects: ``data`` is modified in place. Indicator columns are added
    (or overwritten if they already exist) and every row containing a NaN in
    any column is dropped, which removes the indicators' warm-up rows. Calling
    this again on the same frame therefore recomputes the indicators on the
    already-shortened frame and drops further rows.

    Args:
        data: DataFrame with at least 'close', 'high', 'low' and 'volume' columns.

    Returns:
        (scaled_data, scaler): ``scaled_data`` is a 2-D array with one column
        per entry of ``feature_columns`` below, in that order, so column 0 is
        the scaled close. ``scaler`` is the fitted MinMaxScaler.

    NOTE(review): the scaler is fit on every remaining row. The caller in this
    notebook splits into train/test only afterwards, so test-period min/max
    values influence the scaling of the training data.
    """
    close = data['close']

    # Indicators are assigned in the same order the columns were first created
    # before, so the resulting column layout of ``data`` is unchanged.
    data['EMA_20'] = talib.EMA(close, timeperiod=20)
    # Rolling mean of the high-low range over 14 rows. This is a simplified
    # range average, not TA-Lib's ATR (it ignores the previous close).
    data['ATR'] = (data['high'] - data['low']).rolling(window=14).mean()
    # Volume Analysis (On-Balance Volume)
    data['OBV'] = talib.OBV(close, data['volume'])
    # RSI
    data['RSI'] = talib.RSI(close, timeperiod=14)
    # Moving Averages
    data['SMA_20'] = talib.SMA(close, timeperiod=20)
    data['SMA_50'] = talib.SMA(close, timeperiod=50)
    data['EMA_50'] = talib.EMA(close, timeperiod=50)
    # MACD
    data['MACD'], data['MACD_signal'], data['MACD_hist'] = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9)
    # Bollinger Bands
    data['upper_band'], data['middle_band'], data['lower_band'] = talib.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # Drop NaN values (indicator warm-up rows and any other incomplete row)
    data.dropna(inplace=True)

    # Scale features. 'close' must stay first: create_sequences reads the
    # target from column 0.
    feature_columns = ['close', 'RSI', 'SMA_20', 'SMA_50', 'MACD', 'upper_band',
                       'lower_band', 'EMA_20', 'ATR', 'OBV']
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(data[feature_columns])
    return scaled_data, scaler

def create_sequences(data, sequence_length, horizon):
    """Build sliding input windows and their look-ahead targets.

    Sample ``k`` consists of rows ``k .. k + sequence_length - 1`` of ``data``
    as input and ``data[k + sequence_length + horizon - 1, 0]`` as target, i.e.
    the column-0 value ``horizon`` rows after the last input row.

    Args:
        data: 2-D array-like of shape (rows, features); column 0 is the target source.
        sequence_length: Number of rows per input window.
        horizon: Non-negative number of rows between the last input row and the target.

    Returns:
        (X, y): ``X`` has shape (n, sequence_length, features) and ``y`` has
        shape (n,), where ``n = rows - sequence_length - horizon``. When there
        are too few rows for a single sample, correctly shaped empty arrays are
        returned so that ``X.shape[1:]`` is still valid for the caller.

    Note:
        The sample whose target is the very last row is not produced; this
        matches the sample count of the original loop-based implementation.
    """
    data = np.asarray(data)
    n_samples = len(data) - horizon - sequence_length

    if n_samples <= 0:
        empty_X = np.empty((0, sequence_length, data.shape[1]), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    # sliding_window_view yields (windows, features, sequence_length) views;
    # reorder to (windows, sequence_length, features) and copy once so the
    # result owns its memory like the list-built array did.
    windows = np.lib.stride_tricks.sliding_window_view(data, sequence_length, axis=0)
    X = np.ascontiguousarray(windows[:n_samples].transpose(0, 2, 1))

    first_target = sequence_length + horizon - 1
    y = data[first_target:first_target + n_samples, 0].copy()
    return X, y

# Hybrid Model
def build_hybrid_model(input_shape):
    """Build and compile the two-branch CNN + LSTM/attention regression model.

    Args:
        input_shape: Shape of one sample, passed straight to ``Input``
            (the caller supplies ``(sequence_length, n_features)``).

    Returns:
        A compiled Keras ``Model`` with a single linear output unit, trained
        with Adam (learning rate 1e-4) on mean squared error.
    """
    sequence_input = Input(shape=input_shape)

    # Branch 1: one same-padded 1-D convolution over the window, then flattened.
    conv_features = Conv1D(
        filters=32,
        kernel_size=3,
        padding='same',
        activation='relu',
    )(sequence_input)
    conv_flat = Flatten()(conv_features)

    # Branch 2: LSTM that keeps its per-step outputs, followed by multi-head
    # attention where the LSTM output serves as both query and value.
    recurrent_steps = LSTM(units=64, return_sequences=True)(sequence_input)
    attended = MultiHeadAttention(key_dim=32, num_heads=4)(recurrent_steps, recurrent_steps)
    attended_flat = Flatten()(attended)

    # Merge both branches into one feature vector.
    merged = Concatenate()([conv_flat, attended_flat])

    # Regression head: dense -> dropout -> single linear unit.
    hidden = Dense(64, activation='relu')(merged)
    regularized = Dropout(0.3)(hidden)
    prediction = Dense(1)(regularized)

    hybrid = Model(sequence_input, prediction)
    hybrid.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.0001))
    return hybrid

# Load Dataset
# preprocess_data adds indicator columns to `data` in place and fits the scaler
# on all rows, i.e. before the train/test split below.
scaled_data, scaler = preprocess_data(data)
X, y = create_sequences(scaled_data, sequence_length, horizon)

# Train-Test Split
# Chronological split: the first 80% of the windows train the model, the rest
# is held out (and is also what fit() later uses as validation data).
split_ratio = 0.8
train_size = int(len(X) * split_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build and Train Model
model = build_hybrid_model((X_train.shape[1], X_train.shape[2]))

# Check for existing checkpoints
initial_epoch = load_last_epoch()
if os.path.exists(checkpoint_path):
    print(f"Checkpoint found. Loading weights from {checkpoint_path}...")
    # Only weights are restored; the checkpoint is written with
    # save_weights_only=True, so optimizer state starts fresh.
    model.load_weights(checkpoint_path)
else:
    print("No checkpoint found. Starting training from scratch.")
    # An epoch marker without a weights file is stale: without this reset the
    # untrained model would skip its first `initial_epoch` epochs.
    initial_epoch = 0

# Learning Rate Scheduler
# Exponential decay by epoch index: 1e-4 at epoch 0, multiplied by 0.9 each epoch.
lr_schedule = LearningRateScheduler(lambda epoch: 1e-4 * (0.9 ** epoch))

# Model Checkpoint Callback
# Overwrites the same weights file at the end of every epoch (not only the best one).
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, save_best_only=False, save_freq='epoch')
# Custom Callback to Save Epoch after Each Epoch
class EpochSaverCallback(tf.keras.callbacks.Callback):
    """Keras callback that writes the completed-epoch count to disk after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Store ``epoch + 1`` via ``save_last_epoch``; ``logs`` is unused."""
        # The stored value is one more than the index just finished, so it can
        # be passed back to fit() as `initial_epoch` when resuming.
        completed_epochs = epoch + 1
        save_last_epoch(completed_epochs)
# Train Model
# In Keras, `epochs` is the index of the final epoch, not a number of additional
# epochs: a resumed run trains epochs initial_epoch .. total_epochs - 1 and does
# nothing when initial_epoch >= total_epochs.
total_epochs = 50
model.fit(
    X_train, y_train,
    epochs=total_epochs,
    initial_epoch=initial_epoch,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_callback, lr_schedule, EpochSaverCallback()]
)
# Save Model
model_path = os.path.join(saved_model_dir, "model.keras")
model.save(model_path)
print(f"Model saved at {saved_model_dir}/model.keras")

# Save the last epoch number
# After fit() returns, the number of completed epochs is total_epochs (or the
# stored value if that was already higher), never initial_epoch + total_epochs.
save_last_epoch(max(initial_epoch, total_epochs))