In [None]:
pip install numpy tensorflow matplotlib sqlalchemy pandas psycopg2 urllib3 scipy

In [None]:
import pandas as pd
import numpy as np
import glob
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Flatten, Concatenate, Dropout, Conv1D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from scipy.stats.mstats import winsorize

In [None]:
# Step 1: Load all CSV files into a single DataFrame
path = 'data/klines'  # use your path

# Column names assigned to every kline CSV, in file column order.
KLINE_COLUMNS = ['Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close time', 'Quote asset volume', 'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore']
# Columns that are cast to float and min-max scaled.
NUMERIC_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote asset volume', 'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume']
# Columns whose extreme 1% tails are clipped before scaling.
OUTLIER_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']

# glob returns paths in arbitrary, platform-dependent order; sorting makes the
# row order of the concatenated frame reproducible between runs.
# NOTE(review): chronological order additionally assumes the file names sort
# by date - confirm against the actual files in `path`.
all_files = sorted(glob.glob(path + "/*.csv"))
if not all_files:
    # Fail with an explicit message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in '{path}'")

li = []

for filename in all_files:
    # NOTE(review): header=0 consumes the first row of each file as a header
    # before the names are overwritten below; if a file has no header row its
    # first record is silently dropped - confirm every file has one.
    df = pd.read_csv(filename, index_col=None, header=0)
    df.columns = KLINE_COLUMNS
    li.append(df)

frame = pd.concat(li, axis=0, ignore_index=True)

# Step 2: Convert timestamps to readable dates and derive calendar features
time_features = ['Open time', 'Close time']
for feature in time_features:
    frame[feature] = pd.to_datetime(frame[feature], unit='ms')
    frame[f'{feature} hour'] = frame[feature].dt.hour
    frame[f'{feature} day of week'] = frame[feature].dt.weekday
    # isocalendar().week is a nullable UInt32 extension column; left as-is it
    # forces the final feature matrix to dtype=object, so cast it to a plain int.
    frame[f'{feature} week of year'] = frame[feature].dt.isocalendar().week.astype('int64')
    frame[f'{feature} month'] = frame[feature].dt.month

# Type casting for the numeric columns
frame[NUMERIC_COLUMNS] = frame[NUMERIC_COLUMNS].astype(float)

# Clip outliers BEFORE scaling: if the extremes are clipped afterwards they
# have already determined min/max, and the clipped columns no longer span [0, 1].
for col in OUTLIER_COLUMNS:
    # winsorize returns a masked array; store a plain ndarray in the frame.
    frame[col] = np.asarray(winsorize(frame[col].to_numpy(), limits=[0.01, 0.01]))


def min_max_scaler(x):
    """Rescale a Series to [0, 1]; a constant Series maps to zeros instead of NaN."""
    low = x.min()
    span = x.max() - low
    if span == 0:
        return x - low
    return (x - low) / span


# Normalize using Min-Max Scaling
# NOTE(review): min/max are taken over the whole frame; if the data is later
# split into train/test sets this leaks test-set statistics into training.
frame[NUMERIC_COLUMNS] = frame[NUMERIC_COLUMNS].apply(min_max_scaler)

# Feature matrix for the model: scaled numeric columns followed by the
# (unscaled) calendar features of each timestamp column.
FEATURE_COLUMNS = NUMERIC_COLUMNS + [
    f'{feature} {part}'
    for feature in time_features
    for part in ('hour', 'day of week', 'week of year', 'month')
]
# An explicit float dtype guarantees a numeric array the model can consume.
features = frame[FEATURE_COLUMNS].to_numpy(dtype=np.float32)
# target and data preparation for LSTM or model would go here


In [None]:
def create_optimized_model(window_size, feature_size, action_size):
    """Build and compile a two-branch (Conv1D + LSTM) network.

    Both branches read the same ``(window_size, feature_size)`` input. Each is
    batch-normalised, passed through dropout and flattened; the two results
    are concatenated and fed to an L2-regularised dense head.

    Args:
        window_size: Number of time steps per input sample.
        feature_size: Number of features per time step.
        action_size: Number of units in the sigmoid output layer.

    Returns:
        The model, compiled with Adam (learning rate 0.001), binary
        cross-entropy loss and an accuracy metric.
    """
    def _regularise_and_flatten(tensor):
        # Shared tail of both branches: BatchNorm -> Dropout(0.3) -> Flatten.
        tensor = BatchNormalization()(tensor)
        tensor = Dropout(0.3)(tensor)
        return Flatten()(tensor)

    sequence_in = Input(shape=(window_size, feature_size))

    # Convolutional branch
    conv_features = _regularise_and_flatten(
        Conv1D(filters=64, kernel_size=3, activation='relu')(sequence_in)
    )

    # Recurrent branch; return_sequences=True keeps every time step's output
    recurrent_features = _regularise_and_flatten(
        LSTM(50, return_sequences=True)(sequence_in)
    )

    # Dense head on top of the merged branches: (units, dropout rate) per stage
    head = Concatenate()([conv_features, recurrent_features])
    for units, drop_rate in ((100, 0.3), (25, 0.2)):
        head = Dense(units, activation='relu', kernel_regularizer=l2(0.01))(head)
        head = Dropout(drop_rate)(head)

    # One sigmoid unit per action
    outputs = Dense(action_size, activation='sigmoid')(head)

    network = Model(inputs=sequence_in, outputs=outputs)
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Training Function Modified for Generalization and Efficiency
def train_model(model, data, actions, rewards, epochs=10, batch_size=32):
    """Train ``model`` on reward-overwritten prediction targets, batch by batch.

    For each mini-batch the model's current predictions are used as the
    training target, except that the output column selected by every sample's
    action is replaced by that sample's reward. The model is then updated
    with ``train_on_batch``.

    Args:
        model: Object exposing ``output_shape``, ``predict_on_batch`` and
            ``train_on_batch`` (e.g. a compiled Keras model).
        data: Array of samples; the first axis indexes samples.
        actions: One integer output-column index per sample, each in
            ``[0, model.output_shape[-1])``.
        rewards: One target value per sample.
        epochs: Number of passes over ``data``.
        batch_size: Samples per mini-batch. The last batch of an epoch may be
            smaller, so no sample is skipped.

    Returns:
        dict: ``{'loss': [...]}`` with the mean batch loss of every epoch.

    Raises:
        ValueError: If ``data`` is empty, ``batch_size`` is not positive, the
            inputs differ in length, or an action index is out of range.
    """
    action_size = model.output_shape[-1]  # Generalizing for any action size
    n_samples = data.shape[0]
    actions = np.asarray(actions).reshape(-1)
    rewards = np.asarray(rewards).reshape(-1)

    if batch_size < 1:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if n_samples == 0:
        raise ValueError("data contains no samples")
    if len(actions) != n_samples or len(rewards) != n_samples:
        raise ValueError(
            f"data, actions and rewards must have the same length, got "
            f"{n_samples}, {len(actions)} and {len(rewards)}"
        )
    # Negative indices would silently wrap around to another output column.
    if actions.min() < 0 or actions.max() >= action_size:
        raise ValueError(f"actions must lie in [0, {action_size})")

    history = {'loss': []}

    for epoch in range(epochs):
        losses = []
        # Step up to n_samples so the trailing partial batch is trained too.
        for start_idx in range(0, n_samples, batch_size):
            stop_idx = start_idx + batch_size
            batch_data = data[start_idx:stop_idx]
            batch_actions = actions[start_idx:stop_idx]
            batch_rewards = rewards[start_idx:stop_idx]

            # predict_on_batch avoids the per-call overhead and progress
            # output of model.predict inside a tight loop; np.array copies.
            updated_qs = np.array(model.predict_on_batch(batch_data))
            # Overwrite, per row, the column of the taken action with its reward.
            updated_qs[np.arange(len(batch_actions)), batch_actions] = batch_rewards

            # With compiled metrics train_on_batch returns [loss, metric, ...];
            # keep only the loss so metrics are not averaged into it.
            result = model.train_on_batch(batch_data, updated_qs)
            losses.append(float(np.ravel(result)[0]))

        avg_loss = float(np.mean(losses))
        history['loss'].append(avg_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

    return history

# Plotting Function
def plot_history(history):
    """Plot the per-epoch training loss stored under ``history['loss']``.

    Args:
        history: Mapping with a ``'loss'`` entry holding one value per epoch.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Imported locally: the notebook's import cell does not import pyplot, so
    # the module-level name `plt` was undefined and this function raised
    # NameError.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(history['loss'], label='Training loss')
    ax.set_title('Training Loss Over Epochs')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()

# Example usage
# NOTE(review): `features` is not assigned in this cell; it is expected to be
# left in the kernel by the data-preparation cell, which must run first.
window_size = 100  # example value, adjust based on your temporal input size
feature_size = features.shape[1]  # from your existing 'features' array
action_size = 3  # for example, if actions are coded as 0, 1, 2

# Build and compile the model for inputs of shape (window_size, feature_size).
model = create_optimized_model(window_size, feature_size, action_size)

# NOTE(review): `data`, `actions` and `rewards` are not assigned anywhere in the
# visible notebook, so on a fresh kernel this call raises NameError. Presumably
# `data` should hold windows of `features` shaped
# (n_samples, window_size, feature_size), `actions` one integer in
# [0, action_size) per window and `rewards` one target value per window -
# TODO confirm and add the cell that builds them.
history = train_model(model, data, actions, rewards)
plot_history(history)

# Print the layer-by-layer summary of the model.
model.summary()

