In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random


# Define Model Architecture
def build_model(state_size, action_size):
    model = tf.keras.Sequential([
        layers.Dense(16, activation='relu', input_dim=state_size),
        layers.Dense(16, activation='relu'),
        layers.Dense(action_size, activation='linear')  # Q-values for actions
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    return model

# Reward Function
def calculate_reward(action, current_price, next_price):
    if action == 0:  # Buy
        return next_price - current_price
    elif action == 1:  # Sell
        return current_price - next_price
    else:  # Hold
        return 0

# Data Preprocessing
def preprocess_data(df):
    df['Price_diff'] = df['Value'].diff().fillna(0)
    df['Moving_avg'] = df['Value'].rolling(window=10).mean().fillna(df['Value'].mean())
    return df

# Training RL Model on Data Chunks
def train_model_on_chunks(data, model, state_size, action_size, chunk_size=100, episodes=10, batch_size=16):
    memory = deque(maxlen=200)
    gamma = 0.95  # Discount factor
    epsilon = 1.0  # Exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.995

    # Split data into chunks
    for start_idx in range(0, len(data), chunk_size):
        chunk_data = data.iloc[start_idx:start_idx + chunk_size]
        print(f"Training on chunk {start_idx} to {start_idx + chunk_size}")

        for episode in range(episodes):
            state = chunk_data[['Value', 'Price_diff', 'Moving_avg']].iloc[0].values
            state = np.reshape(state, [1, state_size])

            for t in range(1, len(chunk_data) - 1):
                # Choose action (exploration vs exploitation)
                if np.random.rand() <= epsilon:
                    action = random.randrange(action_size)
                else:
                    action = np.argmax(model.predict(state, verbose=0)[0])

                # Get next state
                next_state = chunk_data[['Value', 'Price_diff', 'Moving_avg']].iloc[t].values
                next_state = np.reshape(next_state, [1, state_size])

                # Calculate reward
                current_price = chunk_data.iloc[t - 1]['Value']
                next_price = chunk_data.iloc[t]['Value']
                reward = calculate_reward(action, current_price, next_price)

                # Store in memory
                memory.append((state, action, reward, next_state))

                # Experience replay
                if len(memory) > batch_size:
                    minibatch = random.sample(memory, batch_size)
                    states = np.array([m[0].flatten() for m in minibatch])
                    next_states = np.array([m[3].flatten() for m in minibatch])
                    next_q_values = model.predict(next_states, verbose=0)

                    for i, (s, a, r, ns) in enumerate(minibatch):
                        target = r + (gamma * np.amax(next_q_values[i]) if t != len(chunk_data) - 1 else r)
                        target_f = model.predict(s, verbose=0)
                        target_f[0][a] = target
                        model.fit(s, target_f, epochs=1, verbose=0)

                state = next_state

            # Decay exploration rate
            if epsilon > epsilon_min:
                epsilon *= epsilon_decay

    return model

# Evaluation
def evaluate_model(model, data, state_size):
    test_state = data[['Value', 'Price_diff', 'Moving_avg']].iloc[-1].values
    test_state = np.reshape(test_state, [1, state_size])

    actions_taken = []
    profits = []
    recommendations = []
    timestamps = []

    for t in range(len(data) - 1):
        action = np.argmax(model.predict(test_state, verbose=0)[0])
        actions_taken.append(action)

        reward = calculate_reward(action, data.iloc[t]['Value'], data.iloc[t + 1]['Value'])
        profits.append(reward)

        timestamps.append(data.index[t + 1])
        if action == 0:
            recommendations.append('Buy')
        elif action == 1:
            recommendations.append('Sell')
        else:
            recommendations.append('Hold')

    recommendation_df = pd.DataFrame({
        'Timestamp': timestamps,
        'Action': recommendations,
        'Profit': profits
    })

    best_actions_df = recommendation_df[recommendation_df['Profit'] > 0]

    print("Best Actions with Positive Profit:")
    print(best_actions_df)

    total_profit = np.sum(profits)
    print(f'Total Profit: {total_profit}')

# Main Code
# Load your data (assuming CSV with columns 'Time' and 'Value')
data = pd.read_csv('DowJones.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
data = preprocess_data(data)

# Initialize model
state_size = 3  # Value, Price_diff, Moving_avg
action_size = 3  # Buy, Sell, Hold
model = build_model(state_size, action_size)

# Train model on data chunks
chunk_size = 100
model = train_model_on_chunks(data, model, state_size, action_size, chunk_size=chunk_size, episodes=10)

# Evaluate the trained model
evaluate_model(model, data, state_size)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on chunk 0 to 100
Training on chunk 100 to 200
Training on chunk 200 to 300
Training on chunk 300 to 400
Training on chunk 400 to 500
Training on chunk 500 to 600
Training on chunk 600 to 700
Training on chunk 700 to 800
Training on chunk 800 to 900
Training on chunk 900 to 1000
Training on chunk 1000 to 1100
Training on chunk 1100 to 1200
Training on chunk 1200 to 1300
Training on chunk 1300 to 1400
Training on chunk 1400 to 1500
Training on chunk 1500 to 1600
Training on chunk 1600 to 1700
Training on chunk 1700 to 1800
Training on chunk 1800 to 1900
Training on chunk 1900 to 2000
Training on chunk 2000 to 2100
Best Actions with Positive Profit:
      Timestamp Action  Profit
3    1977-09-30    Buy    7.97
7    1977-10-28    Buy   14.38
9    1977-11-11    Buy   35.95
11   1977-11-25    Buy    8.66
14   1977-12-16    Buy    0.09
...         ...    ...     ...
2073 2017-07-14    Buy  223.40
2075 2017-07-28    Buy  250.24
2076 2017-08-04    Buy  262.50
2079 2017-08-25    Buy  1