In [1]:
import pandas as pd
import numpy as np
import datetime

# Parameters for the synthetic data generation
num_records = 1000
start_date = datetime.datetime(2020, 1, 1)
# One timestamp per hour. The step is passed as a Timedelta instead of the
# 'H' string alias: that alias is deprecated in recent pandas releases
# (FutureWarning), whereas a Timedelta is accepted by old and new versions.
date_range = pd.date_range(start_date, periods=num_records, freq=pd.Timedelta(hours=1))

# Generate synthetic data.
# NOTE: the order of the np.random calls below determines the generated
# values for a given seed, so do not reorder them.
np.random.seed(42)  # For reproducibility
# Each asset price is a random walk (cumulative sum of standard-normal steps)
# shifted by a constant starting level.
asset_1_prices = np.cumsum(np.random.randn(num_records)) + 100
asset_2_prices = np.cumsum(np.random.randn(num_records)) + 150
asset_3_prices = np.cumsum(np.random.randn(num_records)) + 200
# Portfolio value is the sum of the three assets plus Gaussian noise (std 10).
portfolio_values = asset_1_prices + asset_2_prices + asset_3_prices + np.random.randn(num_records) * 10
# Market index is an independent random walk starting near 1000.
market_index = np.cumsum(np.random.randn(num_records)) + 1000

# Create a DataFrame
data = {
    'timestamp': date_range,
    'Asset_1': asset_1_prices,
    'Asset_2': asset_2_prices,
    'Asset_3': asset_3_prices,
    'Portfolio_Value': portfolio_values,
    'Market_Index': market_index
}

df = pd.DataFrame(data)

# Save the DataFrame to a CSV file (row index omitted)
csv_file_path = 'financial_portfolio_data.csv'
df.to_csv(csv_file_path, index=False)
print(f"Dataset saved to {csv_file_path}")


Dataset saved to financial_portfolio_data.csv


In [2]:
# Read the saved CSV back from disk and print its first five rows as a sanity check
loaded_df = pd.read_csv(filepath_or_buffer=csv_file_path)
print(loaded_df.head(n=5))


             timestamp     Asset_1     Asset_2     Asset_3  Portfolio_Value  \
0  2020-01-01 00:00:00  100.496714  151.399355  199.324822       432.142816   
1  2020-01-01 01:00:00  100.358450  152.323989  199.180303       443.258892   
2  2020-01-01 02:00:00  101.006138  152.383619  198.387883       447.641586   
3  2020-01-01 03:00:00  102.529168  151.736683  198.079922       471.222649   
4  2020-01-01 04:00:00  102.295015  152.434906  196.186307       456.481759   

   Market_Index  
0    999.136506  
1    999.105303  
2    999.123320  
3    999.595950  
4    998.229092  


In [3]:
import gym
from gym import spaces

class PortfolioEnv(gym.Env):
    """Toy trading environment that walks through the rows of a DataFrame.

    One call to ``step`` consumes one row of ``df``. Observations are the
    five columns listed in ``OBS_COLUMNS`` as a float32 vector; actions are
    discrete: 0 = hold, 1 = buy, 2 = sell.

    ``reset()`` returns the first observation and ``step()`` returns the
    4-tuple ``(observation, reward, done, info)``.

    NOTE(review): the reward rule is kept exactly as originally written --
    buying yields ``+Portfolio_Value``, selling yields ``-Portfolio_Value``,
    and the balance may go negative. No holdings are tracked, so this rewards
    buying unconditionally; confirm whether that is the intended objective.
    """

    # Columns exposed to the agent, in observation order.
    OBS_COLUMNS = ['Asset_1', 'Asset_2', 'Asset_3', 'Portfolio_Value', 'Market_Index']
    # Cash balance at the start of every episode.
    INITIAL_BALANCE = 10000

    def __init__(self, df):
        """Store the price data and declare the action/observation spaces.

        Args:
            df: DataFrame containing at least the columns in ``OBS_COLUMNS``.
        """
        super(PortfolioEnv, self).__init__()
        self.df = df
        self.current_step = 0
        self.action_space = spaces.Discrete(3)  # Actions: 0 = hold, 1 = buy, 2 = sell
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32)
        self.balance = self.INITIAL_BALANCE
        self.portfolio_value = self.balance

    def reset(self):
        """Rewind to the first row, restore the balance, return the first observation."""
        self.current_step = 0
        self.balance = self.INITIAL_BALANCE
        self.portfolio_value = self.balance
        return self._next_observation()

    def _current_row(self):
        """Index of the row for ``current_step``, clamped to the last row.

        Clamping lets a stray ``step()`` after the data is exhausted re-read
        the final row instead of raising ``IndexError``.
        """
        return min(self.current_step, len(self.df) - 1)

    def _next_observation(self):
        """Return the observation columns of the current row as float32.

        The row may also hold non-numeric columns (e.g. a timestamp string),
        which makes the row object-typed; the explicit float32 conversion
        makes the result match ``observation_space``.
        """
        obs = self.df.iloc[self._current_row()][self.OBS_COLUMNS].to_numpy(dtype=np.float32)
        return obs

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Args:
            action: 0 = hold, 1 = buy, 2 = sell.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the
            observation of the row *after* advancing, ``reward`` follows the
            rule described in the class docstring, ``done`` is True once the
            last row has been reached, and ``info`` is an empty dict.
        """
        # Price the action against the row the agent has just observed; read
        # it as a Python float so balance arithmetic is not limited to the
        # float32 precision used for observations.
        price = float(self.df.iloc[self._current_row()]['Portfolio_Value'])
        self.current_step += 1

        reward = 0
        if action == 1:  # Buy
            self.balance -= price  # Subtract portfolio value
            reward = price  # Reward is the portfolio value
        elif action == 2:  # Sell
            self.balance += price  # Add portfolio value
            reward = -price  # Negative reward as penalty for selling

        self.portfolio_value = self.balance + price
        done = self.current_step >= len(self.df) - 1
        # Return the observation for the new step. Previously the pre-action
        # observation was returned, so callers saw next_state == state.
        return self._next_observation(), reward, done, {}

    def render(self, mode='human'):
        """Print the current step, cash balance and portfolio value."""
        print(f'Step: {self.current_step}, Balance: {self.balance}, Portfolio Value: {self.portfolio_value}')

# Instantiate the trading environment on top of the frame read back from the CSV
env = PortfolioEnv(df=loaded_df)


In [4]:
from collections import deque
import random

class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration and replay memory.

    Q-values are stored in a dict keyed by the flattened state converted to a
    tuple; states that have not been seen have all-zero Q-values.
    """

    def __init__(self, state_size, action_size):
        """Set up hyper-parameters, the replay memory and an empty Q-table.

        Args:
            state_size: Number of features in a state vector.
            action_size: Number of discrete actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # discount factor
        self.epsilon = 1.0  # current exploration probability
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay() call
        self.learning_rate = 0.001  # step size of the Q-value update
        self.q_table = {}

    def _state_key(self, state):
        """Convert a state (array or array-like) into a hashable dict key."""
        return tuple(np.asarray(state).flatten())

    def get_qs(self, state):
        """Return the Q-value array for ``state`` (zeros if the state is unseen).

        For a known state the stored array itself is returned, not a copy.
        """
        return self.q_table.get(self._state_key(state), np.zeros(self.action_size))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action epsilon-greedily.

        Returns:
            A Python ``int`` action index on both the random and greedy path.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        qs = self.get_qs(state)
        return int(np.argmax(qs))

    def replay(self, batch_size):
        """Update the Q-table from a random minibatch, then decay epsilon.

        Args:
            batch_size: Requested number of transitions; if fewer are stored,
                all stored transitions are used instead of raising.
        """
        # random.sample raises ValueError when asked for more items than exist.
        sample_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, sample_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.get_qs(next_state))
            qs = self.get_qs(state)
            # Move the estimate toward the target by learning_rate instead of
            # overwriting it (learning_rate was previously never used).
            qs[action] += self.learning_rate * (target - qs[action])
            self.q_table[self._state_key(state)] = qs
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Initialize the agent
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = QLearningAgent(state_size, action_size)

# Train the agent
episodes = 500
batch_size = 32
max_steps_per_episode = 200  # each episode is truncated after this many steps
log_every = 50  # print a summary every N episodes (and on the last one)

# The agent draws exploratory actions from the stdlib `random` module, so it
# is seeded here to make training repeatable.
random.seed(42)

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    episode_reward = 0

    for step_idx in range(max_steps_per_episode):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        agent.remember(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward

        if done:
            break

        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

    # Report after the episode ends for any reason. Previously the summary was
    # printed only when `done` was True, which the step cap above prevents
    # whenever the data has more rows than max_steps_per_episode; it also
    # showed only the last step's reward rather than the episode total.
    if (e + 1) % log_every == 0 or e + 1 == episodes:
        print(f"Episode: {e+1}/{episodes}, Score: {episode_reward}, Epsilon: {agent.epsilon:.2}")


In [5]:
# Evaluate the agent
state = env.reset()
state = np.reshape(state, [1, state_size])
total_reward = 0

# Walk the data until the environment reports done; the range is only a
# safety bound. The previous fixed 500-step cap ended the loop before `done`
# could become True on longer data, so no result was ever printed.
for step_idx in range(len(env.df)):
    # Act greedily on the learned Q-values: agent.act() would still pick a
    # random action with probability epsilon, which distorts evaluation.
    action = int(np.argmax(agent.get_qs(state)))
    next_state, reward, done, _ = env.step(action)
    next_state = np.reshape(next_state, [1, state_size])
    total_reward += reward
    state = next_state

    if done:
        break

# total_reward is the sum of step rewards, not a balance, so both are reported.
print(f"Total Reward: {total_reward}, Final Balance: {env.balance}")
