In [31]:
import cira

# Point cira at the JSON key file; the path is relative to the notebook's working directory.
cira.auth.KEY_FILE = "../../alpc_key.json"
# Fail fast, before any other cell runs, when check_keys() reports that the configured keys are unusable.
assert cira.auth.check_keys(), "the keys set in cira.auth.KEY_FILE do not work"

In [32]:
# Portfolio object from cira; it is not referenced again in the cells visible here.
portfolio = cira.Portfolio()

In [33]:
import pandas as pd 
import numpy as np
from typing import List, Dict

In [34]:
from datetime import datetime
from pathlib import Path

assets_symbols = ["MSFT", "TSLA", "AMZN"]
stk_hist_data = {}
# True: reuse an existing ./<SYMBOL>.csv file; False: always re-download before loading.
IS_CACHED = True

# Download window, only used when a symbol's history has to be fetched.
start = datetime(2015, 7, 1)
end = datetime(2023, 7, 1)

for SYMBOL in assets_symbols:
    SYM_HIST_FILE = f"./{SYMBOL}.csv"

    stk = cira.Stock(SYMBOL)

    # Fetch when caching is disabled OR the cached file is missing, so that a
    # fresh checkout (no CSV files yet) still survives "Restart & Run All".
    if not IS_CACHED or not Path(SYM_HIST_FILE).is_file():
        stk.save_historical_data(SYM_HIST_FILE, start, end)

    # Always read back from the CSV so both paths yield the same representation.
    data = stk.load_historical_data(SYM_HIST_FILE)
    stk_hist_data[SYMBOL] = data



In [35]:

# Build one frame holding the "close" series of every loaded symbol, one
# column per symbol. Columns are assigned one at a time, so the first symbol
# fixes the index and later symbols are aligned to it.
df = pd.DataFrame()

for sym in stk_hist_data:
    hist = stk_hist_data[sym]
    df[sym] = hist["close"]

# Constant CASH column: 1.0 on every row.
df = df.assign(CASH=1.0)

df

Unnamed: 0_level_0,MSFT,TSLA,AMZN,CASH
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-04 05:00:00+00:00,54.80,223.41,636.99,1.0
2016-01-05 05:00:00+00:00,55.05,223.43,633.79,1.0
2016-01-06 05:00:00+00:00,54.05,219.04,632.65,1.0
2016-01-07 05:00:00+00:00,52.17,215.65,607.94,1.0
2016-01-08 05:00:00+00:00,52.33,211.00,607.05,1.0
...,...,...,...,...
2023-06-26 04:00:00+00:00,328.60,241.05,127.33,1.0
2023-06-27 04:00:00+00:00,334.57,250.21,129.18,1.0
2023-06-28 04:00:00+00:00,335.85,256.24,129.04,1.0
2023-06-29 04:00:00+00:00,335.05,257.50,127.90,1.0


In [41]:
from tqdm import tqdm

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random

# Define the Q-network
class QNetwork(nn.Module):
    """Fully connected network mapping a state vector to one Q-value per action.

    Architecture: state_size -> 64 -> 64 -> action_size, with ReLU after each
    of the two hidden layers and no activation on the output layer.
    """

    def __init__(self, state_size, action_size):
        """Create the three linear layers.

        Args:
            state_size: number of input features.
            action_size: number of outputs (one Q-value per action).
        """
        super().__init__()
        hidden_units = 64
        # Layers are created in fc1, fc2, fc3 order; the attribute names are
        # also the keys used in the module's state_dict.
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the Q-values for `x` (last dimension must be state_size)."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        # Output layer is linear: Q-values are unbounded.
        return self.fc3(hidden)

# Define the Deep Q-Learning agent
class DQNAgent:
    """Epsilon-greedy Deep Q-Learning agent with replay memory and a target network.

    The agent owns two `QNetwork` instances: `model` (trained online) and
    `target_model` (a periodically refreshed copy used to compute bootstrap
    targets). Transitions are stored with `remember` and learned from with
    `replay`.

    Attributes:
        memory: list of (state, action, reward, next_state, done) tuples,
            oldest first, capped at `memory_size` entries.
        epsilon: current exploration probability; decayed by `replay`.
    """

    def __init__(self, state_size, action_size, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01,
                 target_update_every=100, memory_size=10000):
        """Build the online/target networks, loss and optimizer.

        Args:
            state_size: number of features in a state.
            action_size: number of discrete actions.
            gamma: discount factor applied to the bootstrapped next-state value.
            epsilon: initial probability of picking a random action.
            epsilon_decay: factor epsilon is multiplied by after each training
                `replay` call.
            epsilon_min: lower bound for epsilon.
            target_update_every: number of training `replay` calls between two
                copies of the online weights into the target network.
            memory_size: maximum number of stored transitions; the oldest are
                dropped first. `None` disables the cap.

        Raises:
            ValueError: if `target_update_every` < 1, or `memory_size` is not
                `None` and < 1.
        """
        if target_update_every < 1:
            raise ValueError("target_update_every must be >= 1")
        if memory_size is not None and memory_size < 1:
            raise ValueError("memory_size must be >= 1 or None")

        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.target_update_every = target_update_every
        self.memory_size = memory_size
        self.memory = []
        # Number of replay() calls that actually trained; drives target syncs.
        self._replay_calls = 0
        self.model = QNetwork(state_size, action_size)
        self.target_model = QNetwork(state_size, action_size)
        # Both networks start from identical weights.
        self.target_model.load_state_dict(self.model.state_dict())
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    @staticmethod
    def _to_tensor(array_like):
        """Convert a state (array, list or tensor) to a float32 tensor."""
        return torch.as_tensor(array_like, dtype=torch.float32)

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def select_action(self, state):
        """Pick an action index with an epsilon-greedy policy.

        With probability `epsilon` a uniformly random action is returned;
        otherwise the index of the largest Q-value predicted by the online
        network. The greedy index is taken over the flattened network output,
        so `state` should describe a single state (shape (state_size,) or
        (1, state_size)).

        Returns:
            int: the chosen action index.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        with torch.no_grad():
            q_values = self.model(self._to_tensor(state))
        return torch.argmax(q_values).item()

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory, evicting the oldest if full."""
        self.memory.append((state, action, reward, next_state, done))
        if self.memory_size is not None and len(self.memory) > self.memory_size:
            # Drop from the front so the most recent transitions are kept.
            del self.memory[: len(self.memory) - self.memory_size]

    def replay(self, batch_size):
        """Train the online network on a random minibatch from memory.

        Does nothing while fewer than `batch_size` transitions are stored.
        Otherwise it samples `batch_size` transitions without replacement and
        performs one optimizer step per sampled transition, then decays
        epsilon once and, every `target_update_every` training calls,
        refreshes the target network.

        Args:
            batch_size: number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            # TD target: r for terminal transitions, r + gamma * max_a' Q_target(s', a') otherwise.
            td_target = reward
            if not done:
                with torch.no_grad():
                    next_q_values = self.target_model(self._to_tensor(next_state))
                td_target = reward + self.gamma * torch.max(next_q_values).item()

            current_q_values = self.model(self._to_tensor(state))

            # The regression target equals the current prediction except for
            # the taken action, so only that entry contributes to the loss.
            # Detach it: the target is a constant, not part of the graph.
            expected_q_values = current_q_values.detach().clone()
            # `...` indexes the action axis for both (state_size,) and
            # (1, state_size) shaped states.
            expected_q_values[..., action] = td_target

            loss = self.criterion(current_q_values, expected_q_values)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # Decay exploration once per training call, never going below epsilon_min.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        else:
            self.epsilon = self.epsilon_min

        # Refresh the target network only periodically; syncing it on every
        # call would make it identical to the online network at all times.
        self._replay_calls += 1
        if self._replay_calls % self.target_update_every == 0:
            self.update_target_network()

# Example usage
# Placeholder dimensions: both must be adjusted to match the environment in use.
state_size, action_size = 4, 2
agent = DQNAgent(state_size, action_size)

# Training: up to 1000 episodes, each capped at 500 environment steps.
# NOTE(review): `env` is not defined in any cell visible here; it is expected
# to provide reset() -> state and step(action) -> (next_state, reward, done, info).
for episode in range(1000):
    # Start a new episode with the initial state as a single row of state_size values.
    state = np.reshape(env.reset(), (1, state_size))

    for time_step in range(500):
        # Act, observe the outcome, and bring the new state to the same row shape.
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, (1, state_size))

        # Store the transition, then learn from a sampled minibatch of 32.
        agent.remember(state, action, reward, next_state, done)
        agent.replay(batch_size=32)

        state = next_state

        # Stop stepping once the environment reports the episode is over.
        if done:
            break
