In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

# Define trading environment
class TradingEnvironment:
    def __init__(self, data, initial_cash):
        self.data = data  # Historical market data
        self.initial_cash = initial_cash
        self.reset()

    def reset(self):
        self.current_step = 0
        self.cash = self.initial_cash
        self.position = 0  # Number of shares held
        self.done = False
        return self._get_observation()

    def _get_observation(self):
        return self.data[self.current_step]

    def step(self, action):
        if self.done:
            raise ValueError("Episode is done, call reset to start a new episode")

        reward = 0
        price = self.data[self.current_step]
        next_price = self.data[self.current_step + 1]

        if action == 0:  # Buy
            if self.cash >= price:
                self.position += 1
                self.cash -= price
            else:
                reward -= 1  # Penalize invalid actions
        elif action == 1:  # Sell
            if self.position > 0:
                reward += (next_price - price)  # Profit/loss
                self.position -= 1
                self.cash += next_price
            else:
                reward -= 1  # Penalize invalid actions

        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            self.done = True

        next_state = self._get_observation()
        return next_state, reward, self.done

# Define DQN model
class DQNModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(DQNModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Generate random price data
data = np.random.randint(50, 150, size=100)

# Hyperparameters
initial_cash = 10000
discount_factor = 0.99
num_episodes = 1000

# Initialize trading environment and DQN model
env = TradingEnvironment(data, initial_cash)
input_size = len(data)
output_size = 3  # Buy, Sell, Hold
dqn_model = DQNModel(input_size, output_size)
optimizer = optim.Adam(dqn_model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Training loop
for episode in range(num_episodes):
    state = env.reset()
    done = False
    while not done:
        # Select action using DQN model
        q_values = dqn_model(torch.Tensor(state))
        action = torch.argmax(q_values).item()

        # Take action in the environment
        next_state, reward, done = env.step(action)

        # Compute target Q-value for DQN update
        target_q_values = q_values.clone()
        if not done:
            next_q_values = dqn_model(torch.Tensor(next_state))
            target_q_values[action] = reward + discount_factor * torch.max(next_q_values).item()
        else:
            target_q_values[action] = reward
        
        # Update DQN model
        loss = loss_fn(q_values, target_q_values)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        state = next_state

    # Print episode information
    print(f"Episode {episode + 1}: Total reward = {env.cash + env.position * env.data[-1]}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x98 and 100x64)