In [1]:
import pandas as pd

In [2]:
df=pd.read_csv("/content/merged_output.csv")
df

Unnamed: 0,Date,Close_3_y,EMA_diff_3_y,RSI_3_y,Close_1,EMA_diff_1,RSI_1,Close_3_x,EMA_diff_3_x,RSI_3_x
0,2012-01-19,182.350006,5.931909,0.768826,216.131775,6.809177,0.835075,182.350006,5.931909,0.768826
1,2012-01-20,189.710007,6.696169,0.815111,216.774872,7.477983,0.838738,189.710007,6.696169,0.815111
2,2012-01-23,192.220001,7.493994,0.827775,216.774872,8.111250,0.838738,192.220001,7.493994,0.827775
3,2012-01-24,200.520004,8.492884,0.861547,219.594635,8.792986,0.855102,200.520004,8.492884,0.861547
4,2012-01-25,200.190002,9.430081,0.854373,228.499115,9.698632,0.892276,200.190002,9.430081,0.854373
...,...,...,...,...,...,...,...,...,...,...
1868,2019-08-26,65.750000,-8.216647,0.452379,110.550003,-40.467256,0.297506,65.750000,-8.216647,0.452379
1869,2019-08-27,67.150002,-8.251858,0.485199,120.349998,-40.691261,0.420026,67.150002,-8.251858,0.485199
1870,2019-08-28,65.650002,-8.326030,0.453817,116.349998,-41.005524,0.390119,65.650002,-8.326030,0.453817
1871,2019-08-29,65.050003,-8.410894,0.441516,114.300003,-41.348453,0.375367,65.050003,-8.410894,0.441516


In [3]:
# Define the split date
split_date = '2017-09-28'

# Split into training and testing sets based on the date
train_df = df[df['Date'] < split_date]
test_df = df[df['Date'] >= split_date]

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Parameters
input_dim = 6  # EMA and RSI for each of the three stocks
output_dim = 7  # Actions: Buy stock 1, Sell stock 1, Buy stock 2, Sell stock 2,Buy stock 3, Sell stock 3, Hold
learning_rate = 0.0005
gamma = 0.98
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.01
initial_cash = 100

# Define the Q-Network
class QNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the Q-Network, optimizer, and loss function
q_network = QNetwork(input_dim, output_dim)
optimizer = optim.Adam(q_network.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

def select_action(state, last_action):
    # Epsilon-greedy strategy
    if np.random.rand() <= epsilon:
        possible_actions = list(range(output_dim))
        if last_action is not None and last_action != 4:  # Allow repeating only the Hold action
            possible_actions.remove(last_action)  # Exclude the last action if it's not "Hold"
        action = np.random.choice(possible_actions) if possible_actions else np.random.randint(output_dim)
    else:
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            q_values = q_network(state_tensor)
            action = torch.argmax(q_values).item()
            if action == last_action and action != 4:  # Prevent repeating the last action unless it's "Hold"
                possible_actions = list(range(output_dim))
                possible_actions.remove(last_action)
                action = np.random.choice(possible_actions) if possible_actions else action
    return action

def train(state, action, reward, next_state):
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
    reward_tensor = torch.FloatTensor([reward])

    with torch.no_grad():
        next_q_values = q_network(next_state_tensor)
        target = reward_tensor + gamma * torch.max(next_q_values)

    q_values = q_network(state_tensor)
    current_q_value = q_values[0, action]

    loss = criterion(current_q_value, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [13]:
# Train for multiple epochs
epochs = 20
for epoch in range(epochs):
    stock_counts = np.array([1, 1, 1])  # Initial stock counts
    cash = initial_cash
    last_action = None  # Initialize last action tracker

    total_days = len(train_df)
    actions_taken = []
    portfolio_values = []
    action_details = []
    previous_portfolio_value = cash + (stock_counts[0] * train_df.iloc[0]['Close_1']) + (stock_counts[1] * train_df.iloc[0]['Close_3_x']) + (stock_counts[2] * train_df.iloc[0]['Close_3_y'])

    for index, row in train_df.iterrows():
        ema_x = row['EMA_diff_1']
        rsi_x = row['RSI_1']
        ema_y = row['EMA_diff_3_x']
        rsi_y = row['RSI_3_x']
        ema_z = row['EMA_diff_3_y']
        rsi_z = row['RSI_3_y']
        close_x = row['Close_1']
        close_y = row['Close_3_x']
        close_z = row['Close_3_y']

        # Calculate portfolio value
        total_value = cash + (stock_counts[0] * close_x) + (stock_counts[1] * close_y) + (stock_counts[2] * close_z)

        # Define state
        state = np.array([ema_x, rsi_x, ema_y, rsi_y, ema_z, rsi_z])

        # Select action
        action = select_action(state, last_action)
        actions_taken.append(action)

        # Execute action
        if action == 0:
            if cash >= close_x:
                stock_counts[0] += 1
                cash -= close_x
                action_details.append(f"Day {index+1}: Buy stock 1")
            else:
                action_details.append(f"Day {index+1}: Attempted to Buy stock 1, but not enough cash")
        elif action == 1:
            if stock_counts[0] > 0:
                stock_counts[0] -= 1
                cash += close_x
                action_details.append(f"Day {index+1}: Sell stock 1")
            else:
                action_details.append(f"Day {index+1}: Attempted to Sell stock 1, but not enough stocks")
        elif action == 2:
            if cash >= close_y:
                stock_counts[1] += 1
                cash -= close_y
                action_details.append(f"Day {index+1}: Buy stock 2")
            else:
                action_details.append(f"Day {index+1}: Attempted to Buy stock 2, but not enough cash")
        elif action == 3:
            if stock_counts[1] > 0:
                stock_counts[1] -= 1
                cash += close_y
                action_details.append(f"Day {index+1}: Sell stock 2")
            else:
                action_details.append(f"Day {index+1}: Attempted to Sell stock 2, but not enough stocks")
        elif action == 4:
            if cash >= close_z:
                stock_counts[2] += 1
                cash -= close_z
                action_details.append(f"Day {index+1}: Buy stock 3")
            else:
                action_details.append(f"Day {index+1}: Attempted to Buy stock 3, but not enough cash")
        elif action == 5:
            if stock_counts[2] > 0:
                stock_counts[2] -= 1
                cash += close_z
                action_details.append(f"Day {index+1}: Sell stock 3")
            else:
                action_details.append(f"Day {index+1}: Attempted to Sell stock 3, but not enough stocks")
        elif action == 6:
            action_details.append(f"Day {index+1}: Hold")

        # Calculate portfolio value
        portfolio_value = cash + (stock_counts[0] * close_x) + (stock_counts[1] * close_y) + (stock_counts[2] * close_z)
        portfolio_values.append(portfolio_value)

        # Calculate reward
        reward = portfolio_value - previous_portfolio_value
        previous_portfolio_value = portfolio_value

        # Prepare next state
        if index < total_days - 1:
            next_row = train_df.iloc[index + 1]
            next_state = np.array([next_row['EMA_diff_1'], next_row['RSI_1'], next_row['EMA_diff_3_x'], next_row['RSI_3_x'], next_row['EMA_diff_3_y'], next_row['RSI_3_y']])
        else:
            next_state = state  # Optionally handle the last step differently

        # Train the Q-Network
        train(state, action, reward, next_state)

        # Update last action
        last_action = action

        # Decay epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

    # Print results for each epoch
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Total days: {total_days}")
    print(f"Actions taken: {actions_taken}")
    print(f"Final portfolio value: {portfolio_values[-1]}")
    print()

# Optional: Save results to CSV after the last epoch
results_df = pd.DataFrame({
    'Day': range(total_days),
    'Action': actions_taken,
    'Portfolio_Value': portfolio_values,
    'Action_Details': action_details
})

# Uncomment the line below to save the results
# results_df.to_csv("portfolio_results.csv", index=False)


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/20
Total days: 1400
Actions taken: [0, 5, 0, 4, 0, 1, 0, 6, 0, 2, 0, 3, 0, 5, 0, 4, 0, 4, 0, 3, 0, 2, 0, 3, 0, 6, 0, 1, 0, 1, 0, 3, 0, 2, 0, 4, 0, 1, 0, 1, 0, 2, 0, 4, 0, 4, 0, 5, 0, 5, 0, 5, 0, 4, 0, 1, 0, 2, 0, 6, 0, 2, 0, 2, 0, 4, 0, 2, 0, 5, 0, 1, 0, 5, 0, 3, 6, 5, 6, 2, 3, 2, 3, 2, 5, 2, 1, 2, 0, 2, 1, 2, 5, 2, 1, 2, 1, 2, 3, 2, 1, 2, 6, 2, 3, 2, 6, 2, 5, 2, 3, 1, 3, 6, 3, 1, 3, 0, 3, 5, 3, 4, 3, 6, 3, 0, 3, 2, 3, 2, 3, 6, 3, 2, 3, 0, 3, 5, 3, 1, 3, 6, 3, 2, 3, 4, 3, 0, 3, 5, 3, 1, 3, 5, 3, 1, 3, 5, 3, 0, 3, 6, 3, 0, 3, 0, 3, 4, 3, 2, 3, 6, 3, 6, 3, 6, 3, 5, 3, 2, 3, 0, 3, 6, 3, 0, 3, 2, 3, 6, 3, 2, 3, 5, 3, 5, 3, 6, 3, 2, 3, 6, 3, 1, 3, 4, 3, 5, 3, 4, 3, 1, 3, 0, 3, 2, 3, 0, 3, 0, 3, 2, 3, 2, 3, 0, 3, 5, 3, 0, 3, 5, 3, 0, 3, 2, 3, 0, 3, 6, 3, 0, 3, 2, 3, 0, 3, 4, 3, 5, 3, 2, 3, 1, 3, 5, 3, 2, 3, 4, 3, 2, 3, 1, 3, 1, 3, 4, 3, 6, 3, 4, 3, 5, 3, 2, 3, 2, 3, 0, 3, 2, 3, 5, 3, 4, 3, 4, 3, 2, 3, 2, 3, 6, 3, 0, 3, 5, 3, 0, 1, 3, 0, 3, 5, 3, 4, 3, 6, 3, 4, 3, 0, 3, 1, 3, 6, 3, 2,

In [14]:
stock_counts

array([1, 3, 4])

In [15]:
# Testing Phase
# Make sure the Q-Network is in evaluation mode
q_network.eval()

# Initialize test parameters
test_stock_counts = np.array([1, 1, 1])  # Initial stock counts for testing
test_cash = initial_cash
test_last_action = None  # Initialize last action tracker for testing
test_total_days = len(test_df)
test_actions_taken = []
test_portfolio_values = []
test_action_details = []
test_previous_portfolio_value = test_cash + (test_stock_counts[0] * test_df.iloc[0]['Close_1']) + (test_stock_counts[1] * test_df.iloc[0]['Close_3_x']) + (test_stock_counts[2] * test_df.iloc[0]['Close_3_y'])

# Testing loop
for index, row in test_df.iterrows():
    # Get technical indicators
    ema_x = row['EMA_diff_1']
    rsi_x = row['RSI_1']
    ema_y = row['EMA_diff_3_x']
    rsi_y = row['RSI_3_x']
    ema_z = row['EMA_diff_3_y']
    rsi_z = row['RSI_3_y']
    close_x = row['Close_1']
    close_y = row['Close_3_x']
    close_z = row['Close_3_y']

    # Calculate portfolio value
    total_value = test_cash + (test_stock_counts[0] * close_x) + (test_stock_counts[1] * close_y) + (test_stock_counts[2] * close_z)

    # Define state
    test_state = np.array([ema_x, rsi_x, ema_y, rsi_y, ema_z, rsi_z])

    # Select action using the trained network (no epsilon-greedy during testing)
    with torch.no_grad():
        state_tensor = torch.FloatTensor(test_state).unsqueeze(0)
        q_values = q_network(state_tensor)
        test_action = torch.argmax(q_values).item()

        # Prevent repeating the last action unless it's "Hold"
        if test_action == test_last_action and test_action != 6:  # 6 corresponds to "Hold"
            possible_actions = list(range(output_dim))
            possible_actions.remove(test_last_action)
            test_action = np.random.choice(possible_actions) if possible_actions else test_action

    test_actions_taken.append(test_action)

    # Execute action in the test environment
    if test_action == 0:
        if test_cash >= close_x:
            test_stock_counts[0] += 1
            test_cash -= close_x
            test_action_details.append(f"Day {index+1}: Buy stock 1")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Buy stock 1, but not enough cash")
    elif test_action == 1:
        if test_stock_counts[0] > 0:
            test_stock_counts[0] -= 1
            test_cash += close_x
            test_action_details.append(f"Day {index+1}: Sell stock 1")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Sell stock 1, but not enough stocks")
    elif test_action == 2:
        if test_cash >= close_y:
            test_stock_counts[1] += 1
            test_cash -= close_y
            test_action_details.append(f"Day {index+1}: Buy stock 2")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Buy stock 2, but not enough cash")
    elif test_action == 3:
        if test_stock_counts[1] > 0:
            test_stock_counts[1] -= 1
            test_cash += close_y
            test_action_details.append(f"Day {index+1}: Sell stock 2")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Sell stock 2, but not enough stocks")
    elif test_action == 4:
        if test_cash >= close_z:
            test_stock_counts[2] += 1
            test_cash -= close_z
            test_action_details.append(f"Day {index+1}: Buy stock 3")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Buy stock 3, but not enough cash")
    elif test_action == 5:
        if test_stock_counts[2] > 0:
            test_stock_counts[2] -= 1
            test_cash += close_z
            test_action_details.append(f"Day {index+1}: Sell stock 3")
        else:
            test_action_details.append(f"Day {index+1}: Attempted to Sell stock 3, but not enough stocks")
    elif test_action == 6:
        test_action_details.append(f"Day {index+1}: Hold")

    # Calculate portfolio value after the action
    test_portfolio_value = test_cash + (test_stock_counts[0] * close_x) + (test_stock_counts[1] * close_y) + (test_stock_counts[2] * close_z)
    test_portfolio_values.append(test_portfolio_value)

    # Update last action
    test_last_action = test_action

# Print results
print(f"Total days (Test): {test_total_days}")
print(f"Actions taken (Test): {test_actions_taken}")
print(f"Final portfolio value (Test): {test_portfolio_values[-1]}")

# Optional: Save test results to CSV
test_results_df = pd.DataFrame({
    'Day': range(test_total_days),
    'Action': test_actions_taken,
    'Portfolio_Value': test_portfolio_values,
    'Action_Details': test_action_details
})

# test_results_df.to_csv("test_portfolio_results.csv", index=False)

Total days (Test): 473
Actions taken (Test): [4, 0, 4, 3, 4, 5, 4, 6, 4, 0, 4, 2, 4, 0, 4, 5, 4, 1, 4, 2, 4, 6, 0, 3, 0, 3, 0, 5, 6, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 3, 0, 1, 0, 1, 0, 6, 0, 5, 0, 3, 0, 5, 0, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 3, 4, 3, 2, 3, 0, 3, 5, 6, 6, 6, 6, 6, 0, 5, 0, 1, 0, 1, 0, 6, 0, 3, 0, 6, 6, 6, 4, 5, 4, 2, 4, 0, 4, 2, 4, 2, 4, 3, 4, 5, 4, 5, 4, 6, 4, 3, 4, 3, 4, 5, 4, 6, 4, 5, 4, 3, 4, 0, 4, 6, 4, 0, 4, 2, 4, 1, 4, 6, 4, 3, 4, 2, 4, 2, 4, 0, 4, 2, 4, 1, 4, 0, 4, 6, 4, 0, 4, 6, 4, 0, 4, 6, 4, 1, 4, 6, 4, 5, 4, 5, 4, 3, 4, 2, 4, 3, 4, 2, 4, 2, 4, 1, 4, 3, 4, 0, 4, 1, 4, 3, 4, 0, 4, 1, 4, 5, 4, 6, 4, 1, 4, 3, 4, 2, 4, 1, 4, 1, 4, 1, 4, 1, 4, 6, 4, 1, 4, 5, 4, 3, 4, 1, 4, 6, 4, 1, 4, 3, 4, 3, 4, 0, 4, 3, 4, 1, 4, 1, 4, 2, 4, 0, 4, 5, 4, 1, 4, 6, 4, 2, 4, 5, 4, 5, 4, 2, 4, 1, 4, 5, 4, 3, 4, 1, 4, 6, 4, 2, 4, 6, 4, 6, 4, 2, 4, 6, 4, 2, 4, 6, 4, 2, 4, 6, 4, 6, 4, 3, 4, 1, 4, 5, 4, 0, 4, 1, 4, 2, 4, 0, 4, 1, 4, 6, 4, 2, 4, 6, 4, 2, 4, 5, 4, 

In [16]:
cash

36.563217163086165

In [17]:
initial_cash

100

In [18]:
stock_counts

array([1, 3, 4])