In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Define the actor network
class Actor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        action_probs = self.softmax(x)
        return action_probs

# Define the critic network
class Critic(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        value = self.fc2(x)
        return value

# Hyperparameters
input_size = 10
hidden_size = 64
output_size = 2 
learning_rate = 0.0095
num_episodes = 3000

# Initialize actor and critic networks
actor = Actor(input_size, hidden_size, output_size)
critic = Critic(input_size, hidden_size)
actor_optimizer = optim.Adam(actor.parameters(), lr=learning_rate)
critic_optimizer = optim.Adam(critic.parameters(), lr=learning_rate)

# Initialize variables for accuracy calculation
correct_predictions = 0
total_predictions = 0

# Load dataset
data_path = 'test_value.csv' 
df = pd.read_csv(data_path)
data = df['value'].values

# Split the dataset into train and test sets
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)


# Training loop
for episode in range(num_episodes):
    # Generate a sequence
    start_index = np.random.randint(len(train_data) - 10)
    sequence = train_data[start_index : start_index + 10]
    
    # Convert the sequence to PyTorch tensor
    state = torch.FloatTensor(sequence).view(1, -1)
    
    # Actor chooses an action based on the sequence
    action_probs = actor(state)
    action = torch.multinomial(action_probs, 1)
    
    # Take the action and observe the next state and reward
    next_state = torch.cat((state[:, 1:], action.float()), dim=1)
    reward = torch.tensor(1.0 if action == sequence[-1] else 0.0)
    
    # Critic evaluates the current state
    value = critic(state)
    
    # Update the critic using the TD error
    td_error = reward - value
    critic_loss = td_error.pow(2)
    critic_optimizer.zero_grad()
    critic_loss.backward()
    critic_optimizer.step()
    
    # Update the actor using the critic's feedback
    actor_loss = -torch.log(action_probs.squeeze(0)[action]) * td_error.detach()
    actor_optimizer.zero_grad()
    actor_loss.backward()
    actor_optimizer.step()

    
    # Update accuracy variables
    total_predictions += 1
    if action == sequence[-1]:
        correct_predictions += 1

# Calculate accuracy
accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)

# Testing the trained actor
total_test_predictions = 0
for i in range(10):
    start_index = np.random.randint(len(test_data) - 10)
    test_sequence = test_data[start_index : start_index + 10] #you can put your test sequence here in the form of list e.g x = [1,0,1,1,0,0,0,0,0,0]; len(x) = 10
    test_state = torch.FloatTensor(test_sequence).view(1, -1)
    test_action_probs = actor(test_state)
    predicted_action = torch.argmax(test_action_probs)
    print("Predicted action:", predicted_action.item())

    # Update accuracy variables
    total_test_predictions += 1
    if action == test_sequence[-1]:
        correct_predictions += 1

# Calculate accuracy
test_accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)



# Initialize variables 
correct_test_predictions = 0
total_test_predictions = 0
start_index = 1

# Initialize lists to store true and predicted values
true_values = []
predicted_values = []

# Testing loop
for _ in range(len(data)-10):
    test_sequence = data[start_index : start_index + 10]
    test_state = torch.FloatTensor(test_sequence).view(1, -1)
    test_action_probs = actor(test_state)
    predicted_action = torch.argmax(test_action_probs)
    
    true_action = test_sequence[-1]  # Actual next step (0 or 1)
    
    true_values.append(true_action)
    predicted_values.append(predicted_action.item())

    start_index += 1


# Save results to a CSV file
results_dict = {'True Value': true_values, 'Predicted Value': predicted_values}
results_df = pd.DataFrame(results_dict)
results_df.to_csv('results.csv', index=False)


Accuracy: 0.982
Predicted action: 0
Predicted action: 1
Predicted action: 1
Predicted action: 0
Predicted action: 1
Predicted action: 0
Predicted action: 1
Predicted action: 0
Predicted action: 0
Predicted action: 1
Accuracy: 0.982
