# In [None]:
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, hamming_loss

# Root directory; each of its subfolders is scanned for CSV files
data_folder = r"Processed_Data"

# Q-learning hyperparameters
num_actions, num_episodes = 5, 100  # size of the action set / training passes per file
alpha, gamma, epsilon = 0.001, 0.9, 0.1  # learning rate / discount factor / exploration rate

# Reward histories and the episode numbers they were recorded at
train_rewards, train_episodes = [], []
test_rewards, test_episodes = [], []

# Define the evaluation metrics function
def evaluate_metrics(y_true, y_pred):
    """Score predicted labels against the true labels.

    Recall, precision and F1 are micro-averaged.

    Returns:
        The tuple ``(accuracy, recall, precision, f1, hamming_loss)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The three averaged scorers share a signature, so evaluate them in one sweep
    recall, precision, f1 = (
        scorer(y_true, y_pred, average='micro')
        for scorer in (recall_score, precision_score, f1_score)
    )
    return accuracy, recall, precision, f1, hamming_loss(y_true, y_pred)

# Define the state function
def get_state(row):
    """Return the zero-based state index for a beat record.

    ``row`` is any mapping-like object with a ``'Beat_Number'`` entry; the
    entry is coerced to ``int`` and shifted down by one.
    """
    return int(row['Beat_Number']) - 1

# Define the Q-learning function
def q_learning(df, num_states):
    """Train a tabular Q-learning agent that predicts the label of each beat.

    Every row of ``df`` is one step: the state is ``get_state(row)``, the
    action is the predicted label (an integer in ``[0, num_actions)``) and
    the reward is +1 when the action equals the row's ``Diagnostic_Label``
    and -1 otherwise.

    Args:
        df: DataFrame with ``Beat_Number`` and ``Diagnostic_Label`` columns;
            rows are visited in order.
        num_states: Number of rows of the Q-table. Every ``get_state(row)``
            must be a valid row index.

    Returns:
        The learned Q-table, a ``(num_states, num_actions)`` float array.

    Side effects:
        * Appends to the module-level ``train_rewards`` / ``train_episodes``
          after every episode and to ``test_rewards`` / ``test_episodes``
          after every 10th one. These lists are never reset here, so they
          keep growing across calls.
        * Every 10th episode calls ``test_agent``, prints the mean test
          reward and the metrics, and shows two reward plots.
        * Saves the final table to ``q_table.npy`` (overwritten on each call).

    Note:
        The per-episode training total also adds the pre-update maximum
        Q-value of each visited state (the "confidence"), so it is not
        directly comparable with the pure +/-1 totals from ``test_agent``.
    """
    Q = np.zeros((num_states, num_actions))

    def get_action(state):
        """Pick an action epsilon-greedily; also return the state's best Q-value."""
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(num_actions)
        else:
            action = np.argmax(Q[state])
        confidence = np.max(Q[state])
        return action, confidence

    # The rows never change between episodes, so extract (state, label) once
    # instead of re-running the slow iterrows() in every episode.
    samples = [(get_state(row), row['Diagnostic_Label']) for _, row in df.iterrows()]
    true_labels = [label for _, label in samples]
    last_pos = len(samples) - 1

    # Cumulative log of every training step (exploratory actions included)
    y_true_train = []
    y_pred_train = []

    for episode in range(num_episodes):
        total_reward = 0

        for pos, (state, label) in enumerate(samples):
            action, confidence = get_action(state)
            reward = 1 if label == action else -1

            # Successor is the next row *by position*. The original indexed
            # iloc with the iterrows() label, which is only correct for a
            # default RangeIndex. The last row bootstraps from its own state.
            next_state = samples[pos + 1][0] if pos < last_pos else state
            Q[state, action] += alpha * (
                reward + gamma * np.max(Q[next_state]) - Q[state, action])

            total_reward += reward + confidence
            y_true_train.append(label)
            y_pred_train.append(action)

        train_rewards.append(total_reward)
        train_episodes.append(episode + 1)

        # Periodic evaluation and reporting
        if (episode + 1) % 10 == 0:
            test_reward = test_agent(Q, num_states, num_episodes=50)
            test_rewards.append(test_reward)
            test_episodes.append(episode + 1)

            mean_reward = np.mean(test_rewards)
            print("Mean Reward at Episode {}: {:.2f}".format(episode + 1, mean_reward))

            _plot_rewards(test_episodes, test_rewards, 'Testing: Rewards vs. Episodes')
            _plot_rewards(train_episodes, train_rewards, 'Training: Rewards vs. Episodes')

            _print_metrics("Training Metrics:", y_true_train, y_pred_train)

            # Testing metrics score the current greedy policy (the policy
            # test_agent uses). Previously they were a verbatim copy of the
            # training log and therefore always equal to the training metrics.
            y_pred_test = [np.argmax(Q[state]) for state, _ in samples]
            _print_metrics("Testing Metrics:", true_labels, y_pred_test)

    np.save('q_table.npy', Q)
    return Q


def _plot_rewards(episodes, rewards, title):
    """Show a line plot of total reward against episode number."""
    plt.plot(episodes, rewards)
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.title(title)
    plt.show()


def _print_metrics(header, y_true, y_pred):
    """Print the evaluate_metrics() scores for one set of predictions."""
    accuracy, recall, precision, f1, hamming = evaluate_metrics(y_true, y_pred)
    print(header)
    print("Accuracy: {:.2f}".format(accuracy))
    print("Recall: {:.2f}".format(recall))
    print("Precision: {:.2f}".format(precision))
    print("F1 Score: {:.2f}".format(f1))
    print("Hamming Loss: {:.2f}".format(hamming))


def test_agent(Q, num_states, num_episodes=50, data=None):
    """Score the greedy policy of ``Q`` over a table of beats.

    Args:
        Q: Q-table; ``Q[state]`` holds the action values of one state.
        num_states: Unused; kept so existing callers keep working.
        num_episodes: Number of evaluation passes to account for.
        data: DataFrame with ``Beat_Number`` and ``Diagnostic_Label``
            columns. Defaults to the module-level ``df``, which is what this
            function always read before the parameter existed.

    Returns:
        The total reward over all passes: +1 for every row whose greedy
        action equals its ``Diagnostic_Label`` and -1 for every other row.
    """
    if num_episodes <= 0:
        return 0
    if data is None:
        data = df  # backward-compatible fallback to the module-level frame

    # The greedy policy is deterministic and Q is not modified here, so every
    # pass earns the same reward: score one pass and scale it. The former
    # next-state lookup had no effect on the result and is dropped.
    pass_reward = 0
    for _, row in data.iterrows():
        action = np.argmax(Q[get_state(row)])
        pass_reward += 1 if row['Diagnostic_Label'] == action else -1

    return pass_reward * num_episodes


# Q-table learned from each CSV file, in processing order
Q_list = []

# Visit every CSV in every subfolder of data_folder. Listings are sorted so
# the processing order (and therefore which file's table is the last one
# written to q_table.npy by q_learning) is the same on every run.
for subdir in sorted(os.listdir(data_folder)):
    subfolder_path = os.path.join(data_folder, subdir)
    if not os.path.isdir(subfolder_path):
        continue

    for file in sorted(os.listdir(subfolder_path)):
        file_path = os.path.join(subfolder_path, file)
        if not file_path.endswith(".csv"):
            continue

        # NOTE: keep the frame in the module-level name `df`; test_agent()
        # reads it from there when no frame is passed explicitly.
        df = pd.read_csv(file_path)

        # Rows without a beat number cannot be mapped to a state
        df = df.dropna(subset=['Beat_Number'])

        # Keep the first row of each beat; reset_index() turns Beat_Number
        # back into a regular column, which get_state() needs
        df = df.groupby('Beat_Number').first().reset_index()
        df = df[['Time_Samples', 'LeadV1', 'R_Peaks', 'Diagnostic_Label', 'Beat_Number']]

        if df.empty:
            print("Skipping {}: no rows with a Beat_Number".format(file_path))
            continue

        # One state per beat. get_state() indexes the Q-table with
        # Beat_Number - 1, so also cover the largest beat number in case the
        # numbering has gaps (identical to the row count when it has none).
        num_states = max(df.shape[0], int(df['Beat_Number'].max()))

        # Run Q-learning on the dataframe
        Q = q_learning(df, num_states)
        Q_list.append(Q)

if Q_list:
    # Stack all tables row-wise and average over every state row of every
    # file: the result is one mean Q-value per action (length num_actions),
    # not a per-state table.
    avg_Q_table = np.vstack(Q_list).mean(axis=0)
    np.save('Q_agent', avg_Q_table)
    print(avg_Q_table)
else:
    # np.vstack([]) raises ValueError; report the situation instead
    print("No Q-tables were trained from {!r}; nothing to average or save.".format(data_folder))