In [2]:
import numpy as np
import pandas as pd
import os
import time

# Define the path to the folder containing the subfolders
data_folder = r"LeadV1 Mix"

# Define the Q-learning parameters
num_actions = 5
alpha = 0.01
gamma = 0.9
epsilon = 0.1
num_episodes = 100

# Define the accuracy function
def get_accuracy(num_correct, num_beats, total_confidence):
    accuracy = num_correct / num_beats
    confidence = total_confidence / num_beats
    return accuracy, confidence

# Define the softmax function
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# Define the Q-learning function
def q_learning(df, num_states):
    # Initialize the Q-table
    Q = np.zeros((num_states, num_actions))

    # Define the state function
    def get_state(row):
        state = int(row['Beat_Number']) - 1 # Subtract 1 to convert to zero-based index
        return state

    # Define the action function
    def get_action(state):
        if np.random.uniform(0, 1) < epsilon:
            # Choose a random action
            action = np.random.randint(num_actions)
        else:
            # Choose the action with the highest Q-value
            action = np.argmax(Q[state])
        confidence = softmax(Q[state])[action]
        return action, confidence
    
    # Initialize the number of correct classifications
    num_correct = 0
    num_beats = 0
    total_confidence = 0

    # Loop through the episodes
    for episode in range(num_episodes):
        start_time = time.time()
        # Reset the state to the beginning of the file
        state = 0

        # Initialize the total reward for the episode
        total_reward = 0

        # Initialize the time taken for classification
        time_taken = 0

        # Loop through the rows of the dataframe
        for i, row in df.iterrows():
            # Get the current state and action
            state = get_state(row)
            action, confidence = get_action(state)

            # Get the reward for the current action
            reward = 0
            if row['Diagnostic_Label'] == action:
                # Give a reward of 1 for accurate classification
                reward = 1
                num_correct += 1
            else:
                # Give a reward of -1 for misclassification
                reward = -1

            # Calculate the time taken for classification
            start_time = time.time()
            next_state = get_state(df.iloc[i+1]) if i+1 < len(df) else state
            end_time = time.time()
            time_taken += end_time - start_time

            # Add a negative penalty to the reward based on the time taken for classification
            reward -= time_taken

            # Update the Q-value for the current state-action pair
            Q[state, action] = Q[state, action] + alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])

            # Update the state and total reward
            state = next_state
            total_reward += reward + confidence
            # Update the total beats
            num_beats += 1
            total_confidence += confidence

        # Print the total reward and accuracy for the episode
        accuracy = get_accuracy(num_correct, num_beats, total_confidence)
        print("Episode {}: Total Reward = {}, Accuracy = {:.2f}, Confidence Probability = {:.2f}, Time of Classification = {:.4f} seconds".format(episode+1, total_reward, accuracy[0], accuracy[1], time_taken))

       # Print the Q-table
        print("Q-table:")
        print(Q)

    # Return the Q-table
    return Q


# Initialize an empty list to store the Q-tables
Q_list = []


# Loop through each subfolder and read in the CSV files
for subdir in os.listdir(data_folder):
    subfolder_path = os.path.join(data_folder, subdir)
    if os.path.isdir(subfolder_path):
        for file in os.listdir(subfolder_path):
            file_path = os.path.join(subfolder_path, file)
            if file_path.endswith(".csv"):
                # Read in the CSV file
                df = pd.read_csv(file_path)

                # Drop in the NaN values in the 'Beat_Number' column 
                #print(df['Beat_Number'].isnull().sum())
                df = df.dropna(subset=['Beat_Number'])

                # Check if the 'Beat_Number' column exists in the dataframe
                if 'Beat_Number' in df.columns:
                    # Group the dataframe by Beat_Number and drop the Beat_Number column
                    df = df.groupby('Beat_Number').first().reset_index()
                    df = df[['Time_Samples', 'LeadV1', 'R_Peaks', 'Diagnostic_Label','Beat_Number']]

                    # Set the number of states to be equal to the number of beats
                    num_states = df.shape[0]

                    # Run Q-learning on the dataframe
                    Q = q_learning(df, num_states)
                    policy = np.argmax(Q, axis=1)
                    print(policy)
                    Q_list.append(Q)

avg_Q_table = np.vstack(Q_list).mean(axis=0)
print(avg_Q_table)




Episode 1: Total Reward = -310.45892610550135, Accuracy = 0.01, Confidence Probability = 0.20, Time of Classification = 0.1095 seconds
Q-table:
[[-0.01        0.          0.          0.          0.        ]
 [-0.01        0.          0.          0.          0.        ]
 [-0.01        0.          0.          0.          0.        ]
 ...
 [-0.01109451  0.          0.          0.          0.        ]
 [-0.01109451  0.          0.          0.          0.        ]
 [-0.01109451  0.          0.          0.          0.        ]]
Episode 2: Total Reward = -298.8629457974194, Accuracy = 0.02, Confidence Probability = 0.20, Time of Classification = 0.0787 seconds
Q-table:
[[-0.01       -0.01        0.          0.          0.        ]
 [-0.01       -0.01        0.          0.          0.        ]
 [-0.0199      0.          0.          0.          0.        ]
 ...
 [-0.01109451 -0.0107875   0.          0.          0.        ]
 [-0.01109451 -0.0107875   0.          0.          0.        ]
 [-0.0110

KeyError: ['Beat_Number']