In [None]:
# Step 1: Load and Preprocess the Data
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer

# Read the imputed pre-complication dataset from the Excel workbook
file_path = 'PreComplication_Data_Imputed.xlsx'
df = pd.read_excel(file_path)

# Column roles: the state is Gender plus six continuous measurements,
# the action is the surgical procedure, and the reward source is the
# complicated-appendicitis flag.
continuous_columns = ['Age', 'BMI', 'WBCs', 'Na', 'Hb', 'K']
state_columns = ['Gender'] + continuous_columns
action_column = 'SurgicalProcedure'
reward_column = 'ComplicatedAppendicitis'

# Replace each continuous measurement with an ordinal quantile-bin code
# (3 bins requested per feature) so the state space stays small.
binner = KBinsDiscretizer(strategy='quantile', encode='ordinal', n_bins=3)
binned_values = binner.fit_transform(df[continuous_columns])
df[continuous_columns] = binned_values

# The discretizer's output is cast to int here, together with Gender,
# so every state column holds integer codes.
df[state_columns] = df[state_columns].astype(int)

# Show the first rows of the columns used downstream
preview_columns = [*state_columns, action_column, reward_column]
print("Processed data sample:")
print(df[preview_columns].head())


In [None]:
# Step 2: Define State-Action Space and Initialize Q-Table

import numpy as np

# Define state and action components
state_features = ['Gender', 'Age', 'BMI', 'WBCs', 'Na', 'Hb', 'K']
actions = [0, 1]  # 0 = Open surgery, 1 = Laparoscopy

# The feature codes are used directly as array indices into the Q-table, so
# each axis must be long enough to hold the largest code (max + 1). Counting
# distinct values instead would under-size an axis whenever the codes are not
# exactly 0..k-1 (for example 1-based codes, or a quantile bin with no rows).
if df.empty:
    raise ValueError("Cannot size the Q-table: the dataset has no rows.")
if (df[state_features] < 0).any().any():
    # Negative codes would silently wrap around as NumPy indices.
    raise ValueError("State features must be non-negative integer codes.")

bins_per_feature = [int(df[name].max()) + 1 for name in state_features]

# Per-feature names kept for any later cell that refers to them
gender_bins, age_bins, bmi_bins, wbc_bins, na_bins, hb_bins, k_bins = bins_per_feature

# Define the shape of the Q-table: one axis per state feature, then the action axis
q_table_shape = (*bins_per_feature, len(actions))

# Initialize Q-table with zeros
q_table = np.zeros(q_table_shape)

print("Q-table shape:", q_table.shape)


Note
The expected Q-table shape is (2, 3, 3, 3, 3, 3, 3, 2): one axis per state feature, followed by one axis for the action.
1. 2 categories for Gender
2. 3 quantile bins each for Age, BMI, WBCs, Na, Hb, and K
3. 2 actions: open surgery (0) and laparoscopy (1)

In [None]:
import numpy as np

# Hyperparameters
alpha = 0.1      # Learning rate
gamma = 0.9      # Discount factor
episodes = 1000  # Number of training iterations
seed = 42        # Fixes the per-episode shuffle so a re-run reproduces the same Q-table

# Extract states, actions and rewards once as plain integers.
# Iterating with DataFrame.iterrows() does not preserve column dtypes: if the
# frame holds any float column, every value in the row comes back as a float
# and NumPy rejects it as an array index. Doing the conversion up front also
# avoids repeating the per-row pandas work in every episode.
state_indices = df[state_columns].to_numpy(dtype=int)
action_indices = df[action_column].to_numpy(dtype=int)
rewards = np.where(df[reward_column].to_numpy() == 1, -1, 1)  # -1 for complication, +1 for no complication

# Re-initialize the Q-table with the shape derived from the data rather than
# a hard-coded tuple, so it stays consistent with the discretized features.
q_table = np.zeros(q_table_shape)

rng = np.random.default_rng(seed)

# Start training
for episode in range(episodes):
    # Visit the rows in a fresh random order each episode
    visit_order = rng.permutation(len(state_indices))

    for idx, row_pos in enumerate(visit_order):
        # Get the current state and action as Python ints
        state = tuple(state_indices[row_pos].tolist())
        action = int(action_indices[row_pos])
        reward = int(rewards[row_pos])

        # In this simplified setup, we assume next_state = current state.
        # As a consequence the bootstrap term below feeds the state's own
        # best Q-value back into its update.
        next_state = state

        # Q-learning update
        state_action = state + (action,)
        current_q = q_table[state_action]
        max_future_q = np.max(q_table[next_state])
        new_q = (1 - alpha) * current_q + alpha * (reward + gamma * max_future_q)
        q_table[state_action] = new_q

        # Log the first update of every 100th episode
        if episode % 100 == 0 and idx == 0:
            print(f"Episode {episode} | Updated Q{state_action} = {new_q:.4f}")

print("Training complete.")


In [None]:
# Step 3: Train the Q-learning agent
# We’ll loop through the dataset and update the Q-table using the standard Q-learning update rule:
# Q(s, a) ← Q(s, a) + α × [r + γ × max Q(s′, a′) − Q(s, a)]
# Where:
# s is the current state
# a is the action taken
# r is the reward
# s' is the next state
# α is the learning rate
# γ is the discount factor

import numpy as np

# Hyperparameters
alpha = 0.1   # Learning rate
gamma = 0.95  # Discount factor
epochs = 100  # Number of iterations over the dataset

# Extract the integer state/action codes and the rewards once, in row order.
# DataFrame.iterrows() does not preserve dtypes across a row, so with any
# float column in the frame the state values would arrive as floats and fail
# as NumPy indices; converting up front avoids that and the per-row overhead.
feature_names = ['Gender', 'Age', 'BMI', 'WBCs', 'Na', 'Hb', 'K']
state_codes = [tuple(codes) for codes in df[feature_names].to_numpy(dtype=int).tolist()]
action_codes = df[action_column].to_numpy(dtype=int).tolist()
# Reward = 1 if NOT complicated appendicitis (we want to avoid complications)
reward_values = (1 - df[reward_column].to_numpy()).tolist()

# NOTE(review): q_table is not re-initialized in this cell, so these updates
# are applied on top of whatever values it already holds - confirm intended.

# Train Q-learning agent
for epoch in range(epochs):
    for state, action, reward in zip(state_codes, action_codes, reward_values):
        # In this context, assume same state because we have no temporal transitions
        next_state = state

        # Q-learning update: Q(s, a) += alpha * (r + gamma * max Q(s', .) - Q(s, a))
        state_action = state + (action,)
        current_q = q_table[state_action]
        max_future_q = np.max(q_table[next_state])
        new_q = current_q + alpha * (reward + gamma * max_future_q - current_q)
        q_table[state_action] = new_q


In [None]:
# Step 4A: Predict Best Treatment Using Q-table

# Function to get best treatment from Q-table
def predict_best_treatment(state_row, q_table):
    """Return the index of the highest-valued action for a single state.

    Parameters
    ----------
    state_row : iterable
        Integer-valued feature codes for one state, in the same order as the
        leading axes of ``q_table``. Float-typed codes such as ``1.0`` are
        accepted and converted to ``int`` before indexing.
    q_table : numpy.ndarray
        Q-value array with one leading axis per state feature and the action
        axis last.

    Returns
    -------
    numpy integer
        Position of the largest Q-value along the action axis. When several
        actions tie (for example a state whose entries are all still zero),
        ``np.argmax`` returns the lowest index.

    Raises
    ------
    ValueError
        If the number of codes does not match the number of state axes.
    IndexError
        If a code lies outside the corresponding axis of ``q_table``.
    """
    state_index = tuple(int(code) for code in state_row)
    if len(state_index) != q_table.ndim - 1:
        raise ValueError(
            f"expected {q_table.ndim - 1} state values, got {len(state_index)}"
        )
    return np.argmax(q_table[state_index])

# Look up the greedy action for every row's state and store it on the frame
predicted_actions = df[state_columns].apply(
    predict_best_treatment, axis=1, args=(q_table,)
)
df['PredictedTreatment'] = predicted_actions

# Side-by-side view of the recorded procedure and the greedy action
comparison = df[['SurgicalProcedure', 'PredictedTreatment']]
print("Comparison of actual vs predicted treatments:")
print(comparison.head(10))

# Share of rows where the greedy action equals the recorded procedure
is_match = df['SurgicalProcedure'] == df['PredictedTreatment']
accuracy = is_match.mean()
print(f"\nPrediction Accuracy: {accuracy:.2%}")
