# ⚠️ WARNING: EDUCATIONAL USE ONLY ⚠️

This notebook is strictly for **educational and research purposes only**.

Do **NOT** use it for:
- Professional medical advice
- Diagnosis
- Treatment
- Safety-critical decision making

The authors are **not responsible** for any misuse or damages.

---

**Please read this carefully before running any code in this notebook.**


In [None]:
# ===============================================
# ⚠️ RUNTIME WARNING: EDUCATIONAL USE ONLY ⚠️
# ===============================================

def educational_warning(width: int = 100) -> None:
    """Print the educational-use-only disclaimer as a centred banner.

    Args:
        width: Banner width in characters. The border is ``width`` ``=``
            characters long and every text line is centred within it.
            Defaults to 100.
    """
    border = "=" * width
    # An empty string marks a blank separator line. The blank lines are
    # printed on their own instead of embedding "\n" in the text, because
    # str.center() counts the newline as an ordinary character and would put
    # the left padding *before* it, leaving the visible text left-aligned.
    banner_lines = (
        "⚠️  WARNING: EDUCATIONAL USE ONLY  ⚠️",
        "This notebook is strictly for EDUCATIONAL and RESEARCH purposes only.",
        "",
        "DO NOT use this notebook for:",
        "- Professional medical advice",
        "- Diagnosis",
        "- Treatment",
        "- Safety-critical decision making",
        "",
        "The authors are NOT responsible for any misuse or damages.",
    )

    print("\n" + border)
    for text in banner_lines:
        print(text.center(width) if text else "")
    print(border + "\n")

# Display the warning
educational_warning()

# Halt right after the banner: sys.exit() raises SystemExit with the message
# below, so nothing placed after it in the same script/cell is executed.
# There is no acknowledgement prompt here -- the stop is unconditional.
# NOTE(review): presumably the training code is meant to be run separately
# (e.g. as its own notebook cell) after reading the banner -- confirm.
import sys
sys.exit("Execution stopped: Please read the warning above before using this code.")


In [None]:
import numpy as np

# Health states of the simulated diabetes patient (one Q-table row each)
states = ['Controlled', 'Uncontrolled']

# Treatment options the agent can choose from (one Q-table column each)
actions = ['Insulin', 'Oral Medication', 'Diet Adjustment', 'Exercise']

# Q-table: one value per (state, action) pair, all starting at zero
q_table = np.zeros(shape=(len(states), len(actions)))
print(q_table)

# Training hyperparameters
learning_rate, discount_factor = 0.1, 0.9
episodes = 1000
epsilon = 0.1  # Exploration rate

# Immediate reward for every (state, action) pair. Each tuple below is one
# row of the reward grid: rows follow `states`, columns follow `actions`.
rewards = {
    (state_name, action_name): value
    for state_name, reward_row in zip(states, [(10, 8, 6, 7), (-5, -2, 0, 1)])
    for action_name, value in zip(actions, reward_row)
}

# Simulated environment transition function
def transition(state, action):
    """Return the health state that follows taking ``action``.

    'Insulin' and 'Exercise' lead to 'Controlled'; any other action leads to
    'Uncontrolled'. The outcome is the same whatever ``state`` is; the
    parameter is kept so the function has the usual (state, action) shape.
    """
    leads_to_controlled = ('Insulin', 'Exercise')
    return 'Controlled' if action in leads_to_controlled else 'Uncontrolled'

# Function to choose an action based on an epsilon-greedy policy
def choose_action(state, epsilon):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    A uniform draw from [0, 1) is compared with ``epsilon``: below it, a
    random entry of ``actions`` is returned (exploration); otherwise the
    action with the largest value in this state's row of the global
    ``q_table`` is returned (exploitation).
    """
    exploring = np.random.uniform(0, 1) < epsilon
    if not exploring:
        # Greedy pick from this state's Q-table row
        best_column = np.argmax(q_table[states.index(state)])
        return actions[best_column]
    return np.random.choice(actions)

# Train using a specific algorithm
def train_algorithm(algorithm, max_steps=100):
    """Train the global ``q_table`` from scratch with Q-learning or SARSA.

    The table is reset to zeros, then ``episodes`` episodes are simulated.
    Each episode starts in a random state and follows the epsilon-greedy
    policy from ``choose_action``. An episode ends once 'Insulin' has been
    applied and the patient is 'Controlled', or after ``max_steps`` steps.

    Args:
        algorithm: ``'q_learning'`` (off-policy: bootstraps from the best
            next-state value) or ``'sarsa'`` (on-policy: bootstraps from the
            value of the action actually selected next).
        max_steps: Safety cap on the number of steps per episode.

    Raises:
        ValueError: If ``algorithm`` is not one of the two supported names.
    """
    global q_table  # Rebind the module-level table so callers see the result

    if algorithm not in ('q_learning', 'sarsa'):
        raise ValueError(
            f"Unknown algorithm {algorithm!r}; expected 'q_learning' or 'sarsa'."
        )

    # Both algorithms start from an all-zero table
    q_table = np.zeros((len(states), len(actions)))

    for _ in range(episodes):
        state = np.random.choice(states)  # Start with a random state
        action = choose_action(state, epsilon)  # Choose initial action

        for _step in range(max_steps):
            next_state = transition(state, action)
            reward = rewards.get((state, action), 0)

            state_index = states.index(state)
            action_index = actions.index(action)
            next_state_index = states.index(next_state)

            if algorithm == 'q_learning':
                # Off-policy target: value of the greedy next action
                next_action = None
                bootstrap = np.max(q_table[next_state_index])
            else:
                # On-policy target: value of the action we will really take,
                # so it has to be selected before the update
                next_action = choose_action(next_state, epsilon)
                bootstrap = q_table[next_state_index, actions.index(next_action)]

            q_table[state_index, action_index] += learning_rate * (
                reward + discount_factor * bootstrap - q_table[state_index, action_index]
            )

            # Terminate on the action that was actually executed this step,
            # not on the upcoming one, so both algorithms share one criterion
            if next_state == 'Controlled' and action == 'Insulin':
                break

            if next_action is None:
                # Q-learning selects its behaviour action after the update so
                # that a fresh action is taken on every step of the episode
                next_action = choose_action(next_state, epsilon)

            state, action = next_state, next_action

# Making treatment decisions based on the trained Q-table
def suggest_treatment(current_state):
    """Return the action with the highest learned Q-value for ``current_state``.

    The global ``q_table`` is read at call time, so the suggestion reflects
    the most recent training run.

    Raises:
        ValueError: If ``current_state`` is not an entry of ``states``.
    """
    state_index = states.index(current_state)
    action_index = np.argmax(q_table[state_index])
    return actions[action_index]


def _ask_choice(prompt, choices):
    """Prompt until the reply matches one of ``choices``; return that entry.

    Replies are stripped and compared case-insensitively, and the value
    returned is the canonical spelling taken from ``choices``.
    """
    canonical = {choice.lower(): choice for choice in choices}
    options = " or ".join(repr(choice) for choice in choices)
    while True:
        reply = input(prompt).strip().lower()
        if reply in canonical:
            return canonical[reply]
        print(f"Invalid input. Please enter {options}.")


# Main loop to handle user input and algorithm selection
while True:
    # Get algorithm type from user input with validation
    algorithm = _ask_choice(
        "Enter the algorithm you want to use ('q_learning' or 'sarsa'): ",
        ['q_learning', 'sarsa'],
    )

    # Train with the selected algorithm
    print(f"Training with {algorithm}...")
    train_algorithm(algorithm)
    print(f"Completed training with {algorithm}.")

    # Display the learned Q-table
    print("Learned Q-table:")
    print(q_table)

    # Suggest a treatment for the state the user enters. The reply is
    # validated and normalised first, so a typo or different capitalisation
    # re-prompts instead of crashing in states.index().
    patient_state = _ask_choice(
        'Enter a Diabetes patient state (Controlled or Uncontrolled): ',
        states,
    )
    suggested_treatment = suggest_treatment(patient_state)
    print(f"Suggested treatment for a '{patient_state}' patient: {suggested_treatment}")

    # Ask if the user wants to continue with another algorithm
    continue_choice = input("Do you want to continue with another algorithm? (yes/no): ").strip().lower()
    if continue_choice != 'yes':
        break
