<a href="https://colab.research.google.com/github/AlessandraSalanitri/DataScienceProject/blob/main/Reinforcement_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import random

df_cleaned = pd.read_csv("processed_medicine_data.csv")

print(df_cleaned.columns)

Index(['substitute0', 'substitute1', 'substitute2', 'substitute3',
       'substitute4', 'use0', 'use1', 'use2', 'use3', 'use4', 'sideEffect0',
       'sideEffect1', 'sideEffect2', 'sideEffect3', 'sideEffect4',
       'sideEffect5', 'sideEffect6', 'sideEffect7', 'sideEffect8',
       'sideEffect9', 'sideEffect10', 'sideEffect11', 'sideEffect12',
       'sideEffect13', 'sideEffect14', 'sideEffect15', 'sideEffect16',
       'sideEffect17', 'sideEffect18', 'sideEffect19', 'sideEffect20',
       'sideEffect21', 'sideEffect22', 'sideEffect23', 'sideEffect24',
       'sideEffect25', 'sideEffect26', 'sideEffect27', 'sideEffect28',
       'sideEffect29', 'sideEffect30', 'sideEffect31', 'sideEffect32',
       'sideEffect33', 'sideEffect34', 'sideEffect35', 'sideEffect36',
       'sideEffect37', 'sideEffect38', 'sideEffect39', 'sideEffect40',
       'sideEffect41'],
      dtype='object')


In [3]:
import gym
from gym import spaces

class DrugPrescriptionEnv(gym.Env):
    """Custom Environment for drug prescription and substitution decisions."""

    def __init__(self, df):
        super(DrugPrescriptionEnv, self).__init__()

        # create the substitutes dictionary
        self.df = df
        self.substitutes = self.create_substitutes_dict(df)

        # available drugs in the dataset
        self.states = list(self.substitutes.keys())
        self.state = None  # Current drug

        # substitute drug
        self.action_space = spaces.Discrete(len(self.states))

        self.observation_space = spaces.Discrete(len(self.states))

    def create_substitutes_dict(self, df):
        """Create a dictionary mapping drugs to valid substitutes (excluding 'Unknown')."""
        substitutes_dict = {}

        for _, row in df.iterrows():
            drug = row["substitute0"]  # Primary drug
            substitutes = row[["substitute1", "substitute2", "substitute3", "substitute4"]].dropna().tolist()

            # Remove 'Unknown' values- keep only valid substitutes
            substitutes = [sub for sub in substitutes if sub.lower() != "unknown"]

            if substitutes:  # Only store drugs with valid substitutes
                substitutes_dict[drug] = substitutes

        return substitutes_dict

    def reset(self):
        """Reset the environment by selecting a random drug as the initial state."""
        self.state = random.choice(self.states)
        return self.state

    def step(self, action):
        """Take an action (substituting a drug) and return the next state, reward, and done flag."""

        if self.state not in self.substitutes or not self.substitutes[self.state]:
            return self.state, -1, True

        # Select a substitute drug
        new_state = random.choice(self.substitutes[self.state])

        # Encourage using a substitute
        reward = 1 if new_state in self.substitutes else -1

        # simulation stops if no substitutes exist
        done = new_state not in self.substitutes

        # Update state
        self.state = new_state

        return new_state, reward, done

    def render(self):
        """Print the current state."""
        print(f"Current Drug: {self.state}")


In [5]:
df_cleaned = pd.read_csv("processed_medicine_data.csv")

# Initialize the environment
env = DrugPrescriptionEnv(df_cleaned)

# random drug
state = env.reset()
print(f"Initial State: {state}")

# substituting the drug
if state in env.substitutes:
    action = random.choice(env.substitutes[state])  #substitute
    new_state, reward, done = env.step(action)
    print(f"Action Taken: {state} -> {action}")
    print(f"New State: {new_state}, Reward: {reward}, Done: {done}")
else:
    print("No valid substitutes available.")


Initial State: Theodep 5 mg/25 mg Tablet
Action Taken: Theodep 5 mg/25 mg Tablet -> Depik Forte 5 mg/25 mg Tablet
New State: Sycodep 5 mg/25 mg Tablet, Reward: -1, Done: True


# Implement Q-Learning for Drug Substitution

In [11]:
import json

class DrugPrescriptionEnv:
    def __init__(self, df):
        self.df = df
        self.states = df.index.tolist()
        self.substitutes = {
            row.Index: [getattr(row, col) for col in df.columns if "substitute" in col and pd.notna(getattr(row, col))]
            for row in df.itertuples()
        }
        self.current_state = None

    def reset(self):
        """Resets environment to a random drug (state)"""
        self.current_state = random.choice(self.states)
        return self.current_state

    def step(self, action):
        """Takes a substitution action, returning the new state, reward, and done flag"""
        if action in self.states:
            self.current_state = action
            reward = -1
            done = False
        else:
            reward = -10
            done = True
        return self.current_state, reward, done

# Initialize environment
env = DrugPrescriptionEnv(df_cleaned)

# Q-learning Parameters
alpha = 0.1
gamma = 0.9
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.1
num_episodes = 500
max_steps = 50

# Initialize Q-table
q_table = {drug: {sub: 0 for sub in env.substitutes.get(drug, [])} for drug in env.states}

# Q-learning algorithm
for episode in range(num_episodes):
    state = env.reset()
    done = False
    steps = 0

    while not done and steps < max_steps:
        # Explore or Exploit
        if random.uniform(0, 1) < epsilon:
            action = random.choice(env.substitutes[state]) if env.substitutes[state] else None  # explore
        else:
            action = max(q_table[state], key=q_table[state].get) if q_table[state] else None  # exploit

        if not action:
            break

        # action
        new_state, reward, done = env.step(action)

        # Update Q-value using Bellman Equation
        old_value = q_table[state][action]
        next_max = max(q_table[new_state].values()) if new_state in q_table and q_table[new_state] else 0
        q_table[state][action] = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)

        state = new_state
        steps += 1

    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    # training progress
    if episode % 50 == 0:
        print(f"Episode {episode}/{num_episodes} completed")

print("Q-learning training completed!")

# Save Q-table .JSON
with open("q_table.json", "w") as f:
    json.dump(q_table, f)
print("Q-table saved successfully!")


Episode 0/500 completed
Episode 50/500 completed
Episode 100/500 completed
Episode 150/500 completed
Episode 200/500 completed
Episode 250/500 completed
Episode 300/500 completed
Episode 350/500 completed
Episode 400/500 completed
Episode 450/500 completed
Q-learning training completed!
Q-table saved successfully!


# Testing the Trained Q-Learning Model

In [12]:
state = env.reset()
print(f"Initial Drug: {state}")

for _ in range(5):  # Simulate 5 substitutions
    if state not in q_table or not q_table[state]:
        print("No valid substitutes available.")
        break

    # Choose best substitute based on Q-table
    action = max(q_table[state], key=q_table[state].get)
    new_state, reward, done = env.step(action)

    print(f"Action Taken: {state} -> {action}")
    print(f"New Drug: {new_state}, Reward: {reward}, Done: {done}")

    state = new_state
    if done:
        break


Initial Drug: 79321
Action Taken: 79321 -> Alidec 100 Injection
New Drug: 79321, Reward: -10, Done: True
