In [1]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random

# Function to calculate weight based on age and unit type
def calculate_weight(age_days, unit_type):
    """Draw a synthetic body weight (kg) for one patient.

    Args:
        age_days: Patient age in days. Only consulted for non-NICU patients.
        unit_type: ``'NICU'`` selects the gestational-age model; any other
            value selects the age-based model.

    Returns:
        Weight in kilograms, floored at 0.5 for NICU patients and at 2
        for everyone else.

    Note:
        Values are drawn from NumPy's global random state, so the result is
        only reproducible if the caller seeds ``np.random`` beforehand.
    """
    if unit_type == 'NICU':
        # NICU weight is driven by a randomly drawn gestational age
        # (24-40 weeks); age_days is intentionally not used in this branch.
        gestational_age_weeks = np.random.uniform(24, 40)
        weight_kg = 0.5 + (gestational_age_weeks - 24) * 0.2 + np.random.normal(0, 0.2)
        return max(weight_kg, 0.5)

    if age_days < 365:
        # First year: linear growth from 2 kg towards 10 kg, plus noise.
        weight_kg = 2 + (age_days / 365) * 8 + np.random.normal(0, 0.5)
    else:
        # After the first year: 2 kg per year on top of 10 kg, wider noise.
        weight_kg = 10 + (age_days / 365) * 2 + np.random.normal(0, 1.5)
    return max(weight_kg, 2)

# Generate synthetic data
def generate_synthetic_data(num_samples=1000, seed=None):
    """Build a synthetic ventilator time-series table for NICU/PICU patients.

    For every simulated patient a unit, age, weight, sex, medical history and
    respiratory profile are drawn at random. The patient then contributes
    ``duration`` rows in which PEEP, PIP, FiO2 and PS are lowered linearly
    with the step index (down to fixed floors) and VT, RR and SpO2 are
    derived from those settings with added noise and occasional events.

    Args:
        num_samples: Number of patients to simulate. Each patient yields
            several rows, so the returned frame is longer than this.
        seed: Optional integer. When given, NumPy's *global* random state is
            seeded before any value is drawn so the table can be reproduced.
            ``None`` (default) leaves the random state untouched.

    Returns:
        pandas.DataFrame with one row per patient per time step; the column
        names are the ones listed in ``columns`` at the end of the function.
    """
    if seed is not None:
        # The global state is seeded (rather than a local generator) because
        # calculate_weight() also draws from np.random's global state.
        np.random.seed(seed)

    sex_options = ['Male', 'Female']
    medical_history_nicu = ['Prematurity', 'RDS', 'BPD', 'Congenital Anomaly']
    medical_history_picu = ['Asthma', 'Pneumonia', 'Post-Surgical Recovery', 'Sepsis']
    # Per-profile distribution parameters; durations are in time steps.
    profiles = {
        'normal': {'VT_mean': 6, 'VT_std': 1.5, 'RR_mean': 45, 'RR_std': 10, 'SpO2_base': 93, 'min_duration': 10, 'max_duration': 25},
        'CLD': {'VT_mean': 5.5, 'VT_std': 1.2, 'RR_mean': 50, 'RR_std': 12, 'SpO2_base': 88, 'min_duration': 15, 'max_duration': 35},
        'RDS': {'VT_mean': 5, 'VT_std': 1.0, 'RR_mean': 55, 'RR_std': 15, 'SpO2_base': 85, 'min_duration': 20, 'max_duration': 40}
    }

    time_series_data = []

    for _ in range(num_samples):
        unit_type = np.random.choice(['NICU', 'PICU'])

        # np.random.randint excludes the upper bound: NICU ages are 0..27,
        # PICU ages are 29..6569.
        if unit_type == 'NICU':
            age_days = np.random.randint(0, 28)
        else:
            age_days = np.random.randint(29, 6570)

        weight_kg = calculate_weight(age_days, unit_type)
        sex = np.random.choice(sex_options)
        medical_history = np.random.choice(medical_history_nicu if unit_type == 'NICU' else medical_history_picu)
        # The profile only parameterises the draws below; its name is not
        # written to the output table.
        profile = np.random.choice(list(profiles.keys()))
        patient_profile = profiles[profile]

        # Patient-level baselines, fixed for the whole series.
        duration = np.random.randint(patient_profile['min_duration'], patient_profile['max_duration'])
        VT_base = np.random.normal(patient_profile['VT_mean'], patient_profile['VT_std'])
        RR_base = np.random.normal(patient_profile['RR_mean'], patient_profile['RR_std'])
        PIP_base = np.random.normal(22, 3)
        PEEP_base = np.random.normal(5, 1)
        FiO2_base = np.random.uniform(30, 60)
        SpO2_base = patient_profile['SpO2_base']
        Compliance_base = np.random.uniform(1, 1.5)
        Resistance_base = np.random.uniform(0.2, 0.35)
        PS_base = np.random.normal(10, 2)

        for t in range(duration):
            # Support is reduced linearly with the step index, down to a floor.
            PEEP = max(PEEP_base - 0.05 * t, 3)
            PIP = max(PIP_base - 0.1 * t, 15)
            FiO2 = max(FiO2_base - 0.2 * t, 21)
            PS = max(PS_base - 0.1 * t, 5)
            # Patient response as a function of how far support has dropped.
            VT = VT_base * (1 - 0.02 * (PEEP_base - PEEP)) * (1 + 0.01 * Compliance_base)
            RR = RR_base * (1 + 0.01 * (PEEP_base - PEEP)) * (1 + 0.02 * Resistance_base)
            SpO2 = SpO2_base - 0.5 * (PEEP_base - PEEP) - 0.2 * (PIP_base - PIP)

            # 5% chance of a deterioration event; otherwise a second,
            # independent 5% draw decides whether an improvement event occurs.
            if np.random.rand() < 0.05:
                SpO2 -= np.random.uniform(1, 5)
                RR += np.random.uniform(5, 10)
                VT -= np.random.uniform(0.2, 0.5)
            elif np.random.rand() < 0.05:
                SpO2 += np.random.uniform(1, 5)
                RR -= np.random.uniform(5, 10)
                VT += np.random.uniform(0.2, 0.5)

            # Measurement noise.
            VT += np.random.normal(0, 0.1)
            RR += np.random.normal(0, 0.5)
            SpO2 += np.random.normal(0, 0.3)

            # Clamp to the ranges used throughout the simulation.
            SpO2 = min(max(SpO2, 80), 100)
            VT = max(VT, 3)
            RR = max(RR, 20)

            # Time is recorded as the step index times 5.
            time_series_data.append([unit_type, age_days, weight_kg, sex, medical_history, t*5, VT, RR, PIP, PEEP, FiO2, SpO2, Compliance_base, Resistance_base, PS])

    columns = ["Unit", "Age (days)", "Weight (kg)", "Sex", "Medical History", "Time (min)", "VT (mL/kg)", "RR (breaths/min)", "PIP (cm H2O)", "PEEP (cm H2O)",
               "FiO2 (%)", "SpO2 (%)", "Compliance (mL/cm H2O)", "Resistance (cm H2O/L/s)", "PS (cm H2O)"]
    return pd.DataFrame(time_series_data, columns=columns)

# Build the synthetic cohort once; the environment cell below consumes it.
synthetic_data = generate_synthetic_data(num_samples=1000)


In [2]:
import gym
from gym import spaces

class WeaningEnv(gym.Env):
    """Toy ventilator-weaning environment backed by rows of a synthetic dataset.

    Observation (length 10, in this order): VT, RR, PIP, PEEP, FiO2, SpO2,
    compliance, resistance, PS, age in days -- all clipped to the bounds of
    ``observation_space``. An action is four additive adjustments applied to
    the first four observation entries; the remaining entries are never
    changed by an action.

    The classic Gym API is used: ``reset()`` returns the observation and
    ``step()`` returns ``(observation, reward, done, info)``.
    """

    # Episodes are cut off after this many steps.
    MAX_STEPS = 50

    # Dataset columns that make up the observation, in observation order.
    STATE_COLUMNS = [
        'VT (mL/kg)', 'RR (breaths/min)', 'PIP (cm H2O)', 'PEEP (cm H2O)',
        'FiO2 (%)', 'SpO2 (%)', 'Compliance (mL/cm H2O)',
        'Resistance (cm H2O/L/s)', 'PS (cm H2O)', 'Age (days)',
    ]

    def __init__(self, synthetic_data):
        """Store the dataset, declare the spaces and draw an initial state.

        Args:
            synthetic_data: DataFrame containing every column named in
                ``STATE_COLUMNS``; one row is sampled per episode.

        Raises:
            ValueError: if ``synthetic_data`` has no rows.
        """
        super().__init__()
        if len(synthetic_data) == 0:
            raise ValueError("synthetic_data must contain at least one row")
        self.synthetic_data = synthetic_data
        self.current_step = 0

        # Action space: adjustments to ventilator settings
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

        # Observation space: patient state
        self.observation_space = spaces.Box(low=0, high=100, shape=(10,), dtype=np.float32)
        self.state = None
        self.done = False
        self.reset()

    def reset(self):
        """Start a new episode from one randomly sampled dataset row.

        Returns:
            A float32 copy of the initial observation.
        """
        self.current_step = 0
        patient = self.synthetic_data.sample().iloc[0]
        raw_state = np.array([patient[column] for column in self.STATE_COLUMNS], dtype=np.float32)
        # Clip here as well: step() clips every later observation, so without
        # this the first observation could lie outside observation_space
        # (e.g. an age above 100 days).
        self.state = np.clip(raw_state, self.observation_space.low, self.observation_space.high)
        self.done = False
        # Hand out a copy so callers can never alias the internal state.
        return self.state.copy()

    def step(self, action):
        """Apply one adjustment and advance the episode by a step.

        Args:
            action: Array-like of four adjustments; values outside the
                bounds of ``action_space`` are clipped.

        Returns:
            Tuple ``(observation, reward, done, info)`` where observation is
            a float32 copy of the new state and info is an empty dict.
        """
        # Enforce the declared action bounds instead of trusting the caller.
        action = np.clip(np.asarray(action, dtype=np.float32),
                         self.action_space.low, self.action_space.high)

        # Work on a fresh array: mutating self.state in place would silently
        # change observations that were returned earlier and may already be
        # stored in a replay buffer.
        next_state = self.state.copy()
        next_state[:4] += action  # Apply action to ventilator settings
        self.state = np.clip(next_state, self.observation_space.low, self.observation_space.high)
        self.current_step += 1

        # Reward is negative of the weaning time + penalty for unsafe conditions
        reward = -1  # Time penalty
        if self.state[5] > 95:  # Example: SpO2 too high, indicating over-oxygenation
            reward -= 10
        # Example condition for successful weaning. Only the four entries an
        # action can change are tested: FiO2 and SpO2 (indices 4 and 5) are
        # never modified by step(), so including them made success depend on
        # values the agent cannot influence.
        if np.all(self.state[:4] < 10):
            reward += 100
            self.done = True

        if self.current_step >= self.MAX_STEPS:  # End episode after fixed number of steps
            self.done = True

        return self.state.copy(), reward, self.done, {}

    def render(self, mode='human'):
        """Print the current step counter and state vector."""
        print(f"Step: {self.current_step}, State: {self.state}")


In [3]:
class DQNAgent:
    """Epsilon-greedy agent with an online Q-network and a target network.

    NOTE(review): the network has ``action_size`` outputs and ``act`` returns
    that output vector directly as a continuous action, while ``replay``
    regresses *every* output towards the same scalar TD target. This differs
    from textbook DQN (one output per discrete action) -- confirm that it is
    intended before relying on the learned policy.
    """

    def __init__(self, state_size, action_size):
        """Create the replay buffer, hyper-parameters and both networks.

        Args:
            state_size: Length of the observation vector.
            action_size: Length of the action vector.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # Replay buffer
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()  # Q-network
        self.target_model = self._build_model()  # Target Q-network
        self.update_target_model()

    def _build_model(self):
        """Return a compiled two-hidden-layer MLP (24 units each, MSE loss)."""
        model = tf.keras.Sequential()
        model.add(tf.keras.Input(shape=(self.state_size,)))
        model.add(layers.Dense(24, activation='relu'))
        model.add(layers.Dense(24, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by newer Keras optimizers.
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Arrays are copied so that later in-place changes made by the caller
        or the environment cannot alter what has been stored.
        """
        self.memory.append((np.array(state), np.array(action), reward, np.array(next_state), done))

    def act(self, state):
        """Choose an action for ``state`` (shape ``(1, state_size)``).

        With probability ``epsilon`` a uniform random vector in [-1, 1] is
        returned; otherwise the network output, clipped to that same range so
        greedy and exploratory actions share one domain.
        """
        if np.random.rand() <= self.epsilon:
            return np.random.uniform(-1, 1, self.action_size)  # Random action for exploration
        # verbose=0: predict() otherwise prints a progress bar on every call.
        act_values = self.model.predict(state, verbose=0)
        return np.clip(act_values[0], -1.0, 1.0)

    def replay(self, batch_size):
        """Train the online network on a random minibatch from memory.

        Does nothing when fewer than ``batch_size`` transitions are stored
        (``random.sample`` would raise). Epsilon is decayed once per call
        that actually trains.
        """
        if batch_size < 1 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states = np.vstack([transition[0] for transition in minibatch])
        next_states = np.vstack([transition[3] for transition in minibatch])
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        # The target network is fixed during replay, so all bootstrap values
        # can be computed in a single batched forward pass.
        next_q = self.target_model.predict(next_states, verbose=0)
        targets = np.where(dones, rewards, rewards + self.gamma * np.amax(next_q, axis=1))

        # Every output unit is regressed to the same scalar target (see the
        # class-level review note), so no forward pass of the online network
        # is needed to build the label matrix.
        target_f = np.repeat(targets[:, None], self.action_size, axis=1).astype(np.float32)

        # batch_size=1 with shuffle=False keeps one gradient update per
        # transition in sampled order, while paying the fit() call overhead
        # only once per replay.
        self.model.fit(states, target_f, batch_size=1, epochs=1, shuffle=False, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load online-network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Write the online network's weights to the file ``name``."""
        self.model.save_weights(name)


: 

In [4]:
# Initialize the environment and the DQN agent
env = WeaningEnv(synthetic_data)
state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
agent = DQNAgent(state_size, action_size)
episodes = 1000  # Number of episodes to train the agent
batch_size = 32  # Size of the minibatch for replay
max_steps = 500  # Upper bound on steps per episode (the env may end earlier)

# Training loop
for e in range(episodes):
    # np.array(...) copies the observation before reshaping. np.reshape alone
    # returns a view of the environment's internal array, so a later in-place
    # update inside env.step() would also rewrite transitions that are
    # already stored in the replay buffer.
    state = np.array(env.reset()).reshape(1, state_size)

    for step in range(max_steps):
        action = agent.act(state)  # Get action from the agent
        next_state, reward, done, _ = env.step(action)  # Take action in the environment
        next_state = np.array(next_state).reshape(1, state_size)  # Private copy, shaped for the network
        agent.remember(state, action, reward, next_state, done)  # Store the experience in memory
        state = next_state  # Update the state to the next state

        if done:  # If the episode is finished
            agent.update_target_model()  # Sync the target network once per episode
            print(f"Episode: {e}/{episodes}, Time: {step}, Epsilon: {agent.epsilon:.2}")
            break

        if len(agent.memory) > batch_size:  # Train the agent with a minibatch if memory is sufficient
            agent.replay(batch_size)

# Save the trained model
# NOTE(review): newer Keras releases require weight files to end in
# ".weights.h5" -- confirm against the installed version before renaming.
agent.save("dqn_ventilator_model.h5")






2024-09-04 16:24:47.204576: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Episode: 0/1000, Time: 49, Epsilon: 0.92
Episode: 1/1000, Time: 49, Epsilon: 0.72
Episode: 2/1000, Time: 49, Epsilon: 0.56
Episode: 3/1000, Time: 49, Epsilon: 0.44
Episode: 4/1000, Time: 49, Epsilon: 0.34
Episode: 5/1000, Time: 49, Epsilon: 0.27
Episode: 6/1000, Time: 49, Epsilon: 0.21
Episode: 7/1000, Time: 49, Epsilon: 0.16
Episode: 8/1000, Time: 49, Epsilon: 0.13
Episode: 9/1000, Time: 49, Epsilon: 0.1
Episode: 10/1000, Time: 49, Epsilon: 0.079
Episode: 11/1000, Time: 49, Epsilon: 0.062
Episode: 12/1000, Time: 49, Epsilon: 0.048
Episode: 13/1000, Time: 49, Epsilon: 0.038
Episode: 14/1000, Time: 49, Epsilon: 0.029
Episode: 15/1000, Time: 49, Epsilon: 0.023
Episode: 16/1000, Time: 49, Epsilon: 0.018
Episode: 17/1000, Time: 49, Epsilon: 0.014
Episode: 18/1000, Time: 49, Epsilon: 0.011


2024-09-04 17:44:54.813383: W tensorflow/core/data/root_dataset.cc:273] Optimization loop failed: CANCELLED: Operation was cancelled


Episode: 19/1000, Time: 49, Epsilon: 0.01
Episode: 20/1000, Time: 49, Epsilon: 0.01
Episode: 21/1000, Time: 49, Epsilon: 0.01
Episode: 22/1000, Time: 49, Epsilon: 0.01
Episode: 23/1000, Time: 49, Epsilon: 0.01
