In [4]:

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random

# Seed every RNG the data/agent code draws from so runs are reproducible.
# NumPy generates the synthetic data and the exploration coin-flip; the stdlib
# `random` module picks exploratory actions and samples the replay memory, so
# seeding NumPy alone is not enough.
np.random.seed(42)
random.seed(42)

# Parameters for synthetic data
num_samples = 1000

# Generate synthetic plasma data: one independent uniform draw per feature.
# The draw order below determines the generated values for a given seed.
data = {
    'plasma_density': np.random.uniform(1e20, 1e21, num_samples),  # particles per cubic metre (m^-3)
    'plasma_temperature': np.random.uniform(10e6, 20e6, num_samples),  # Kelvin
    'magnetic_field_strength': np.random.uniform(0.5, 5.0, num_samples),  # Tesla
    'elongation': np.random.uniform(1.0, 2.0, num_samples),  # dimensionless
    'normalized_pressure': np.random.uniform(0.1, 0.5, num_samples),  # dimensionless
    'radius': np.random.uniform(0.5, 2.0, num_samples),  # meters
    'growth_rate': np.random.uniform(0.01, 0.2, num_samples),  # dimensionless
}

# Create a DataFrame (one row per synthetic sample, one column per feature)
df = pd.DataFrame(data)

# Generate stability labels based on some conditions
def label_stability(row):
    """Return 1 (stable) or 0 (unstable) for one sample using fixed thresholds.

    ``row`` is any mapping-like object indexable by feature name (for example a
    pandas row).  A sample counts as stable only when all four conditions hold:
    density above 1e20, temperature below 15e6, magnetic field strength above
    2.0 and elongation below 1.5.  All comparisons are strict.
    """
    # Guard clauses, evaluated in the same order as the rule is stated, so the
    # first failing condition decides the label and later keys are not read.
    if not (row['plasma_density'] > 1e20):
        return 0  # unstable
    if not (row['plasma_temperature'] < 15e6):
        return 0  # unstable
    if not (row['magnetic_field_strength'] > 2.0):
        return 0  # unstable
    if not (row['elongation'] < 1.5):
        return 0  # unstable
    return 1  # stable

# Apply the labeling function to every row (axis=1) and store the 0/1 result
# as a new 'stability' column appended after the feature columns.
df['stability'] = df.apply(label_stability, axis=1)

# Hyperparameters for the DRL model
STATE_SIZE = 7  # Number of features in a state vector (every df column except 'stability')
ACTION_SIZE = 2  # Two discrete actions, indexed 0 and 1 (original note: increase (0), decrease (1))
MEMORY_SIZE = 2000  # Maximum number of transitions kept in the replay memory
BATCH_SIZE = 32  # Number of transitions sampled per replay step
GAMMA = 0.95  # Discount factor
EPSILON = 1.0  # Exploration rate: probability of picking a random action (starts fully random)
EPSILON_DECAY = 0.995  # Multiplicative factor applied to EPSILON while it is above EPSILON_MIN
EPSILON_MIN = 0.01  # EPSILON is no longer decayed once it is not above this value
LEARNING_RATE = 0.001  # Learning rate passed to the Adam optimizer

# Experience Replay Memory
class ReplayMemory:
    """Fixed-capacity store of past experiences with uniform random sampling.

    Experiences are kept in ``self.memory``, a deque capped at ``MEMORY_SIZE``
    entries; once full, appending a new experience discards the oldest one.
    """

    def __init__(self):
        # Start empty; the maxlen bound makes the deque evict from the left.
        self.memory = deque([], MEMORY_SIZE)

    def add(self, experience):
        """Append one experience to the end of the memory."""
        store = self.memory
        store.append(experience)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored experiences chosen at random.

        Raises ``ValueError`` (from ``random.sample``) when fewer than
        ``batch_size`` experiences are stored.
        """
        drawn = random.sample(self.memory, batch_size)
        return drawn

# Define the DRL Model
class PlasmaGuardAI:
    """Epsilon-greedy deep Q-learning agent with experience replay.

    The agent owns a ``ReplayMemory`` of ``(state, action, reward, next_state,
    done)`` tuples and a small Keras network mapping a ``(1, STATE_SIZE)``
    state to ``ACTION_SIZE`` Q-values.  Exploration is governed by the
    module-level ``EPSILON``, which this class reads but never modifies.
    """

    def __init__(self):
        self.memory = ReplayMemory()
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two 24-unit ReLU layers, linear head)."""
        model = tf.keras.Sequential([
            # An explicit Input layer declares the feature width; passing
            # `input_dim=` to the first Dense layer triggers a warning in
            # newer Keras releases.
            layers.Input(shape=(STATE_SIZE,)),
            layers.Dense(24, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(ACTION_SIZE, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE))
        return model

    def act(self, state):
        """Choose an action index for ``state`` (array of shape ``(1, STATE_SIZE)``).

        With probability ``EPSILON`` a uniformly random action is returned
        (explore); otherwise the action with the largest predicted Q-value
        (exploit).  The result is always a plain Python ``int``.
        """
        if np.random.rand() <= EPSILON:
            return random.randrange(ACTION_SIZE)  # Explore
        # verbose=0: predict() would otherwise print a progress bar per call.
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))  # Exploit

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.add((state, action, reward, next_state, done))

    def replay(self):
        """Train the network on one random minibatch of stored transitions.

        Does nothing until at least ``BATCH_SIZE`` transitions are stored.
        For each sampled transition the target for the taken action is
        ``reward`` when ``done`` is true, else
        ``reward + GAMMA * max(Q(next_state))``; the other action keeps its
        current prediction so it contributes no error.

        The whole minibatch is processed with two ``predict`` calls and a
        single ``fit`` call (one gradient step on the batch) rather than three
        Keras calls per transition, which dominated the run time.
        """
        if len(self.memory.memory) < BATCH_SIZE:
            return
        minibatch = self.memory.sample(BATCH_SIZE)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Each stored state is a (1, STATE_SIZE) row; stack them into a batch.
        state_batch = np.vstack(states)
        next_state_batch = np.vstack(next_states)
        action_idx = np.asarray(actions, dtype=int)
        reward_vec = np.asarray(rewards, dtype=np.float32)
        done_mask = np.asarray(dones, dtype=bool)

        next_q = self.model.predict(next_state_batch, verbose=0)
        target_f = self.model.predict(state_batch, verbose=0)

        # Terminal transitions do not bootstrap from the next state.
        bootstrapped = reward_vec + GAMMA * np.amax(next_q, axis=1)
        target_f[np.arange(len(minibatch)), action_idx] = np.where(
            done_mask, reward_vec, bootstrapped
        )
        self.model.fit(state_batch, target_f, batch_size=len(minibatch), epochs=1, verbose=0)

# Training the Model
if __name__ == "__main__":
    # Seed TensorFlow so the network's initial weights are repeatable too.
    tf.random.set_seed(42)

    env = PlasmaGuardAI()  # The learning agent (kept under its original name)
    episodes = 1000
    steps_per_episode = 200

    # Extract features and labels from the DataFrame once; per-step `df.iloc`
    # row lookups are slow and the data never changes during training.
    # Features are every column except the last one ('stability').
    features = df.iloc[:, :-1].to_numpy(dtype=np.float64)
    labels = df['stability'].to_numpy()

    # Standardise each feature column.  Raw plasma_density values are around
    # 1e20-1e21; feeding that scale into a float32 network makes the squared
    # error overflow, so the model cannot train on the unscaled data.
    spread = features.std(axis=0)
    spread[spread == 0] = 1.0  # avoid division by zero for a constant column
    features = ((features - features.mean(axis=0)) / spread).astype(np.float32)

    for e in range(episodes):
        # Sample a random index for state from the synthetic dataset
        idx = np.random.randint(0, num_samples)
        state = features[idx].reshape(1, STATE_SIZE)  # Features as state
        total_reward = 0

        for step in range(steps_per_episode):
            action = env.act(state)  # Choose action
            next_idx = np.random.randint(0, num_samples)  # Simulate next state
            next_state = features[next_idx].reshape(1, STATE_SIZE)  # Features as next state

            # Reward structure, scored against the state the agent actually
            # observed when choosing the action.  The next state is an
            # independent random draw, so scoring against it would make the
            # reward unrelated to the observed features.
            if labels[idx] == 1 and action == 0:
                reward = 1  # Reward for maintaining stability
            elif labels[idx] == 0 and action == 1:
                reward = 1  # Reward for acting to stabilize
            else:
                reward = -1  # Penalty for incorrect action

            done = step == steps_per_episode - 1  # Terminal condition
            env.remember(state, action, reward, next_state, done)
            state, idx = next_state, next_idx
            total_reward += reward

            if done:
                print(f"Episode: {e + 1}/{episodes}, Total Reward: {total_reward}, Epsilon: {EPSILON:.2f}")
                break

            env.replay()
            # Exploration rate is decayed once per step, down to EPSILON_MIN.
            if EPSILON > EPSILON_MIN:
                EPSILON *= EPSILON_DECAY


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Nassir\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Nassir\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Nassir\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io

AttributeError: _ARRAY_API not found

SystemError: initialization of _pywrap_checkpoint_reader raised unreported exception