## Final_DQN_system.ipynb

In [None]:
# Basal Ganglia DQN Simulation
# --------------------------------------------
# This notebook models the Basal Ganglia neural pathway as a Markov Decision Process (MDP)
# and uses a Deep Q-Network (DQN) agent to learn optimal state transitions.
# Rewards are modulated by neurochemical levels: Dopamine, Acetylcholine, and Levodopa.

import numpy as np  # For numerical operations and random value generation
import gym  # OpenAI Gym is used to define and simulate the environment
import tensorflow as tf  # TensorFlow is used to build and train the neural network (Q-network)

# ----------------------
# BasalGangliaMDP Class
# ----------------------
class BasalGangliaMDP(gym.Env):
    """
    Custom OpenAI Gym environment that models the basal ganglia circuitry using six states
    (brain regions), two possible actions (activation/inhibition), and transition probabilities
    guided by neurochemical dynamics.

    States and actions are exchanged as strings (e.g. 'Cortex', "activation"). An episode
    starts at 'Cortex' (see reset) and is reported as done by step() once the
    'Thalamus' state is reached.
    """

    # Reward multipliers for each neurochemical regime, keyed by (state, next_state).
    # Each value is a pair: (multiplier when action == "activation", multiplier otherwise).
    # Only the Cortex -> Striatum transition distinguishes between the two actions.

    # Regime 1: low dopamine, high acetylcholine, low levodopa (simulated Parkinson's state)
    _UNTREATED_MULTIPLIERS = {
        ("Cortex", "Striatum"): (-100, 250),
        ("Striatum", "GPe"): (400, 400),
        ("Striatum", "GPi"): (-250, -250),
        ("GPe", "STN"): (500, 500),
        ("STN", "GPi"): (600, 600),
        ("GPi", "Thalamus"): (800, 800),
    }

    # Regime 2: low dopamine, high acetylcholine, moderate levodopa (treatment starts)
    _TREATED_MULTIPLIERS = {
        ("Cortex", "Striatum"): (200, -150),
        ("Striatum", "GPe"): (-200, -200),
        ("Striatum", "GPi"): (450, 450),
        ("GPe", "STN"): (-300, -300),
        ("STN", "GPi"): (-350, -350),
        ("GPi", "Thalamus"): (1000, 1000),
    }

    # Regime 3: normal dopamine and acetylcholine levels
    _NORMAL_MULTIPLIERS = {
        ("Cortex", "Striatum"): (200, -150),
        ("Striatum", "GPe"): (-200, -200),
        ("Striatum", "GPi"): (550, 550),
        ("GPe", "STN"): (-300, -300),
        ("STN", "GPi"): (-350, -350),
        ("GPi", "Thalamus"): (1000, 1000),
    }

    def __init__(self):
        super().__init__()  # Call the base class constructor (gym.Env)

        # Define the key brain regions involved in the basal ganglia loop
        self.states = ['Cortex', 'Striatum', 'GPe', 'STN', 'GPi', 'Thalamus']  # 6 states representing brain areas

        # Define the two types of actions representing neural activation or inhibition
        self.actions = ["activation", "inhibition"]  # Agent can choose to activate or inhibit
        self.action_space = gym.spaces.Discrete(len(self.actions))  # Action space: Discrete(2)

        # Define the transitions from one brain region to another based on chosen action.
        # An action that is missing for a state is not available from that state.
        self.transition_probs = {
            "Cortex": {
                "activation": {"Striatum": 1.0},  # Always go to Striatum if activation chosen
                "inhibition": {"Striatum": 1.0}   # Same for inhibition (simplified model)
            },
            "Striatum": {
                "inhibition": {'GPe': 0.5, 'GPi': 0.5},  # 50% chance to go to either GPe or GPi
            },
            "GPe": {
                "inhibition": {"STN": 1.0},  # Deterministic transition to STN
            },
            "STN": {
                "activation": {"GPi": 1.0},  # Activation leads to GPi
            },
            "GPi": {
                "inhibition": {"Thalamus": 1.0},  # Inhibition leads to Thalamus
            },
            "Thalamus": {
                "activation": {"Cortex": 1.0},  # Loops back to Cortex
            }
        }

        # Define base rewards for certain transitions (scaled by calculate_rewards)
        # The reward is given when a specific action leads from one state to another
        self.rewards = {
            ("Cortex", "activation", "Striatum"): 1.0,
            ("Cortex", "inhibition", "Striatum"): 1.0,
            ("Striatum", "inhibition", "GPe"): 1.0,
            ("Striatum", "inhibition", "GPi"): 1.0,
            ("GPe", "inhibition", "STN"): 1.0,
            ("STN", "activation", "GPi"): 1.0,
            ("GPi", "inhibition", "Thalamus"): 1.0,
            ("Thalamus", "activation", "Cortex"): 1.0
        }

        # Initial state when environment starts
        self.state = 'Cortex'

    def reset(self):
        """
        Put the environment back into its initial state.

        Output:
        - the name of the initial state ('Cortex')
        """
        self.state = 'Cortex'
        return self.state

    def step(self, action, dopamine, acetyl, levodopa):
        """
        Apply an action from the current state and sample the resulting transition.

        Inputs:
        - action: 'activation' or 'inhibition' (string)
        - dopamine, acetyl, levodopa: chemical concentrations used to scale the reward

        Output:
        - (next_state, reward, done, info) where next_state is a state name, reward comes
          from calculate_rewards, done is True once 'Thalamus' is reached, and info is an
          empty dict

        Raises:
        - ValueError if the action is not available from the current state
        """
        outcomes = self.transition_probs[self.state].get(action)
        if outcomes is None:
            raise ValueError(
                f"Action {action!r} is not available from state {self.state!r}"
            )

        # Sample by position so next_state stays a plain Python string
        candidates = list(outcomes)
        picked = np.random.choice(len(candidates), p=list(outcomes.values()))
        next_state = candidates[picked]

        reward = self.calculate_rewards(
            self.state, action, next_state, dopamine, acetyl, levodopa
        )
        self.state = next_state
        done = next_state == "Thalamus"
        return next_state, reward, done, {}

    def _select_multipliers(self, dopamine, acetyl, levodopa):
        """Return the multiplier table for the given chemical levels, or None if no regime matches."""
        low_dopamine_high_acetyl = dopamine <= 39.6 and acetyl > 2.5
        if low_dopamine_high_acetyl and levodopa < 100:
            return self._UNTREATED_MULTIPLIERS
        if low_dopamine_high_acetyl and 100 <= levodopa <= 250:
            return self._TREATED_MULTIPLIERS
        if 39.6 < dopamine <= 195.8 and 0.5 <= acetyl <= 2.5:
            return self._NORMAL_MULTIPLIERS
        return None

    def calculate_rewards(self, state, action, next_state, dopamine, acetyl, levodopa):
        """
        Reward shaping function influenced by neurochemical levels.
        Adjusts the base reward based on dopamine, acetylcholine, and levodopa ranges.

        Inputs:
        - state: current brain region (string)
        - action: 'activation' or 'inhibition'
        - next_state: resulting brain region
        - dopamine, acetyl, levodopa: chemical concentrations affecting behavior

        Output:
        - numerical reward after applying weight according to condition; 0.0 when the
          chemical levels fall outside every modelled regime or the transition has no
          multiplier (previously these cases returned None, which cannot be used in the
          Bellman update)
        """
        multipliers = self._select_multipliers(dopamine, acetyl, levodopa)
        if multipliers is None:
            return 0.0

        pair = multipliers.get((state, next_state))
        if pair is None:
            return 0.0

        on_activation, otherwise = pair
        multiplier = on_activation if action == "activation" else otherwise
        return self.rewards.get((state, action, next_state), 0) * multiplier


In [None]:
# ------------------
# Deep Q-Network Agent
# ------------------

class DQNAgent:
    """
    Deep Q-Network agent over one-hot encoded discrete states.

    Holds a main Q-network, a target Q-network with the same architecture, and a replay
    memory (`memory`) of (state_index, action, reward, next_state_index, done) tuples
    that callers append to directly.
    """

    def __init__(self, state_space_size, action_space_size, learning_rate=0.001, discount_factor=0.9, exploration_prob=0.9):
        # Dimensions of the state and action spaces
        self.state_space_size = state_space_size
        self.action_space_size = action_space_size

        # Hyperparameters
        self.learning_rate = learning_rate              # Learning rate for the optimizer
        self.discount_factor = discount_factor          # Gamma: importance of future rewards
        self.exploration_prob = exploration_prob        # Epsilon: probability of choosing a random action

        # Main Q-network: predicts Q-values for each action
        self.q_network = self.build_q_network()

        # Target Q-network: used for stable training
        self.target_q_network = self.build_q_network()
        self.target_q_network.set_weights(self.q_network.get_weights())  # Sync weights initially

        # Kept for backward compatibility; each network is compiled with its own Adam
        # instance in build_q_network, so this object is not attached to a model.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate)

        # Replay memory buffer to store past experiences
        self.memory = []

    def build_q_network(self):
        """Build and compile a feed-forward network mapping a one-hot state to one Q-value per action."""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(self.state_space_size,), dtype=tf.float32),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(self.action_space_size)  # Output: one Q-value for each action
        ])
        # Use the configured learning rate; compiling with the string 'adam' would
        # silently fall back to the optimizer's default rate.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(self.learning_rate),
            loss='mse',
        )
        return model

    def select_action(self, state):
        """
        Choose an action index for the given state index using an epsilon-greedy strategy.

        With probability `exploration_prob` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.
        """
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(self.action_space_size)  # Explore

        # Convert the state index to a one-hot row vector
        state_one_hot = np.zeros(self.state_space_size)
        state_one_hot[state] = 1

        # verbose=0 keeps predict from printing a progress bar on every call
        q_values = self.q_network.predict(state_one_hot.reshape(1, -1), verbose=0)
        return int(np.argmax(q_values[0]))  # Exploit: pick the action with max Q-value

    def update_q_network(self, batch_size, states=None):
        """
        Train the main Q-network on a random batch drawn from replay memory.

        Inputs:
        - batch_size: number of experiences to sample (without replacement)
        - states: accepted for backward compatibility; not used

        Does nothing until the memory holds at least `batch_size` experiences.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        # Sample a batch of experiences from memory
        picked = np.random.choice(len(self.memory), batch_size, replace=False)
        batch = [self.memory[i] for i in picked]
        state_indices, actions, rewards, next_state_indices, dones = zip(*batch)

        # Convert state indices to one-hot encoded vectors
        one_hot = np.eye(self.state_space_size)
        state_vectors = one_hot[np.array(state_indices)]
        next_state_vectors = one_hot[np.array(next_state_indices)]

        # Current estimates come from the main network; the bootstrap term must be
        # evaluated on the NEXT states using the target network.
        q_values = np.array(self.q_network.predict(state_vectors, verbose=0))
        next_q_values = np.asarray(
            self.target_q_network.predict(next_state_vectors, verbose=0)
        )

        # A missing (None) reward is treated as zero so one bad sample cannot abort training
        reward_values = np.array([0.0 if r is None else r for r in rewards], dtype=float)
        not_done = 1.0 - np.array(dones, dtype=float)

        # Bellman update: r + gamma * max_a' Q_target(s', a'), with no bootstrap on terminal steps
        targets = reward_values + self.discount_factor * np.max(next_q_values, axis=1) * not_done
        q_values[np.arange(batch_size), np.array(actions)] = targets

        # Train the main Q-network with updated Q-values
        self.q_network.fit(state_vectors, q_values, verbose=0)

    def update_target_network(self):
        """Copy weights from the main network to the target network."""
        self.target_q_network.set_weights(self.q_network.get_weights())


# -------------------
# Generate Inputs (dopamine, acetylcholine, levodopa)
# -------------------
# These simulate neurochemical variations for training. The three arrays are indexed
# together: the first 5000 entries form the low-dopamine / elevated-acetylcholine /
# low-levodopa group, the remaining 10000 the normal / therapeutic group.
# np.random.uniform samples from [low, high), so np.nextafter is used wherever the
# reward conditions require a value strictly greater than a threshold.

# Dopamine levels (low and high ranges), split at the 39.6 threshold used by the
# reward conditions so every sample belongs to exactly one group
dopamine_values = np.concatenate([
    np.random.uniform(0, 39.6, 5000),                               # Low dopamine range (possibly Parkinsonian)
    np.random.uniform(np.nextafter(39.6, np.inf), 195.8, 10000)     # Normal/High range (strictly above 39.6)
])

# Acetylcholine levels (high and normal ranges)
acetyl_values = np.concatenate([
    np.random.uniform(np.nextafter(2.5, np.inf), 5, 5000),          # Elevated acetylcholine (strictly above 2.5)
    np.random.uniform(0.5, 2.5, 10000)                              # Normal range
])

# Levodopa levels (low and therapeutic ranges)
levodopa_values = np.concatenate([
    np.random.uniform(0, 100, 5000),            # No or low Levodopa
    np.random.uniform(100, 250, 10000)          # Typical therapeutic range
])


In [None]:
# -------------------
# Train DQN Agent
# -------------------

# Create an instance of the custom BasalGangliaMDP environment
env = BasalGangliaMDP()

# Instantiate the Deep Q-Network (DQN) agent
agent = DQNAgent(
    state_space_size=len(env.states),           # Number of unique brain regions (states)
    action_space_size=env.action_space.n        # Number of discrete actions (activation/inhibition)
)

# Number of episodes (episodes = full runs through a brain loop)
num_episodes = 15000

# Batch size used for training the neural network using replay buffer
batch_size = 32

# Loop over all episodes
for episode in range(num_episodes):
    state_episode = []        # Rolling window of recently visited states
    total_reward = 0          # Accumulator for total reward earned in this episode

    # Sample neurochemical levels for this episode
    dopamine_value = dopamine_values[episode]
    acetyl_value = acetyl_values[episode]
    levodopa_value = levodopa_values[episode]

    # Reset environment to initial state ('Cortex')
    state = env.reset()
    state_episode.append(state)  # Track initial state
    print(f"\nEpisode {episode + 1}:")

    while True:
        # Convert the current state name to its index (needed for one-hot encoding)
        state_index = env.states.index(state)

        # Indices of the actions that have a transition defined from this state
        allowed_actions = [
            index for index, name in enumerate(env.actions)
            if name in env.transition_probs[state]
        ]

        # Select an action using epsilon-greedy strategy (explore or exploit)
        action = agent.select_action(state_index)

        # Some actions (like "activation") may not be allowed from certain states.
        # Fall back to a random allowed action instead of re-querying the agent in a
        # loop, which repeats network predictions and never ends if nothing is allowed.
        if action not in allowed_actions:
            action = int(np.random.choice(allowed_actions))

        # Perform the action and receive the next state, reward, and done flag
        next_state, reward, done, _ = env.step(
            env.actions[action],
            dopamine_value,
            acetyl_value,
            levodopa_value
        )

        # Log the transition in the console
        print(f"Transition: {env.states[state_index]} -> {next_state}, Reward: {reward}")

        # Convert the next state into an index
        next_state_index = env.states.index(next_state)

        # Store the experience tuple in replay memory. A missing reward is stored as 0.0
        # because the Bellman update does arithmetic on every stored reward.
        stored_reward = 0.0 if reward is None else reward
        agent.memory.append((state_index, action, stored_reward, next_state_index, done))

        # After at least two steps in the episode, update Q-network
        if len(state_episode) >= 2:
            agent.update_q_network(batch_size, states=state_episode)

        # Accumulate the reward
        if reward is not None:
            total_reward += reward

        # Move to the next state
        state = next_state

        # Stop as soon as the environment reports the episode as finished
        if done:
            break

        # Maintain a rolling window of the last 10 visited states
        state_episode.append(state)
        if len(state_episode) > 10:
            state_episode.pop(0)

    # Every 10 episodes, sync the target network with the Q-network (stabilizes training)
    if episode % 10 == 0:
        agent.update_target_network()

    # Print cumulative reward for this episode
    print(f"Episode: {episode + 1}, Total Reward: {total_reward}")


## arm_N_model_connect.py

In [None]:
import serial          # For serial communication
import time            # For delays
import os              # For file path and existence checking
import sys             # To exit the program
import struct          # (Not used here, can be removed if unnecessary)

on = "1"  # Default string intended for the serial port; not referenced by any code visible in this file

# ------------------------
# Helper function to read values from a file
# ------------------------
def fileread(line, downloads_path=r'C:\Users\amrit\Downloads', poll_interval=3):
    """
    Writes an initial message into a file and waits until a response file is available.
    Then, reads comma-separated values from that file and returns them as a list.

    Inputs:
    - line: text written to "_init_roboarm.txt" (the file is created or overwritten)
    - downloads_path: directory holding both files; defaults to the original
      hard-coded Downloads folder
    - poll_interval: seconds to sleep between checks for "roboarm.txt"

    Output:
    - list of strings obtained by splitting the contents of "roboarm.txt" on ","
      (values are returned as read, without stripping whitespace)

    NOTE(review): "roboarm.txt" is never deleted here, so a file left over from an
    earlier run is read immediately - confirm that this is intended.
    """
    # Create or overwrite a file named "_init_roboarm.txt" to indicate startup
    init_path = os.path.join(downloads_path, '_init_roboarm.txt')
    with open(init_path, 'w') as file:
        file.write(line)

    # Path to the file where the control values are expected
    values_path = os.path.join(downloads_path, 'roboarm.txt')

    # Wait until this file appears (could be written by another program)
    while not os.path.exists(values_path):
        time.sleep(poll_interval)

    # Once file is available, read its contents as comma-separated values
    with open(values_path, 'r') as file:
        return file.read().split(",")  # e.g. ['1.23', '0.45', '-0.67', 'done']

# ------------------------
# Main Serial Communication Loop
# ------------------------
# Signals from the microcontroller that are answered with a value from roboarm.txt,
# mapped to the position of that value in the list returned by fileread().
_REPLY_INDEX_BY_SIGNAL = {
    "Robo arm ready!": 0,
    "2": 1,
    "-0.5528708847336269": 2,
    "0.1485990549217123": 3,
}

# Bound before the try so the finally clause is safe even if opening the port fails
ser = None

try:
    # Open the serial port (update 'COM6' if using a different port)
    ser = serial.Serial('COM6', 115200)

    # Allow time for serial port to properly initialize
    time.sleep(2)

    # Run loop until task ends or user interrupts
    while True:
        if not ser.is_open:
            print("Serial port is not open.")
            break  # Exit loop if serial port is closed

        # flush() waits until pending outgoing data has been written; it does not
        # discard unread input, so no incoming message is lost here.
        ser.flush()

        # Read a line from serial input (from microcontroller)
        data = ser.readline()

        # Only the decode is guarded, so a decoding problem inside fileread() is not
        # misreported as corrupt serial data.
        try:
            line = data.decode('utf-8').strip()
        except UnicodeDecodeError:
            # If decoding fails (corrupt or binary data), print raw bytes
            print("Received (raw):", data)
            continue

        print("Received:", line)

        if line == "Task Ended :(":
            # Terminate the program; the finally clause below still closes the port
            print("Received 'Task Ended :('. Ending the program.")
            sys.exit()

        reply_index = _REPLY_INDEX_BY_SIGNAL.get(line)
        if reply_index is not None:
            # Answer the signal with the matching value from the file
            ser.write(fileread(line)[reply_index].encode())
            if reply_index == 0:
                print("Data Sent")

# Handle exceptions gracefully
except serial.SerialException as e:
    print("Serial Error:", e)
except KeyboardInterrupt:
    print("Program terminated by user.")

# Ensure port is closed properly
finally:
    if ser is not None and ser.is_open:
        ser.close()



##  Project Overview

This project simulates the **Basal Ganglia neural circuitry** using a **Markov Decision Process (MDP)**, trains a **Deep Q-Network (DQN)** agent to learn optimal neural transitions, and eventually **interacts with an Arduino-controlled robotic arm** based on learned behavior.

---

## MODULE 1: Basal Ganglia MDP Simulation

### What it models:

The **BasalGangliaMDP** class defines a simplified brain model with 6 neural regions and transition paths modulated by actions (`activation`, `inhibition`) and **neurochemical signals**.

### States:

* `'Cortex'`
* `'Striatum'`
* `'GPe'` (Globus Pallidus externa)
* `'STN'` (Subthalamic Nucleus)
* `'GPi'` (Globus Pallidus interna)
* `'Thalamus'`

### Actions:

* `"activation"`
* `"inhibition"`

### Transitions:

Defined probabilistically (some deterministic), e.g.,

* Cortex → Striatum on any action.
* Striatum → GPe/GPi (inhibition, 50-50 chance).
* GPi → Thalamus (inhibition).
* Thalamus → Cortex (activation, closes the loop).

### Neurochemical Reward Modulation:

The reward for each transition is adjusted based on the simulated levels of:

* **Dopamine** (low → Parkinsonian)
* **Acetylcholine**
* **Levodopa** (treatment)

> For example, low dopamine + high acetylcholine with little or no levodopa gives a **negative reward** for "activation" of Cortex → Striatum, while "inhibition" on the same transition is rewarded.

---

## MODULE 2: Deep Q-Network Agent (DQN)

### DQNAgent Class:

* Uses a simple neural network to estimate Q-values.
* Supports:

  * **Epsilon-greedy exploration**
  * **Replay memory**
  * **Target network for stability**
  * **One-hot encoded states**

### Neural Net Architecture:

```text
Input: One-hot encoded state (length 6)
Hidden: 64 → 64 ReLU
Output: Q-values for each of the 2 actions
```

### Training:

* Stores transitions `(state, action, reward, next_state, done)` in memory.
* Periodically samples a batch and updates Q-network using **Bellman equation**.

---

## MODULE 3: Training Loop

### Inputs Simulated:

* 15,000 episodes, each with:

  * A random dopamine, acetylcholine, and levodopa level (sampled from predefined distributions).

### Training Flow:

1. **Reset** to `'Cortex'`.
2. **Loop** until reaching `'Thalamus'`:

   * Choose action using `select_action()`.
   * Perform transition using `env.step()`.
   * Get neurochemically-modulated reward.
   * Store experience and update Q-network (if enough history).
3. Every 10 episodes: sync target network.
4. Print total reward at end of episode.

---

## MODULE 4: Serial Communication with Robo Arm

### Goal:

This part listens to a **serial port (COM6)** connected to a **microcontroller/Arduino**, receives signals, and sends back control commands based on pre-generated outputs.

### File Interaction:

* Writes the serial line that was just received (e.g. `"Robo arm ready!"`) to `"_init_roboarm.txt"` in `Downloads`.
* Waits for `roboarm.txt` to appear in Downloads.
* Reads values from `roboarm.txt`, expecting 4 comma-separated float values (possibly robotic joint angles or actions).
* Sends specific values back to the robot when it receives matching signals.

### Flow:

```text
1. Waits for serial data (e.g., "Robo arm ready!", "2", etc.)
2. Matches received signal to a value index.
3. Reads value from roboarm.txt and sends it over serial.
4. Terminates cleanly on "Task Ended :(" signal.
```

---

## Full Flow Diagram Summary

```text
[Dopamine/Acetyl/Levodopa Inputs] ───────┐
                                        ▼
  [BasalGangliaMDP Environment] ──> [Reward Calculation]
                                        │
                                        ▼
  [DQN Agent] ──────> [Action Selection (ε-greedy)]
                          │
                          ▼
  [State Transition + Experience Storage] ──> [Q-Network Update (Batch)]
                          │
                          ▼
                    Repeat till done (reaches Thalamus)
                                        │
                                        ▼
                Trained model informs control values
                                        │
                                        ▼
             [File: roboarm.txt] ←──── [Serial Port Reads]
                                        │
                                        ▼
               [Arduino Receives and Acts on Data]
```

---

## What’s Unique About This Project?

* Combines **neuroscience modeling** with **reinforcement learning**.
* Adjusts learning behavior based on **biological chemical concentrations**.
* Interfaces with **hardware (robot arm)** via **serial communication**.
* Helps simulate disorders like **Parkinson’s Disease** and see how medication (Levodopa) affects neural decision-making paths.


