Project file using Wright Fischer model

In [1]:
!git clone https://github.com/CBad1024/evo_dm_wright.git

import os
os.chdir('evo_dm_wright')

!pip install -e .

from evodm.dpsolve import dp_env, backwards_induction, value_iteration, policy_iteration
from evodm.evol_game import define_mira_landscapes, evol_env
from evodm.learner import DrugSelector, hyperparameters, practice
import numpy as np
import pandas as pd

fatal: destination path 'evo_dm_wright' already exists and is not an empty directory.
Obtaining file:///Users/chaaranathb/Developer/GitRepo/evo_dm_wright/evodm/notebooks/exploratory/evo_dm_wright
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: evodm
  Attempting uninstall: evodm
    Found existing installation: evodm 1.1.2
    Not uninstalling evodm at /Users/chaaranathb/Developer/GitRepo/evo_dm_wright, outside environment /Users/chaaranathb/Developer/GitRepo/evo_dm_wright/venv
    Can't uninstall 'evodm'. No files were found to uninstall.
[33m  DEPRECATION: Legacy editable install of evodm==1.1.2 from file:///Users/chaaranathb/Developer/GitRepo/evo_dm_wright/evodm/notebooks/exploratory/evo_dm_wright (setup.py develop) is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to add a pyproject.toml or enable --use-pep517, and use setuptools >= 64. If the resulting installation is not behaving as expected, try using --c

Definitions go here

In [2]:
def mira_env():
    """Initializes the MDP environment and a simulation environment."""
    drugs = define_mira_landscapes()
    # The dp_env is for solving the MDP
    envdp = dp_env(N=4, num_drugs=15, drugs=drugs, sigma=0.5)
    # The evol_env is for simulating policies
    env = evol_env(N=4, drugs=drugs, num_drugs=15, normalize_drugs=False,
                   train_input='fitness')
    # The DrugSelector agent is for the RL algorithm. It requires an hp object.
    hp = hyperparameters()
    hp.N = 4 # Ensure N is set for hyperparameters
    hp.NUM_DRUGS = 15 # Ensure NUM_DRUGS is set for hyperparameters
    learner_env = DrugSelector(hp=hp, drugs=drugs)
    learner_env_naive = DrugSelector(hp=hp, drugs=drugs)
    return envdp, env, learner_env, learner_env_naive # # Removed for simplicity, can be added back if needed

#generate drug sequences using policies from backwards induction,
#value iteration, or policy iteration
# added here
def get_sequences(policy, env, num_episodes=10, episode_length=20, finite_horizon=True):
    """
    Simulates the environment for a number of episodes using a given policy.

    Args:
        policy (np.array): The policy to follow.
        env (evol_env): The simulation environment.
        num_episodes (int): The number of simulation episodes.
        episode_length (int): The length of each episode.
        finite_horizon (bool): Whether the policy is for a finite horizon problem.
                               If True, policy is indexed by time step.

    Returns:
        pd.DataFrame: A dataframe containing the simulation history.
    """
    ep_number_list = []
    opt_drug_list = []
    time_step_list = []
    fitness_list = []

    for i in range(num_episodes):
        env.reset()
        for j in range(episode_length):
            current_state_index = np.argmax(env.state_vector)
            if finite_horizon:
                # For FiniteHorizon, policy is shaped (time_step, state)
                action_opt = policy[j, current_state_index]
            else:
                # For Value/PolicyIteration, policy is shaped (state,)
                action_opt = policy[current_state_index]

            # evol_env now expects 0-indexed actions
            env.action = int(action_opt)
            env.step()

            #save the optimal drug, time step, and episode number
            opt_drug_list.append(env.action)
            time_step_list.append(j)
            ep_number_list.append(i)
            fitness_list.append(np.mean(env.fitness))

    results_df = pd.DataFrame({
        'episode': ep_number_list,
        'time_step': time_step_list,
        'drug': opt_drug_list,
        'fitness': fitness_list
    })
    return results_df



Main Code here

In [3]:
def main():
    """
    Main function to solve the MIRA MDP and evaluate the policies.
    """
    print("Initializing MIRA environments (DP and Simulation)...")
    envdp, env, learner_env, learner_env_naive = mira_env() # Removed naive_learner_env from unpack

    # --- Solve the MDP using different algorithms ---
    print("\nSolving MDP with Backwards Induction (Finite Horizon)...")
    policy_bi, V_bi = backwards_induction(envdp, num_steps=16)
    print("Policy shape from Backwards Induction:", policy_bi.shape)

    print("\nSolving MDP with Value Iteration...")
    policy_vi, V_vi = value_iteration(envdp)
    print("Policy shape from Value Iteration:", policy_vi)

    print("\nSolving MDP with Policy Iteration...")
    policy_pi, V_pi = policy_iteration(envdp)
    print("Policy shape from Policy Iteration:", policy_pi)

    # --- RL Agent Training  ---
    # print("\nUsing non-naive RL to solve system:")
    # rewards_NN, agent_NN = practice(learner_env, naive=False, standard_practice=True, prev_action= True, compute_implied_policy_bool=True)
    # policy_NN = agent_NN.compute_implied_policy(update = True)
    # print("policy shape under non-naive RL: ", policy_NN)
    #
    # print("\nUsing naive RL agent to solve system...")
    # reward, agent_N = practice(learner_env_naive, naive=True, standard_practice=True, prev_action = True, compute_implied_policy_bool=True)
    # policy_N = agent_N.compute_implied_policy(update = True)
    # print("policy shape under naive RL: ", policy_N)




    # --- Evaluate the policies by simulation ---
    print("\nSimulating policy from Backwards Induction...")
    bi_results = get_sequences(policy_bi, env, num_episodes=5, episode_length=envdp.nS, finite_horizon=True)
    print("Backwards Induction Results (first 5 rows):")
    print(bi_results.to_string())
    print("\nAverage fitness under BI policy:", bi_results['fitness'].mean())

    print("\nSimulating policy from Value Iteration...")
    vi_results = get_sequences(policy_vi, env, num_episodes=5, episode_length=envdp.nS, finite_horizon=False)
    print("Value Iteration Results (first 5 rows):")
    print(vi_results.to_string())
    print("\nAverage fitness under VI policy:", vi_results['fitness'].mean())

    print("\nSimulating policy from Policy Iteration...")
    pi_results = get_sequences(policy_pi, env, num_episodes=5, episode_length=envdp.nS, finite_horizon=False)
    print("Policy Iteration Results (first 5 rows):")
    print(pi_results.to_string())
    print("\nAverage fitness under PI policy:", pi_results['fitness'].mean())


    # print("\nSimulating policy from Non-naive RL...")
    # RL_NN_results = get_sequences(policy_NN, env, num_episodes = 5, episode_length=envdp.nS, finite_horizon=True)
    # print("RL NN results:")
    # print(RL_NN_results.to_string())
    # print("\nAverage fitness under RL_NN policy:", RL_NN_results['fitness'].mean())
    #
    # print("\nSimulating policy from naive RL...")
    # RL_N_results = get_sequences(policy_N, env, num_episodes = 5, episode_length=envdp.nS, finite_horizon=True)
    # print("RL NN results:")
    # print(RL_N_results.to_string())
    # print("\nAverage fitness under RL_NN policy:", RL_N_results['fitness'].mean())



In [4]:
main()

Initializing MIRA environments (DP and Simulation)...


  self._set_intXint(row, col, x.flat[0])
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  if (matrix >= 0).all():



Solving MDP with Backwards Induction (Finite Horizon)...
Policy shape from Backwards Induction: (16, 16)

Solving MDP with Value Iteration...
Policy shape from Value Iteration: (3, 3, 10, 10, 3, 3, 10, 0, 3, 3, 0, 2, 11, 3, 3, 13)

Solving MDP with Policy Iteration...
Policy shape from Policy Iteration: (3, 3, 10, 10, 3, 3, 10, 0, 3, 3, 0, 2, 11, 3, 3, 13)

Simulating policy from Backwards Induction...
Backwards Induction Results (first 5 rows):
    episode  time_step  drug  fitness
0         0          0     3    0.160
1         0          1     3    0.185
2         0          2    10    2.504
3         0          3    10    3.002
4         0          4     1    2.047
5         0          5     3    0.092
6         0          6    10    2.437
7         0          7     0    2.024
8         0          8     3    0.225
9         0          9     3    1.653
10        0         10     0    2.033
11        0         11     2    2.230
12        0         12    11    2.018
13        0      