Project notebook using the Wright-Fisher model. All the necessary setup and imports are done below.

In [None]:
# Fetch the project repository (shell command issued through the notebook's `!` escape).
# NOTE(review): not idempotent - after the chdir below, re-running this cell would clone
# again relative to the new working directory; confirm this is only run once per session.
!git clone https://github.com/CBad1024/evo_dm_wright.git

import os
# Move into the cloned repository so the install below targets it.
os.chdir('evo_dm_wright')

# Editable install of the package found in the current directory.
!pip install -e .

from evodm.dpsolve import dp_env, backwards_induction, value_iteration, policy_iteration
from evodm.evol_game import define_mira_landscapes, evol_env
from evodm.learner import DrugSelector, hyperparameters, practice
import numpy as np
import pandas as pd
from mira_mdp import mira_env, get_sequences


The main code is defined here.

In [None]:
def main():
    """
    Run the MIRA MDP workflow end to end.

    Steps, in order:
      1. Build the environments via mira_env().
      2. Solve the DP environment with backwards induction, value iteration
         and policy iteration, printing each resulting policy (or its shape).
      3. Train a non-naive and a naive RL agent with practice() and print the
         policy each agent implies.
      4. Simulate the three DP policies with get_sequences() and print the
         resulting table and its mean 'fitness'.

    Takes no arguments and returns nothing; all output goes to stdout.
    """
    print("Initializing MIRA environments (DP and Simulation)...")
    envdp, env, learner_env, learner_env_naive = mira_env()

    # --- Dynamic-programming solvers (value functions are not used here) ---
    print("\nSolving MDP with Backwards Induction (Finite Horizon)...")
    policy_bi, _ = backwards_induction(envdp, num_steps=16)
    print("Policy shape from Backwards Induction:", policy_bi.shape)

    # NOTE(review): the next two labels say "shape" but the whole policy is
    # printed - confirm which of the two is intended.
    print("\nSolving MDP with Value Iteration...")
    policy_vi, _ = value_iteration(envdp)
    print("Policy shape from Value Iteration:", policy_vi)

    print("\nSolving MDP with Policy Iteration...")
    policy_pi, _ = policy_iteration(envdp)
    print("Policy shape from Policy Iteration:", policy_pi)

    # --- RL agents: the trained agent is element 1 of practice()'s result ---
    print("\nUsing non-naive RL to solve system:")
    agent_NN = practice(
        learner_env,
        naive=False,
        standard_practice=True,
        prev_action=True,
        num_episodes=50,
    )[1]
    policy_NN = agent_NN.compute_implied_policy(update=True)
    print("policy shape under non-naive RL: ", policy_NN)

    # NOTE(review): unlike the call above, no num_episodes is passed here, so
    # practice() falls back to its own default - confirm this is deliberate.
    print("\nUsing naive RL agent to solve system...")
    agent_N = practice(
        learner_env_naive,
        naive=True,
        standard_practice=True,
        prev_action=True,
    )[1]
    policy_N = agent_N.compute_implied_policy(update=True)
    print("policy shape under naive RL: ", policy_N)

    # --- Evaluate the DP policies by simulation ---
    # Each entry: (long label, short tag, policy, finite_horizon flag).
    evaluations = (
        ("Backwards Induction", "BI", policy_bi, True),
        ("Value Iteration", "VI", policy_vi, False),
        ("Policy Iteration", "PI", policy_pi, False),
    )
    for solver_name, tag, policy, is_finite in evaluations:
        print(f"\nSimulating policy from {solver_name}...")
        rollout = get_sequences(
            policy,
            env,
            num_episodes=5,
            episode_length=envdp.nS,
            finite_horizon=is_finite,
        )
        # NOTE(review): the label mentions "first 5 rows" but to_string()
        # renders the entire frame - confirm the intended output.
        print(f"{solver_name} Results (first 5 rows):")
        print(rollout.to_string())
        print(f"\nAverage fitness under {tag} policy:", rollout['fitness'].mean())

    # Simulation of the two RL policies is currently disabled; the original
    # commented-out code is kept below unchanged.
    # print("\nSimulating policy from Non-naive RL...")
    # RL_NN_results = get_sequences(policy_NN, env, num_episodes = 5, episode_length=envdp.nS, finite_horizon=True)
    # print("RL NN results:")
    # print(RL_NN_results.to_string())
    # print("\nAverage fitness under RL_NN policy:", RL_NN_results['fitness'].mean())
    #
    # print("\nSimulating policy from naive RL...")
    # RL_N_results = get_sequences(policy_N, env, num_episodes = 5, episode_length=envdp.nS, finite_horizon=True)
    # print("RL NN results:")
    # print(RL_N_results.to_string())
    # print("\nAverage fitness under RL_NN policy:", RL_N_results['fitness'].mean())



Now call the main function to run the analysis.

In [None]:
# Run the full workflow defined in main(); all results are printed to stdout.
main()