In [None]:
import gymnasium as gym
import source.config as config
from source.algorithms import FarmAgentNeuralREINFORCEAdvantage
import source.farm_env # Import necessary to register the gym environment

In [None]:
# Keyword arguments handed to gym.make for 'FarmEnv-v0'; every value is read
# from the project-level config module so the notebook and the rest of the
# project share one set of settings.
farm_env_kwargs = dict(
    initial_budget=config.INITIAL_BUDGET,
    sheep_cost=config.SHEEP_COST,
    wheat_cost=config.WHEAT_COST,
    wool_price=config.WOOL_PRICE,
    wheat_price=config.WHEAT_PRICE,
    max_years=config.MAX_YEARS,
    wool_fixed_cost=config.WOOL_FIXED_COST,
    storm_probability=config.STORM_PROBABILITY,
    incest_penalty=config.INCEST_PENALTY,
    reward_std=config.SIGMA,
)

# Instantiate the registered farm environment with those settings.
env = gym.make('FarmEnv-v0', **farm_env_kwargs)

In [None]:
# Hyperparameters handed to the agent constructor below.
learning_rate = 0.001  # NOTE(review): not passed to the agent in this cell; kept in case other cells read it
policy_learning_rate = 1e-4
value_learning_rate = 1e-4
n_episodes = 500_000
start_epsilon = 1.0
epsilon_decay = start_epsilon / n_episodes  # reduce the exploration over time
final_epsilon = 0.1
gamma = 0.999  # presumably the discount factor; matches the "g0.999" tag in the weights directory name

# Both weight files live in the same checkpoint directory. Building the two
# paths from one variable keeps them in sync (the value-net path previously
# contained a stray double slash: 'agent_models//...').
weights_dir = 'agent_models/REINFORCENeuralAdvantage/s6-penalty2.5-edecay1-g0.999'

# Build the agent and point it at the saved policy/value network weights.
REINFORCE_agent = FarmAgentNeuralREINFORCEAdvantage(
    environment=env,
    policy_learning_rate=policy_learning_rate,
    value_learning_rate=value_learning_rate,
    epsilon=start_epsilon,
    epsilon_decay=epsilon_decay,
    final_epsilon=final_epsilon,
    gamma=gamma,
    policy_net_weights_path=f'{weights_dir}/policy_net_weights-500k.pth',
    value_net_weights_path=f'{weights_dir}/value_net_weights-500k.pth',
)

In [None]:
# Roll out the loaded agent (policy called with greedy=True) for at most
# `max_steps` environment steps, or until the first episode ends, printing
# every transition along the way.
max_steps = 30

try:
    state, info = env.reset()
    for _ in range(max_steps):
        action = REINFORCE_agent.policy(state, greedy=True)

        s_prime, reward, terminated, truncated, info = env.step(action)
        # `state` printed here is the state the action was chosen in, not the successor.
        print(f'Action: {action}', f'State: {state}', f'Reward: {round(reward,3)}', f'Terminated: {terminated}', truncated, info)

        if terminated or truncated:
            print(f'Final state: {s_prime}')
            print("============End of episode============")
            # No reset here: the environment is closed right after the loop.
            break

        state = s_prime
finally:
    # Release the environment even if the policy call or a step raises.
    env.close()