# In [None]:  (notebook cell prompt left over from export)
import numpy as np
import tensorflow as tf
from TrainAgent import Trainer
from PPO import PPOAgent
from environment import env
from map_generator import Map_Generator as Generator

def main():
    """Train an agent through ``Trainer``, evaluate it, and print averaged metrics.

    Steps, in order:
      1. Switch the Keras backend default float type to ``float64``.
      2. Build a ``Trainer`` from an environment factory and two settings dicts.
      3. Call ``trainer.train()`` and then ``trainer.evaluate(num_episodes=10)``.
      4. Print the mean reward, episode length and similarity. Training figures
         are averaged over at most the last 100 recorded entries; evaluation
         figures are averaged over everything returned.

    Both result objects are indexed with the keys ``'episode_rewards'``,
    ``'episode_lengths'`` and ``'episode_similarities'``.
    """
    # Must run before the Trainer is constructed so it applies to whatever
    # Keras objects get created afterwards.
    tf.keras.backend.set_floatx('float64')

    def make_env():
        """Return a new ``env`` wrapping the reference map of a fresh generator."""
        # Integer drawn from [0, 100000); presumably the generator's seed --
        # TODO confirm against map_generator.
        generator = Generator(np.random.randint(0, 100000))
        reference_map = generator.ref_map()
        return env(reference_map, action_setting=1, pher_condition=False)

    # Agent settings, passed to Trainer as-is (their meaning is defined by the
    # Trainer / agent implementation, not in this file).
    agent_config = dict(
        actor_lr=1e-4,
        critic_lr=5e-4,
        entropy_beta=0.01,
        clip_ratio=0.2,
        gamma=0.99,
        lmbda=0.95,
        batch_size=64,
        n_epochs=3,
    )

    # Training-run settings, likewise passed to Trainer unchanged.
    run_config = dict(
        max_episodes=1000,
        max_steps=100,
        save_freq=50,
        save_dir='./models',
        save_maps=True,
        maps_dir='./maps',
    )

    trainer = Trainer(make_env, agent_config, run_config)
    train_log = trainer.train()

    # Evaluation happens after training, on the same trainer object.
    eval_log = trainer.evaluate(num_episodes=10)

    # Same selection as [-100:]: the trailing (up to) 100 entries.
    recent = slice(-100, None)

    print("Training complete! Final metrics:")
    train_reward = np.mean(train_log['episode_rewards'][recent])
    print(f"Average reward: {train_reward:.2f}")
    train_length = np.mean(train_log['episode_lengths'][recent])
    print(f"Average episode length: {train_length:.2f}")
    train_similarity = np.mean(train_log['episode_similarities'][recent])
    print(f"Average similarity: {train_similarity:.2f}")

    print("\nEvaluation metrics:")
    eval_reward = np.mean(eval_log['episode_rewards'])
    print(f"Average reward: {eval_reward:.2f}")
    eval_length = np.mean(eval_log['episode_lengths'])
    print(f"Average episode length: {eval_length:.2f}")
    eval_similarity = np.mean(eval_log['episode_similarities'])
    print(f"Average similarity: {eval_similarity:.2f}")

# Run main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()