<a href="https://colab.research.google.com/github/RizanSM/zero_shot_llms_in_HIL_RL/blob/main/02_reacher_env/05_LLM_DIRECT/02_Generate_Trajectories_for_Model_Testing_LLM_DIRECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required libraries in your Google Colab environment
!pip install gymnasium[mujoco] mujoco stable-baselines3 -q

In [None]:
# Import the necessary libraries
import gymnasium as gym
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [None]:
# THE ENVIRONMENT
# Step 1.1: Choose the environment.
ENV_ID = 'Reacher-v5'

# Instantiate the chosen Gymnasium environment.
env = gym.make(ENV_ID)

In [None]:
from stable_baselines3 import PPO
from google.colab import drive
from google.colab import data_table

# Mount Google Drive; the model and trajectory paths used below live under it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Step A.6.2: Load the saved PPO model
MODEL_PATH = '/content/drive/MyDrive/data3_rp1/1_trained_models/4_ppo_reacher_llmf_8'           # Update directory location 1
model = PPO.load(MODEL_PATH)

Trajectory Collection

In [None]:
def generate_reacher_trajectories(env, model, num_episodes, seed=None):
    """
    Roll out a trained policy and record every transition it produces.

    The environment is seeded on the FIRST reset only. Passing the same seed
    to every ``env.reset`` call restarts the environment's random stream each
    time, so every episode would begin from the same sampled start and, with
    the deterministic policy used here, all episodes would be copies of one
    another. Seeding once keeps the whole run reproducible for a given seed
    while letting later resets continue from the environment's random stream.

    Parameters:
        env (gym.Env): The Gymnasium environment to roll out in.
        model (stable_baselines3.PPO): The trained PPO model; only its
            ``predict(state, deterministic=True)`` method is used.
        num_episodes (int): Number of episodes to run.
        seed (int | None): Seed passed to the first ``env.reset`` call.
            ``None`` leaves the environment unseeded.

    Returns:
        pd.DataFrame: One row per environment step with the columns
        "Episode" (1-based), "Timestep" (0-based within the episode),
        "State", "Action", "Reward" and "Next State". The columns are
        present even when no episode is run.
    """
    columns = ["Episode", "Timestep", "State", "Action", "Reward", "Next State"]
    trajectory_data = []

    for episode in range(num_episodes):
        # Seed once, then let subsequent resets draw from the same stream.
        reset_seed = seed if episode == 0 else None
        state, _ = env.reset(seed=reset_seed)
        timestep = 0
        done = False

        while not done:
            # Get action from the trained PPO model
            action, _ = model.predict(state, deterministic=True)

            next_state, reward, terminated, truncated, _ = env.step(action)

            # Store the transition
            trajectory_data.append({
                "Episode": episode + 1,
                "Timestep": timestep,
                "State": state,
                "Action": action,
                "Reward": reward,
                "Next State": next_state
            })

            # Update state and timestep
            state = next_state
            timestep += 1
            # The episode ends on either a terminal state or a truncation.
            done = terminated or truncated

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(trajectory_data, columns=columns)

0. GENERATING TRAJECTORIES FOR TESTING (LLM FEEDBACK DIRECT)

First LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, using seed 2
trajectory_df_1 = generate_reacher_trajectories(
    env,
    model,
    num_episodes=100,
    seed=2,
)

In [None]:
# Turn on Colab's DataFrame formatter, then show the first trajectory frame
# through data_table.DataTable (the bare last expression is the cell output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_1)

In [None]:
# Save the dataframe as a pickle file.
# The parent folder is created first so to_pickle does not fail when the
# target directory does not exist yet.
output_path_1 = '/content/drive/MyDrive/data3_rp1/3_test_trajectories/4_llm_d/1_llm_d_reacher_df_8.pkl'        # Update directory location 2
os.makedirs(os.path.dirname(output_path_1), exist_ok=True)
trajectory_df_1.to_pickle(output_path_1)


Second LLM feedback direct data frame


In [None]:
# Roll out the loaded policy for 100 episodes, using seed 10
trajectory_df_2 = generate_reacher_trajectories(
    env,
    model,
    num_episodes=100,
    seed=10,
)

In [None]:
# Turn on Colab's DataFrame formatter, then show the second trajectory frame
# through data_table.DataTable (the bare last expression is the cell output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_2)

In [None]:
# Save the dataframe as a pickle file.
# The parent folder is created first so to_pickle does not fail when the
# target directory does not exist yet.
output_path_2 = '/content/drive/MyDrive/data3_rp1/3_test_trajectories/4_llm_d/2_llm_d_reacher_df_8.pkl'       # Update directory location 3
os.makedirs(os.path.dirname(output_path_2), exist_ok=True)
trajectory_df_2.to_pickle(output_path_2)

Third LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, using seed 6
trajectory_df_3 = generate_reacher_trajectories(
    env,
    model,
    num_episodes=100,
    seed=6,
)

In [None]:
# Turn on Colab's DataFrame formatter, then show the third trajectory frame
# through data_table.DataTable (the bare last expression is the cell output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_3)

In [None]:
# Save the dataframe as a pickle file.
# The parent folder is created first so to_pickle does not fail when the
# target directory does not exist yet.
output_path_3 = '/content/drive/MyDrive/data3_rp1/3_test_trajectories/4_llm_d/3_llm_d_reacher_df_8.pkl'        # Update directory location 4
os.makedirs(os.path.dirname(output_path_3), exist_ok=True)
trajectory_df_3.to_pickle(output_path_3)

Fourth LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, using seed 20
trajectory_df_4 = generate_reacher_trajectories(
    env,
    model,
    num_episodes=100,
    seed=20,
)

In [None]:
# Turn on Colab's DataFrame formatter, then show the fourth trajectory frame
# through data_table.DataTable (the bare last expression is the cell output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_4)

In [None]:
# Save the dataframe as a pickle file.
# The parent folder is created first so to_pickle does not fail when the
# target directory does not exist yet.
output_path_4 = '/content/drive/MyDrive/data3_rp1/3_test_trajectories/4_llm_d/4_llm_d_reacher_df_8.pkl'        # Update directory location 5
os.makedirs(os.path.dirname(output_path_4), exist_ok=True)
trajectory_df_4.to_pickle(output_path_4)

Fifth LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, using seed 34
trajectory_df_5 = generate_reacher_trajectories(
    env,
    model,
    num_episodes=100,
    seed=34,
)

In [None]:
# Turn on Colab's DataFrame formatter, then show the fifth trajectory frame
# through data_table.DataTable (the bare last expression is the cell output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_5)

In [None]:
# Save the dataframe as a pickle file.
# The parent folder is created first so to_pickle does not fail when the
# target directory does not exist yet.
output_path_5 = '/content/drive/MyDrive/data3_rp1/3_test_trajectories/4_llm_d/5_llm_d_reacher_df_8.pkl'         # Update directory location 6
os.makedirs(os.path.dirname(output_path_5), exist_ok=True)
trajectory_df_5.to_pickle(output_path_5)