<a href="https://colab.research.google.com/github/RizanSM/zero_shot_llms_in_HIL_RL/blob/main/02_reacher_env/01_generating_trajectories/01_generating_trajectories_reacher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the RL dependencies into the kernel's own environment (%pip, not !pip);
# the extra is quoted so the shell never treats the brackets as a glob pattern.
%pip install -q "gymnasium[mujoco]" mujoco stable-baselines3

In [None]:
# Import the necessary libraries
import gymnasium as gym
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from google.colab import data_table
from google.colab import drive

# Mount Google Drive at /content/drive; the log, model and trajectory paths
# used later in this notebook all live under this mount point.
drive.mount('/content/drive')

In [None]:
# Path to save the training logs
log_dir = "/content/drive/MyDrive/data3_rp1/0_log_dir/0_ppo_reacher_initial"         # Update directory location 1
# Create the directory (including missing parents); a no-op if it already exists.
os.makedirs(log_dir, exist_ok=True)

In [None]:
# Build the Reacher-v5 environment and wrap it in a Monitor that is
# given log_dir as its log location.
env = Monitor(gym.make("Reacher-v5"), log_dir)

In [None]:
# Step 1.2: Initial Observation
# Gymnasium's reset() returns an (observation, info) pair, so unpack it and
# print only the observation the agent receives at the start of an episode.
obs, info = env.reset()
print("Initial Observation: ", obs)

In [None]:
# Step 1.3: Implement PPO with the Reacher environment
# Instantiate PPO with an MLP policy on the monitored environment created
# above; TensorBoard logs are pointed at log_dir.
model = PPO(policy="MlpPolicy", env=env, verbose=1, tensorboard_log=log_dir)

In [None]:
# Step 1.4: Train the model
# Initial training budget: 10,000 environment timesteps.
model.learn(total_timesteps=10_000)

In [None]:
# Read back the monitor.csv file from log_dir. The first line of the file is
# skipped (presumably a metadata header written by Monitor — confirm) and the
# terse column names are replaced with descriptive ones.
log_path = os.path.join(log_dir, "monitor.csv")
df = pd.read_csv(log_path, skiprows=1).rename(
    columns={"index": "episode", "r": "reward", "l": "length", "t": "time_step"}
)

In [None]:
# Step 1.5: Save the trained model to Google Drive
# NOTE: the file name spells "intial"; the load step uses the same spelling,
# so rename both together if the path is ever corrected.
model.save('/content/drive/MyDrive/data3_rp1/1_trained_models/0_ppo_reacher_intial_training')         # Update directory location 2

In [None]:
# Step 1.6: Load the saved PPO model from Google Drive
# This rebinds `model` to the policy restored from disk; the path must match
# the one used when saving (including the "intial" spelling).
model = PPO.load('/content/drive/MyDrive/data3_rp1/1_trained_models/0_ppo_reacher_intial_training')   # Update directory location 3

# TRAJECTORY GENERATION AND COLLECTION

In [None]:
def generate_reacher_trajectories(env, model, num_episodes, deterministic=False, seed=None):
    """
    Generates trajectories for the Reacher-v5 environment.

    Runs the policy for ``num_episodes`` complete episodes and records every
    transition. An episode ends as soon as the environment reports either
    ``terminated`` or ``truncated``.

    Parameters:
        env (gym.Env): The wrapped Gymnasium environment.
        model (stable_baselines3.PPO): The trained PPO model.
        num_episodes (int): Number of episodes to run.
        deterministic (bool): Forwarded to ``model.predict``. The default
            (False) keeps the original behaviour.
        seed (int | None): If given, passed to the first ``env.reset`` call
            only, so later episodes continue the same random stream. This
            seeds the environment only, not the policy's action sampling.
            The default (None) keeps the original unseeded behaviour.

    Returns:
        pd.DataFrame: One row per transition with the columns "Episode"
        (1-based), "Timestep" (0-based within the episode), "State",
        "Action", "Reward" and "Next State". The columns are present even
        when no transitions were collected (e.g. ``num_episodes == 0``).
    """
    # Fixed schema so an empty result still has the expected columns.
    columns = ["Episode", "Timestep", "State", "Action", "Reward", "Next State"]
    trajectory_data = []

    for episode in range(num_episodes):
        # Seed only the first reset; re-seeding every episode would replay
        # the same initial conditions each time.
        if seed is not None and episode == 0:
            state, _ = env.reset(seed=seed)
        else:
            state, _ = env.reset()
        timestep = 0
        done = False

        while not done:
            # Get action from the trained PPO model
            action, _ = model.predict(state, deterministic=deterministic)

            next_state, reward, terminated, truncated, info = env.step(action)

            # Store the transition
            trajectory_data.append({
                "Episode": episode + 1,
                "Timestep": timestep,
                "State": state,
                "Action": action,
                "Reward": reward,
                "Next State": next_state
            })

            # Update state and timestep
            state = next_state
            timestep += 1
            done = terminated or truncated

    return pd.DataFrame(trajectory_data, columns=columns)

In [None]:
# Roll out the loaded policy for 100 episodes and collect every transition
# into a single DataFrame.
trajectory_df = generate_reacher_trajectories(env, model, num_episodes=100)

In [None]:
# Turn on Colab's DataFrame formatter and show the collected trajectories
# as a Colab DataTable (the last expression is the cell's output).
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df)

In [None]:
# Check the data type of the first entry in each column
for column_name in ("Episode", "Timestep", "State", "Action", "Reward", "Next State"):
    print(type(trajectory_df[column_name][0]))

In [None]:
# Define the path to save the DataFrame (adjust the path as necessary)
trajectory_df_path = '/content/drive/MyDrive/data3_rp1/2_trajectories/0_initial_training/0_initial_trajectory_reacher_df.csv'     # Update directory location 4
# to_csv does not create missing folders, so make sure the target directory
# exists before writing.
os.makedirs(os.path.dirname(trajectory_df_path), exist_ok=True)
# Save the DataFrame to Google Drive as a CSV file
# NOTE(review): State / Action / Next State presumably hold NumPy arrays, which
# CSV can only store as text — confirm a lossless copy is kept elsewhere.
trajectory_df.to_csv(trajectory_df_path, index=False)

In [None]:
# Save the processed dataframe as a pickle file
trajectory_pkl_path = '/content/drive/MyDrive/data3_rp1/2_trajectories/0_initial_training/0_initial_trajectory_reacher_df.pkl'    # Update directory location 5
# to_pickle does not create missing folders, so make sure the target directory
# exists before writing.
os.makedirs(os.path.dirname(trajectory_pkl_path), exist_ok=True)
trajectory_df.to_pickle(trajectory_pkl_path)