In [4]:
import gdown
from pathlib import Path

# Google Drive file IDs keyed by the local filename each one is saved under.
# The filenames are the ones gdown reported in this cell's recorded output;
# pinning them explicitly keeps the load paths used by later cells stable.
MODEL_DOWNLOADS = {
    "reinforce_cartpole.h5": "1tqMYda2ltKC_ehC72SX4AiLVCY-N1alo",  # trained expert model
    "cartpole_imitation_model.h5": "1VuVeMVWr4uviZx5_a8JacNx2oJdN36_l",  # trained imitation model
}

for filename, file_id in MODEL_DOWNLOADS.items():
    # Skip files that are already on disk so re-running the notebook does not
    # hit the network again.
    if Path(filename).exists():
        print(f"{filename} already present, skipping download")
        continue

    # Use the Python API (the module is already imported) instead of shelling
    # out with the deprecated `--id` flag.
    downloaded_path = gdown.download(id=file_id, output=filename, quiet=False)
    if downloaded_path is None:
        raise RuntimeError(f"Download failed for {filename} (Drive id {file_id})")

Downloading...
From: https://drive.google.com/uc?id=1tqMYda2ltKC_ehC72SX4AiLVCY-N1alo
To: /content/reinforce_cartpole.h5
100% 37.8k/37.8k [00:00<00:00, 66.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VuVeMVWr4uviZx5_a8JacNx2oJdN36_l
To: /content/cartpole_imitation_model.h5
100% 84.4k/84.4k [00:00<00:00, 47.0MB/s]


In [5]:
import numpy as np
import tensorflow as tf
import gymnasium as gym

# Load the trained imitation model
model = tf.keras.models.load_model("cartpole_imitation_model.h5")

# Create the CartPole environment.
# env.render() is intentionally not called in the loop below: the environment
# is created without a render_mode, so render() would draw nothing and
# gymnasium would only log a warning.
env = gym.make("CartPole-v1")

num_episodes = 5  # Number of episodes to test
base_seed = 0  # Episode i is reset with seed base_seed + i so runs are repeatable
total_rewards = []

try:
    for episode in range(num_episodes):
        state, _ = env.reset(seed=base_seed + episode)
        done = False
        episode_reward = 0.0

        while not done:
            # Prepare input for the model: a single-row batch
            state_input = np.asarray(state).reshape(1, -1)

            # Inference: pick the action with the highest predicted score
            action_probs = model.predict(state_input, verbose=0)
            action = int(np.argmax(action_probs))

            # Take action in the environment; the episode ends on either
            # termination or truncation
            state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated

            episode_reward += reward

        total_rewards.append(episode_reward)
        print(f"Episode {episode + 1}: Reward = {episode_reward}")
finally:
    # Release the environment even if inference or stepping raised
    env.close()

# Calculate and display the average reward
avg_reward = np.mean(total_rewards)
print(f"Imitation Model Average Reward over {num_episodes} episodes: {avg_reward:.2f}")


  gym.logger.warn(


Episode 1: Reward = 500.0
Episode 2: Reward = 500.0
Episode 3: Reward = 500.0
Episode 4: Reward = 500.0
Episode 5: Reward = 500.0
Imitation Model Average Reward over 5 episodes: 500.00


In [6]:
import numpy as np
import tensorflow as tf
import gymnasium as gym
import cv2

def record_inference_video(model_path, video_filename, num_episodes=1, fps=30):
    """Record the model playing CartPole and save the frames as an MP4 file.

    Args:
        model_path: Path of the saved Keras model to load.
        video_filename: Path of the video file to write.
        num_episodes: Number of episodes to record back to back.
        fps: Frame rate declared to the video writer.
            NOTE(review): playback speed follows this value rather than the
            environment's own render rate -- confirm 30 is intended.

    Raises:
        IOError: If the video writer cannot be opened for ``video_filename``.
    """
    # Load the trained model
    model = tf.keras.models.load_model(model_path)

    # Create the environment; "rgb_array" makes render() return image frames
    env = gym.make("CartPole-v1", render_mode="rgb_array")

    frame_width = 600
    frame_height = 400
    frame_size = (frame_width, frame_height)

    out = None

    def write_frame():
        """Render the current environment state and append it to the video."""
        frame = env.render()  # Get the frame as a NumPy array
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # Convert to OpenCV format
        frame = cv2.resize(frame, frame_size)  # Keep the size declared to the writer
        out.write(frame)

    try:
        # Set up the video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_filename, fourcc, fps, frame_size)
        if not out.isOpened():
            # Without this check a missing codec or bad path would go
            # unnoticed and the success message below would still print.
            raise IOError(f"Could not open video writer for {video_filename!r}")

        for episode in range(num_episodes):
            state, _ = env.reset()
            done = False

            while not done:
                # Capture the state the model is about to act on
                write_frame()

                # Prepare model input and pick the highest-scoring action
                state_input = np.array(state).reshape(1, -1)
                action_probs = model.predict(state_input, verbose=0)
                action = int(np.argmax(action_probs))

                # Take action in the environment
                state, _, terminated, truncated, _ = env.step(action)
                done = terminated or truncated

            # Capture the final state reached by the last step as well
            write_frame()
    finally:
        # Always release the writer and the environment, even on error
        if out is not None:
            out.release()
        env.close()

    print(f"Video saved as: {video_filename}")

# Record the imitation model (default: one episode) into an MP4 file.
record_inference_video(
    model_path="cartpole_imitation_model.h5",
    video_filename="imitation_model_video.mp4",
)




Video saved as: imitation_model_video.mp4
