# Q Learning

## Spring mass system
Maximize the mass compression

In [13]:
%reset -f
import numpy as np
import mediapy as media
import matplotlib.pyplot as plt
from tqdm import tqdm
import mujoco
import time

# ---------------------------------------------------------------------------
# Q-learning hyperparameters
# ---------------------------------------------------------------------------
learning_rate = 0.002   # Step size of each temporal-difference update
gamma = 0.99            # Discount factor applied to the bootstrapped value
epsilon = 1.0           # Initial exploration probability (epsilon-greedy)
epsilon_decay = 0.995   # Factor epsilon is multiplied by after every episode
min_epsilon = 0.01      # Floor below which epsilon is never decayed

# ---------------------------------------------------------------------------
# Training schedule
# ---------------------------------------------------------------------------
episodes = 10_000
episode_length = 10     # Seconds of simulated time per episode
timestep = 0.01         # Seconds of simulated time per agent decision
max_steps_per_episode = int(episode_length / timestep)

# ---------------------------------------------------------------------------
# Discrete action and state spaces
# ---------------------------------------------------------------------------
actions = [-10, 0, 10]  # Control values written to the actuator
action_size = len(actions)
state_size = 10         # Number of discrete state bins; adjust to the real state space

# One row per state bin, one column per action, drawn uniformly from [-1, 1).
Q_table = np.random.uniform(-1, 1, (state_size, action_size))

# MuJoCo environment initialization
def init_mujoco_env(xml_path='../MujocoEnvs/springmass.xml'):
    """Load the spring-mass model and build the objects needed to simulate it.

    Args:
        xml_path: Location of the MJCF file. A relative path is resolved
            against the current working directory, so pass an explicit path
            when the notebook is not started from the directory the default
            assumes.

    Returns:
        A ``(model, data, renderer)`` tuple: the compiled ``mujoco.MjModel``,
        a fresh ``mujoco.MjData`` for it, and a ``mujoco.Renderer`` bound to
        the model.
    """
    model = mujoco.MjModel.from_xml_path(xml_path)
    data = mujoco.MjData(model)
    renderer = mujoco.Renderer(model)
    return model, data, renderer

# Epsilon-greedy action selection
def choose_action(state):
    """Pick an action index for ``state`` with the epsilon-greedy rule.

    Args:
        state: Row index into ``Q_table``.

    Returns:
        A uniformly random action index with probability ``epsilon``,
        otherwise the index of the highest Q-value in the row for ``state``.
    """
    should_explore = np.random.rand() < epsilon
    if not should_explore:
        # Exploit: greedy with respect to the current value estimates.
        return np.argmax(Q_table[state])
    # Explore: every action is equally likely.
    return np.random.randint(action_size)

# Training function
def train_mujoco():
    """Train the tabular Q-learning agent on the spring-mass simulation.

    Runs ``episodes`` episodes of at most ``max_steps_per_episode`` decisions
    each, updating the module-level ``Q_table`` in place with the one-step
    Q-learning rule and decaying the module-level ``epsilon`` after every
    episode. When training finishes, the first two and last two episodes are
    rendered and the per-episode totals are plotted.
    """
    global epsilon  # Decayed in place after every episode
    rewards = []
    task_values = []
    state_histories = {0: [], 1: [], 'second_last': [], 'last': []}  # For specific episodes
    model, data, renderer = init_mujoco_env()

    for episode in tqdm(range(episodes), desc="Training"):
        # Start every episode from the model's initial configuration.
        # Without the reset each episode would simply continue from wherever
        # the previous one left the simulation.
        mujoco.mj_resetData(model, data)
        # Read the starting state from the simulator (same quantisation as
        # step_environment) so the first TD update is credited to the state
        # the system is actually in.
        state = int(np.clip(int(data.qpos[0] * 10), 0, state_size - 1))
        total_reward = 0
        task_value = 0

        for step in range(max_steps_per_episode):
            action_idx = choose_action(state)
            action = actions[action_idx]
            next_state, reward, done = step_environment(data, action, timestep, model=model)

            # One-step Q-learning (off-policy TD) update.
            td_target = reward + gamma * np.max(Q_table[next_state])
            Q_table[state, action_idx] += learning_rate * (td_target - Q_table[state, action_idx])

            state = next_state
            total_reward += reward
            task_value += calculate_task_variable(state)  # Compression or task variable

            if done:
                break

        # Store episode results
        rewards.append(total_reward)
        task_values.append(task_value)

        # Keep a state history only for the first two and last two episodes.
        if episode < 2:
            history_key = episode
        elif episode == episodes - 2:
            history_key = 'second_last'
        elif episode == episodes - 1:
            history_key = 'last'
        else:
            history_key = None
        if history_key is not None:
            state_histories[history_key] = get_state_history(data)

        # Epsilon decay, never dropping below the configured floor.
        epsilon = max(min_epsilon, epsilon * epsilon_decay)

    # Render episodes
    render_episodes(model, state_histories, renderer)

    # Plot results
    plot_training_results(state_histories, rewards, task_values)

def step_environment(data, action, timestep, model=None):
    """Apply a control value, advance the simulation and report the transition.

    Args:
        data: ``mujoco.MjData`` holding the simulation state; modified in place.
        action: Control value written to the first actuator (``data.ctrl[0]``).
        timestep: Simulated time, in seconds, to advance per call. It is
            converted to a whole number of physics steps of length
            ``model.opt.timestep`` (at least one).
        model: ``mujoco.MjModel`` that ``data`` was created from. Optional
            only for backward compatibility with three-argument callers.

    Returns:
        ``(state, reward, done)`` where ``state`` is the first generalized
        coordinate scaled by 10, truncated to an integer and clipped to
        ``[0, state_size - 1]``; ``reward`` is ``-abs(qpos[0])``; and ``done``
        is always ``False``.

    Raises:
        TypeError: If no model is supplied and none can be obtained from
            ``data``.
    """
    if model is None:
        # NOTE(review): the MuJoCo Python bindings appear to expose the owning
        # model as ``data.model`` -- confirm for the installed version.
        model = getattr(data, 'model', None)
    if model is None:
        raise TypeError(
            "step_environment() needs the MjModel to advance the simulation; "
            "pass it as model=..."
        )

    data.ctrl[0] = action  # Apply action to the actuator

    # mj_step takes (model, data, nstep). The physics step length lives in
    # model.opt.timestep, so express the requested duration as a step count.
    n_substeps = max(1, int(round(timestep / model.opt.timestep)))
    mujoco.mj_step(model, data, n_substeps)

    position = data.qpos[0]
    # Quantize position to a state index. Every negative position lands in bin 0.
    state = np.clip(int(position * 10), 0, state_size - 1)
    # NOTE(review): the notebook header says the goal is to maximize
    # compression, but this reward is largest when qpos[0] == 0 -- confirm
    # the intended sign convention.
    reward = -abs(position)
    done = False  # Define terminal condition if required
    return state, reward, done

def calculate_task_variable(state):
    """Return the task variable for ``state`` (currently a placeholder).

    The argument is ignored: the function draws one sample from the standard
    normal distribution. Replace the body with the actual task variable
    (e.g. the compression).
    """
    placeholder_value = np.random.standard_normal()
    return placeholder_value

def get_state_history(data):
    """Return a state history for the episode (currently a placeholder).

    ``data`` is not read. The result is an array of standard-normal samples
    with one row per possible step and one column per state bin, i.e. shape
    ``(max_steps_per_episode, state_size)``.
    """
    history_shape = (max_steps_per_episode, state_size)
    return np.random.standard_normal(size=history_shape)

# Render specific episodes
def render_episodes(model, state_histories, renderer):
    """Render the recorded episodes and display them as titled videos.

    Args:
        model: ``mujoco.MjModel`` to render.
        state_histories: Mapping with the keys ``0``, ``1``, ``'second_last'``
            and ``'last'``; each value is a per-step sequence of states.
        renderer: ``mujoco.Renderer`` created for ``model``.

    A row of a history is replayed as the model configuration only when its
    length equals ``model.nq``; otherwise the frame shows the last pose that
    was set. Each episode becomes its own video, titled with the episode key.
    """
    # Look the camera up with mj_name2id (model.camera_name2id belongs to the
    # older mujoco-py API). Fall back to the default free camera when the
    # model has no camera called 'cam1'.
    cam = mujoco.MjvCamera()
    cam_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_CAMERA, 'cam1')  # Use your camera name
    if cam_id >= 0:
        cam.type = mujoco.mjtCamera.mjCAMERA_FIXED
        cam.fixedcamid = cam_id
    else:
        mujoco.mjv_defaultFreeCamera(model, cam)

    # A private MjData gives the renderer something to draw without touching
    # any simulation state owned by the caller.
    data = mujoco.MjData(model)
    mujoco.mj_forward(model, data)
    scene_option = mujoco.MjvOption()

    videos = {}
    for episode in [0, 1, 'second_last', 'last']:
        state_history = state_histories[episode]
        frames = []
        for step in range(max_steps_per_episode):
            if step < len(state_history):
                row = np.ravel(state_history[step])
                if row.size == model.nq:
                    data.qpos[:] = row
                    # Recompute derived quantities for the new configuration.
                    mujoco.mj_forward(model, data)
            renderer.update_scene(data, cam, scene_option)
            frames.append(renderer.render())
        videos[f"Episode {episode}"] = frames

    # One titled video per episode, played back at the decision rate.
    media.show_videos(videos, fps=int(1/timestep))

# Plotting training results
def plot_training_results(state_histories, rewards, task_values):
    """Draw a three-panel summary of a training run and show it.

    Args:
        state_histories: Mapping with the keys ``0``, ``1``, ``'second_last'``
            and ``'last'``; each value is plotted on the top panel.
        rewards: Per-episode cumulative rewards (middle panel).
        task_values: Per-episode task-variable totals (bottom panel).
    """
    fig, (states_ax, rewards_ax, task_ax) = plt.subplots(3, 1, figsize=(10, 10))

    # Top panel: state histories of the first two and last two episodes.
    for key in (0, 1, 'second_last', 'last'):
        states_ax.plot(state_histories[key], label=f'Episode {key}')
    states_ax.set_title('State Variables Over Episodes')
    states_ax.legend()

    # Middle panel: cumulative reward of every episode.
    rewards_ax.plot(rewards)
    rewards_ax.set_title('Cumulative Rewards per Episode')
    rewards_ax.set_xlabel('Episode')
    rewards_ax.set_ylabel('Cumulative Reward')

    # Bottom panel: accumulated task variable of every episode.
    task_ax.plot(task_values)
    task_ax.set_title('Task Variable per Episode')
    task_ax.set_xlabel('Episode')
    task_ax.set_ylabel('Task Variable Value')

    plt.tight_layout()
    plt.show()

# Testing function
def test_mujoco():
    """Roll out the greedy policy from ``Q_table`` once and show it as a video.

    A fresh environment is created, the action with the highest Q-value is
    applied at every decision for at most ``max_steps_per_episode`` steps,
    and one frame is rendered per decision.
    """
    model, data, renderer = init_mujoco_env()
    # Read the starting state from the simulator (same quantisation as
    # step_environment) rather than drawing an unrelated random index.
    state = int(np.clip(int(data.qpos[0] * 10), 0, state_size - 1))

    frames = []
    for _ in range(max_steps_per_episode):
        action_idx = np.argmax(Q_table[state])
        action = actions[action_idx]
        state, _reward, done = step_environment(data, action, timestep, model=model)
        # No camera object is defined in this function, so render with the
        # renderer's default camera and scene options.
        renderer.update_scene(data)
        frames.append(renderer.render())

        if done:
            break

    media.show_video(frames, fps=int(1/timestep))

# Main function to run training and testing
def main():
    """Run training, then the greedy evaluation rollout."""
    for stage in (train_mujoco, test_mujoco):
        stage()


if __name__ == "__main__":
    main()


ValueError: ParseXML: Error opening file '../MujocoEnvs/springmass.xml': No such file or directory