In [7]:
import math

import mujoco
import numpy as np
import torch
import torch.optim as optim

from actor_critic_rnn import ActorCriticRNN, update, compute_reward  # Import the model and the update function
from ConvAutoencoder import Encoder, Decoder

# Configura la simulazione MuJoCo
XML_FILE_PATH = "/Users/edoardozappia/Desktop/Tesi_Magistrale/irregular_shape_2D.xml"

# Parametri di training
latent_dim = 8192
action_dim = 8192
hidden_dim = 128
learning_rate = 1e-3
num_episodes = 100
max_steps_per_episode = 200
episode_termination_threshold = 0.1  # Soglia di tolleranza

# Inizializza il modello Actor-Critic
actor_critic_rnn = ActorCriticRNN(latent_dim, action_dim, hidden_dim)
optimizer = optim.Adam(actor_critic_rnn.parameters(), lr=learning_rate)

# Encoder pre-addestrato
encoder = Encoder()
encoder.load_state_dict(torch.load('encoder.pth'))
encoder.eval()

# Decoder pre-addestrato
decoder = Decoder()
decoder.load_state_dict(torch.load('decoder.pth'))
decoder.eval()  

# Carica il modello MuJoCo
model = mujoco.MjModel.from_xml_path(XML_FILE_PATH)
data = mujoco.MjData(model)

# Configura la telecamera
camera = mujoco.MjvCamera()
camera.type = mujoco.mjtCamera.mjCAMERA_FREE
camera.lookat = [0, 0, 0]
camera.distance = 2.0
camera.azimuth = 90
camera.elevation = -90

# Renderer per visualizzare la finestra
scene = mujoco.MjvScene(model, maxgeom=1000)
context = mujoco.MjrContext(model, mujoco.mjtFontScale.mjFONTSCALE_150)

# Parametri del movimento sinusoidale
freq_x = 0.5  # Frequenza della sinusoide lungo x
freq_y = 0.3  # Frequenza della sinusoide lungo y
freq_phi = 0.1  # Frequenza della sinusoide per la rotazione
amp_x = 5000.0  # Ampiezza della sinusoide lungo x
amp_y = 5000.0  # Ampiezza della sinusoide lungo y
amp_phi = 1.0  # Ampiezza della sinusoide per la rotazione
R = 0.2  # Raggio massimo della circonferenza
noise = 0.0  # Rumore bianco gaussiano

def get_frame():
    rgb_buffer = mujoco.mjr_readPixels(width=64, height=64, depth_buffer=None, context=context)
    frame = torch.tensor(rgb_buffer).float() / 255.0
    frame = frame.permute(2, 0, 1).unsqueeze(0)  # [C, H, W] -> [1, C, H, W]
    return frame

# Training loop basato su episodi
for episode in range(num_episodes):
    mujoco.mj_resetData(model, data)
    latent_state = encoder(get_frame())
    latent_state = latent_state.view(1, 1, -1)  # [batch_size=1, seq_len=1, latent_dim]

    episode_reward = 0.0
    for step in range(max_steps_per_episode):
        # Forze sinusoidali
        t = step * 0.01
        force_x = amp_x * torch.sin(2 * torch.pi * freq_x * t) + noise * torch.randn(1).item()
        force_y = amp_y * torch.cos(2 * torch.pi * freq_y * t) + noise * torch.randn(1).item()
        torque_phi = amp_phi * torch.sin(2 * torch.pi * freq_phi * t) + noise * torch.randn(1).item()

        # Controlla che l'oggetto rimanga dentro la circonferenza
        distance = (data.qpos[0]**2 + data.qpos[1]**2).sqrt()
        if distance >= R:
            direction_x = -data.qpos[0] / distance
            direction_y = -data.qpos[1] / distance
            force_x += direction_x * 1000.0
            force_y += direction_y * 1000.0

        # Applica le forze e i momenti
        data.qfrc_applied[0] = force_x
        data.qfrc_applied[1] = force_y
        data.qfrc_applied[2] = torque_phi

        # Avanza la simulazione
        mujoco.mj_step(model, data)

        # Ottieni il frame successivo
        real_next_frame = get_frame()
        real_next_latent_state = encoder(real_next_frame).view(1, 1, -1)

        # Predizione del Critic e Actor
        action, state_value = actor_critic_rnn(latent_state)
        predicted_state = latent_state + action

        # Calcola il reward
        reward = -torch.norm(predicted_state - real_next_latent_state, dim=2).item()
        episode_reward += reward

        # Termina l'episodio se lo stato predetto diverge troppo dal reale
        if torch.norm(predicted_state - real_next_latent_state).item() > episode_termination_threshold:
            print(f"Episode {episode} terminated early at step {step}.")
            break

        # Aggiorna il modello
        loss, actor_loss, critic_loss, _ = update(
            actor_critic_rnn, optimizer, latent_state, real_next_latent_state, gamma=0.99
        )

        # Imposta lo stato successivo
        latent_state = real_next_latent_state

    print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {episode_reward:.4f}")

# Salva il modello allenato
torch.save(actor_critic_rnn.state_dict(), "actor_critic_rnn_trained.pth")

print("Training completed.")


NameError: name 'MjSim' is not defined

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from ConvAutoencoder import Encoder, Decoder
from actor_critic_rnn import ActorCriticRNN, update, compute_reward
import matplotlib.pyplot as plt
import torch.nn.functional as F