# 1. Import delle librerie

In [1]:
import sys
sys.path.append('/Users/macstudio/Desktop/ProgettoMario2/yolov5')  # Path alla tua cartella yolov5
import os
import torch
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym.wrappers import GrayScaleObservation
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from matplotlib import pyplot as plt
from models.common import DetectMultiBackend  # Caricatore YOLOv5
from utils.general import non_max_suppression  # Per la gestione delle predizioni
from utils.torch_utils import select_device  # Per scegliere il dispositivo
import numpy as np
import cv2
import gym

# 2. Configurazione del dispositivo e YOLO

In [2]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
if device.type == 'mps':
    print("La GPU è configurata tramite Metal Performance Shaders (MPS).")
else:
    print("La GPU non è configurata, utilizzo CPU.")

print(f"Usando il dispositivo: {device}")

yolo_model_path = '/Users/macstudio/Desktop/ProgettoMario2/yolov5/runs/train/exp2/weights/best.pt'
yolo_model = DetectMultiBackend(yolo_model_path, device=device)
print(f"YOLOv5 model loaded from: {yolo_model_path}")

La GPU è configurata tramite Metal Performance Shaders (MPS).
Usando il dispositivo: mps


Fusing layers... 
YOLOv5l summary: 267 layers, 46156743 parameters, 0 gradients, 107.8 GFLOPs


YOLOv5 model loaded from: /Users/macstudio/Desktop/ProgettoMario2/yolov5/runs/train/exp2/weights/best.pt


# 3. Definizione dell'ambiente

In [3]:
def create_env():
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env = GrayScaleObservation(env, keep_dim=True)
    return env

# Crea l'ambiente
base_env = create_env()

# 4. Feature Extraction con YOLO

In [4]:
def extract_yolo_features(state, resize_dim=(640, 640)):
    if len(state.shape) == 4:
        state = state[0]

    if state.shape[2] == 1:
        state = np.repeat(state, 3, axis=2)
    elif state.shape[2] == 4:
        state = state[:, :, :3]

    resized_state = cv2.resize(state, resize_dim)
    state_tensor = torch.tensor(resized_state, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0).to(device) / 255.0

    with torch.no_grad():
        pred = yolo_model(state_tensor)
        results = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)

    feature_vector = torch.zeros((len(yolo_model.names),), dtype=torch.float32)
    for result in results:
        if result is not None:
            for *box, conf, class_id in result:
                feature_vector[int(class_id)] += 1

    return feature_vector

# 5. Custom Environment Wrapper

In [6]:
class CustomEnvWrapper(gym.Wrapper):
    def __init__(self, env, yolo_model):
        super().__init__(env)
        self.env = env
        self.yolo_model = yolo_model

    def reset(self):
        state = self.env.reset()
        features = extract_yolo_features(state)
        combined_state = self._combine_state(state, features)
        return combined_state

    def step(self, action):
        state, reward, done, info = self.env.step(action)
        features = extract_yolo_features(state)
        combined_state = self._combine_state(state, features)
        return combined_state, float(reward), bool(done), info

    def _combine_state(self, state, features):
        state_flat = state.flatten().astype(np.float32)
        features_np = features.cpu().numpy().astype(np.float32)
        combined = np.concatenate((state_flat, features_np), axis=0)
        return combined

    @property
    def observation_space(self):
        original_shape = self.env.observation_space.shape
        flattened_size = np.prod(original_shape)
        yolo_features_size = len(self.yolo_model.names)
        combined_size = flattened_size + yolo_features_size
        return gym.spaces.Box(low=0.0, high=255.0, shape=(combined_size,), dtype=np.float32)

# Avvolgi l'ambiente
wrapped_env = DummyVecEnv([lambda: CustomEnvWrapper(base_env, yolo_model)])
wrapped_env = VecFrameStack(wrapped_env, n_stack=4, channels_order='last')

# 6. Callback per il salvataggio

In [7]:
class TrainAndLoggingCallback(BaseCallback):
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model_{self.n_calls}')
            self.model.save(model_path)
        return True

CHECKPOINT_DIR = './train/'
LOG_DIR = './logs/'

callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 7. Addestramento del modello

In [7]:
model = PPO(
    'MlpPolicy',
    wrapped_env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    learning_rate=0.000003,
    n_steps=1024,
    device=device
)

model.learn(total_timesteps=10000000, callback=callback)
model.save('thisisatestmodel_with_yolo')

Using mps device
Logging to ./logs/PPO_9
-----------------------------
| time/              |      |
|    fps             | 33   |
|    iterations      | 1    |
|    time_elapsed    | 30   |
|    total_timesteps | 1024 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 31            |
|    iterations           | 2             |
|    time_elapsed         | 64            |
|    total_timesteps      | 2048          |
| train/                  |               |
|    approx_kl            | 3.5125297e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.0202        |
|    learning_rate        | 3e-06         |
|    loss                 | 96.1          |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000304     |
|    value_loss           | 223           |
-

# 8. Testing del modello

In [None]:
model = PPO.load('./train/best_model_1000000')

# Associa wrapped_env a env per il testing
env = wrapped_env

state = env.reset()
import warnings
warnings.filterwarnings("ignore")  # Sopprime i warning

episode = 0
victories = 0
last_world = 1
last_stage = 1

# Test del modello
while True:
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    env.render()

    if done:
        episode += 1
        current_world = info[0].get("world", 1)
        current_stage = info[0].get("stage", 1)

        if (current_world > last_world) or (current_stage > last_stage):
            victories += 1
            print(f"Vittoria nell'episodio {episode}! Ora Mario è nel mondo {current_world}-{current_stage}.")

        last_world = current_world
        last_stage = current_stage
        state = env.reset()

2025-01-03 16:30:28.238 Python[93961:136573041] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/lc/zlvl7yzd6zn7xdjhl8dh61_w0000gn/T/org.python.python.savedState


KeyboardInterrupt: 

: 