<a href="https://colab.research.google.com/github/Maxxx-VS/The-Founder/blob/master/42_3_RL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Установка необходимых библиотек
%%capture
!pip install "gymnasium[atari]"
!pip install autorom[accept-rom-license]

In [40]:
# Импорт необходимых библиотек + регистрация окружения
import gymnasium as gym
import ale_py
import torch
import numpy as np
import matplotlib.pyplot as plt
from IPython import display as ipythondisplay
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
gym.register_envs(ale_py)

In [41]:
# Создаем окружение с игрой
env = gym.make("ALE/Enduro-v5", render_mode='rgb_array')

In [42]:
# Проверяем кол-во действий
n_state = env.observation_space.shape[0]
print(f'Размерности пространств наблюдений и действий: {n_state}')

Размерности пространств наблюдений и действий: 210


### Алгоритм случайного поиска

In [45]:
def random_search(env, episodes=1000, max_steps=1000):
    best_reward = -np.inf
    best_action_sequence = []
    best_frames = []

    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        action_sequence = []
        frames = []

        for step in range(max_steps):
            action = env.action_space.sample()  # Случайное действие
            action_sequence.append(action)
            state, reward, done, truncated, info = env.step(action)
            total_reward += reward
            frames.append(env.render())

            if done or truncated:
                break

        if total_reward > best_reward:
            best_reward = total_reward
            best_action_sequence = action_sequence
            best_frames = frames

    return best_reward, best_action_sequence, best_frames

best_reward, best_action_sequence, best_frames = random_search(env)
print(f"Лучшая награда за случайный поиск: {best_reward}")

Лучшая награда за случайный поиск: 8.0


### Алгоритм восхождения на вершину

In [46]:
def hill_climbing(env, episodes=1000, max_steps=1000, noise_scale=0.1):
    best_reward = -np.inf
    best_action_sequence = [env.action_space.sample() for _ in range(max_steps)]
    best_frames = []

    for episode in range(episodes):
        # Добавляем шум к лучшей последовательности действий
        action_sequence = [action + np.random.normal(0, noise_scale) for action in best_action_sequence]
        action_sequence = [np.clip(int(a), 0, env.action_space.n-1) for a in action_sequence]

        state = env.reset()
        total_reward = 0
        frames = []

        for step in range(max_steps):
            action = action_sequence[step]
            state, reward, done, truncated, info = env.step(action)
            total_reward += reward
            frames.append(env.render())

            if done or truncated:
                break

        if total_reward > best_reward:
            best_reward = total_reward
            best_action_sequence = action_sequence
            best_frames = frames

    return best_reward, best_action_sequence, best_frames

best_reward_hc, best_action_sequence_hc, best_frames_hc = hill_climbing(env)
print(f"Лучшая награда за восхождение на вершину: {best_reward_hc}")

Лучшая награда за восхождение на вершину: 8.0


In [47]:
def save_video(frames, filename, fps=30):
    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(filename, fps=fps)

# Сохраняем видео для случайного поиска
save_video(best_frames, 'random_search.mp4')

# Сохраняем видео для восхождения на вершину
save_video(best_frames_hc, 'hill_climbing.mp4')

Moviepy - Building video random_search.mp4.
Moviepy - Writing video random_search.mp4





Moviepy - Done !
Moviepy - video ready random_search.mp4
Moviepy - Building video hill_climbing.mp4.
Moviepy - Writing video hill_climbing.mp4





Moviepy - Done !
Moviepy - video ready hill_climbing.mp4


In [48]:
print("АЛГОРИТМ СЛУЧАЙНОГО ПОИСКА:")
print(f"Best Reward: {best_reward}")
print(f"Best Action Sequence: {best_action_sequence}")

print("\nАЛГОРИТМ ВОСХОЖДЕНИЯ НА ВЕРШИНУ:")
print(f"Best Reward: {best_reward_hc}")
print(f"Best Action Sequence: {best_action_sequence_hc}")

АЛГОРИТМ СЛУЧАЙНОГО ПОИСКА:
Best Reward: 8.0
Best Action Sequence: [0, 2, 3, 2, 2, 0, 3, 1, 3, 2, 2, 2, 0, 1, 2, 2, 2, 0, 2, 0, 3, 2, 1, 0, 2, 2, 0, 2, 1, 3, 1, 1, 3, 1, 2, 2, 1, 1, 3, 2, 1, 3, 2, 1, 0, 1, 0, 0, 1, 3, 0, 2, 3, 2, 1, 0, 0, 2, 1, 2, 0, 0, 2, 3, 1, 2, 2, 0, 3, 0, 1, 2, 3, 0, 2, 1, 2, 3, 0, 0, 3, 3, 3, 3, 3, 2, 1, 3, 1, 0, 1, 3, 0, 1, 0, 2, 1, 2, 3, 3, 3, 1, 3, 2, 2, 2, 1, 0, 0, 0, 2, 0, 3, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 2, 1, 1, 2, 0, 2, 1, 3, 1, 1, 3, 0, 3, 3, 1, 2, 1, 3, 0, 3, 1, 1, 3, 0, 2, 2, 3, 0, 0, 2, 3, 0, 2, 0, 1, 0, 2, 0, 3, 0, 0, 3, 3, 2, 3, 0, 0, 0, 3, 0, 1, 1, 1, 2, 0, 2, 0, 1, 2, 1, 0, 1, 2, 2, 2, 2, 1, 3, 0, 3, 2, 3, 0, 1, 3, 3, 0, 1, 3, 3, 2, 0, 1, 0, 2, 0, 1, 3, 0, 1, 2, 0, 2, 0, 3, 0, 0, 2, 3, 0, 0, 1, 2, 3, 0, 0, 0, 0, 3, 2, 1, 0, 3, 3, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 3, 3, 2, 1, 2, 1, 0, 2, 2, 2, 1, 3, 2, 3, 3, 0, 0, 1, 3, 0, 0, 1, 1, 2, 2, 3, 2, 2, 0, 1, 2, 1, 2, 0, 3, 3, 1, 2, 1, 1, 3, 2, 3, 1, 2, 2, 2, 0, 1, 2, 1, 2, 2, 3, 2, 0, 2, 2, 3, 0, 2, 0, 1, 3,

In [50]:
from IPython.display import HTML
from base64 import b64encode

def show_video(video_path, video_width=600):
    with open(video_path, "rb") as video_file:
        video_url = f"data:video/mp4;base64,{b64encode(video_file.read()).decode()}"

    html_code = f'''
    <video width={video_width} controls>
        <source src="{video_url}" type="video/mp4">
    </video>
    '''
    return HTML(html_code)

In [51]:
# Отображение видео для случайного поиска
show_video('random_search.mp4')

In [52]:
# Отображение видео для восхождения на вершину
show_video('hill_climbing.mp4')