<a href="https://colab.research.google.com/github/Helena26-ai/Rainforcement_project/blob/main/beakout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gym[atari,accept-rom-license] ale-py autorom

INFO: pip is looking at multiple versions of gym[accept-rom-license,atari] to determine which version is compatible with other requirements. This could take a while.
Collecting gym[accept-rom-license,atari]
  Using cached gym-0.26.2-py3-none-any.whl
Collecting ale-py
  Using cached ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Using cached ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
Installing collected packages: gym, ale-py
  Attempting uninstall: gym
    Found existing installation: gym 0.25.2
    Uninstalling gym-0.25.2:
      Successfully uninstalled gym-0.25.2
  Attempting uninstall: ale-py
    Found existing installation: ale-py 0.10.1
    Uninstalling ale-py-0.10.1:
      Successfully uninstalled ale-py-0.10.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dopamine-rl 

In [None]:
# Download the Atari 2600 ROMs, accepting the ROM license on the command line.
!AutoROM --accept-license

AutoROM will download the Atari 2600 ROMs.
They will be installed to:
	/usr/local/lib/python3.11/dist-packages/AutoROM/roms

Existing ROMs will be overwritten.
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/adventure.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/air_raid.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/alien.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/amidar.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/assault.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/asterix.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/asteroids.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/atlantis.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/atlantis2.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/backgammon.bin
Installed /usr/local/lib/python3.11/dist-packages/AutoROM/roms/bank_heist.bin
Inst

In [None]:
import gym
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import matplotlib.pyplot as plt
import imageio

# --- Preprocessing functions ---
def preprocess_frame(frame):
    """Resize a raw frame to 84x84 and scale its pixel values by 1/255.

    The frame is not converted to grayscale, so the input's colour channels are kept.

    Args:
        frame: Image array from the environment (assumed to hold values in
            0-255 with shape (H, W, C) — TODO confirm against the env).

    Returns:
        np.ndarray of dtype float32 with spatial size 84x84.
    """
    resized = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)  # resize to 84x84
    # Cast to float32 before scaling: dividing a uint8 array by 255.0 would yield
    # float64, doubling the memory of every state kept in the replay buffer even
    # though the network converts its input to float32 anyway.
    return resized.astype(np.float32) / 255.0

def stack_frames(stacked_frames, frame, is_new_episode):
    """Push a preprocessed frame onto the 4-frame history and build the network input.

    Args:
        stacked_frames: Deque holding the most recent frames. Ignored (and
            replaced) when ``is_new_episode`` is true.
        frame: Raw frame; it is passed through ``preprocess_frame`` first.
        is_new_episode: When true, the history is rebuilt as four copies of
            the current frame; otherwise the frame is appended to the deque.

    Returns:
        Tuple ``(stacked_state, stacked_frames)``. ``stacked_state`` is the
        frames concatenated along the channel axis and moved to channels-first
        order (12 channels for 3-channel frames).
    """
    processed = preprocess_frame(frame)

    if not is_new_episode:
        # maxlen=4 on the deque drops the oldest frame automatically.
        stacked_frames.append(processed)
    else:
        stacked_frames = deque((processed for _ in range(4)), maxlen=4)

    # Join along the last (channel) axis, then reorder (H, W, C) -> (C, H, W).
    channels_last = np.concatenate(tuple(stacked_frames), axis=2)
    channels_first = channels_last.transpose(2, 0, 1)
    return channels_first, stacked_frames

# --- Neural network ---
class DQN(nn.Module):
    """Convolutional Q-network mapping a stacked-frame state to one value per action.

    The first convolution expects 12 input channels (4 frames x 3 colour
    channels), and the first linear layer expects a 7x7x64 feature map, which
    is what the convolutions produce for 84x84 inputs.
    """

    def __init__(self, action_size):
        """Build the network.

        Args:
            action_size: Number of discrete actions, i.e. the output width.
        """
        super().__init__()

        feature_layers = [
            nn.Conv2d(12, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(7 * 7 * 64, 512),
            nn.ReLU(),
            nn.Linear(512, action_size),
        ]

        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the Q-values for a batch of states of shape (N, 12, 84, 84)."""
        return self.fc(self.conv(x))

# --- DQN agent ---
class DQNAgent:
    """Epsilon-greedy DQN agent with a replay buffer and a single online network.

    The same network is used both to pick actions and to compute the bootstrap
    target (there is no separate target network).
    """

    def __init__(self, action_size):
        """Create the agent, its network and its optimizer.

        Args:
            action_size: Number of discrete actions available in the environment.
        """
        self.action_size = action_size
        self.memory = deque(maxlen=10000)  # replay buffer; oldest transitions are evicted first
        self.gamma = 0.99  # discount factor for future rewards
        self.epsilon = 1.0  # current exploration rate (starts fully random)
        self.epsilon_min = 0.1  # decay stops once epsilon is no longer above this value
        self.epsilon_decay = 0.995  # multiplicative decay applied once per training step in replay()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = DQN(action_size).to(self.device)  # runs on GPU when available, else CPU
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.00025)
        self.criterion = nn.MSELoss()

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Single state array without a batch dimension.

        Returns:
            int: a random action with probability ``epsilon``, otherwise the
            action with the highest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size=32):
        """Run one gradient step on a random minibatch from the replay buffer.

        Does nothing while the buffer holds fewer than ``batch_size``
        transitions. After a training step, epsilon is decayed once if it is
        still above ``epsilon_min``.

        Args:
            batch_size: Number of transitions sampled for the update.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)
        states = torch.tensor(np.array(states), dtype=torch.float32).to(self.device)
        next_states = torch.tensor(np.array(next_states), dtype=torch.float32).to(self.device)
        # Explicit dtypes: gather() needs int64 indices, and float32 rewards keep the
        # target in the same dtype as the network output even if the environment
        # hands back NumPy float64 rewards.
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1).to(self.device)
        rewards = torch.tensor(rewards, dtype=torch.float32).to(self.device)
        dones = torch.tensor(dones, dtype=torch.float32).to(self.device)

        # squeeze(1) removes only the gathered column, so a batch of size 1 keeps
        # shape (1,) instead of collapsing to a scalar.
        q_values = self.model(states).gather(1, actions).squeeze(1)
        # The target is a constant for this update, so skip building its autograd graph.
        with torch.no_grad():
            next_q_values = self.model(next_states).max(1)[0]
            # (1 - dones) zeroes the bootstrap term for terminal transitions.
            target = rewards + self.gamma * next_q_values * (1 - dones)
        loss = self.criterion(q_values, target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# --- Training ---
def train_dqn(env_name="ALE/Breakout-v5", episodes=100):
    """Train a DQN agent and return the per-episode scores.

    After training, the model weights are written to ``dqn_model.pth`` and a
    plot of the scores to ``dqn_rewards.png`` (the plot is also shown).

    Args:
        env_name: Gym environment id passed to ``gym.make``.
        episodes: Number of episodes to play.

    Returns:
        list: total reward collected in each episode.
    """
    env = gym.make(env_name)
    try:
        agent = DQNAgent(env.action_space.n)
        scores = []
        # stack_frames builds a fresh frame history at the start of every episode.
        stacked_frames = None

        for e in range(episodes):
            state = env.reset()[0]
            state, stacked_frames = stack_frames(stacked_frames, state, True)
            score = 0
            done = False
            while not done:
                action = agent.act(state)
                next_state, reward, terminated, truncated, _ = env.step(action)
                next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                # Store only true termination: a truncated episode did not reach a
                # terminal state, so its last transition should still bootstrap.
                agent.remember(state, action, reward, next_state, terminated)
                state = next_state
                score += reward
                agent.replay()
                # End the episode on truncation too; otherwise the loop would keep
                # stepping an environment that has already hit its time limit.
                done = terminated or truncated
            scores.append(score)
            print(f"Episode {e+1}/{episodes} - Score: {score} - Epsilon: {agent.epsilon:.3f}")
    finally:
        # Release the environment even if training is interrupted or fails.
        env.close()

    # Save model
    torch.save(agent.model.state_dict(), "dqn_model.pth")

    # Plot rewards
    plt.plot(scores)
    plt.xlabel('Episode')
    plt.ylabel('Reward')
    plt.title('DQN Agent Performance')
    plt.savefig('dqn_rewards.png')
    plt.show()

    return scores

# --- GIF after training ---
def create_gif(env_name="ALE/Breakout-v5", model_path="dqn_model.pth", gif_filename="dqn_agent_play.gif", epsilon=0.05):
    """Play one episode with saved weights and write the frames to a GIF.

    Args:
        env_name: Gym environment id passed to ``gym.make``.
        model_path: Path of the state dict to load into the agent's network.
        gif_filename: Output path of the GIF.
        epsilon: Exploration rate used while recording. A small non-zero value
            keeps some randomness in the rollout; pass 0 for a purely greedy one.
    """
    env = gym.make(env_name, render_mode="rgb_array")  # render_mode="rgb_array" for RGB images
    try:
        agent = DQNAgent(env.action_space.n)
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        agent.model.load_state_dict(torch.load(model_path, map_location=agent.device))
        agent.model.eval()
        # A freshly built DQNAgent starts with epsilon = 1.0, which makes act()
        # always pick random actions; lower it so the loaded model drives the game.
        agent.epsilon = epsilon

        frames = []
        observation = env.reset()[0]
        # Only the first frame starts a new history; later frames are appended so
        # the network sees the same 4-frame stacks it was trained on.
        state, stacked_frames = stack_frames(None, observation, True)

        done = False
        while not done:
            action = agent.act(state)
            observation, reward, terminated, truncated, _ = env.step(action)
            # Stop on truncation as well as termination.
            done = terminated or truncated

            # Add the image to the GIF
            frame = observation  # RGB image
            if frame.shape[-1] == 1:  # single-channel (grayscale) image
                frame = np.repeat(frame, 3, axis=-1)  # expand to RGB
            frames.append(frame)

            state, stacked_frames = stack_frames(stacked_frames, observation, False)
    finally:
        # Release the environment even if loading the model or the rollout fails.
        env.close()

    # Write the GIF
    imageio.mimsave(gif_filename, frames, duration=0.05)
    print(f"GIF saved as {gif_filename}")

# --- Start ---
# Train for 2000 episodes; train_dqn also saves the model weights and the reward plot.
scores = train_dqn(episodes=2000)


Episode 1/2000 - Score: 2.0 - Epsilon: 0.435
Episode 2/2000 - Score: 0.0 - Epsilon: 0.228
Episode 3/2000 - Score: 1.0 - Epsilon: 0.106
Episode 4/2000 - Score: 1.0 - Epsilon: 0.100
Episode 5/2000 - Score: 4.0 - Epsilon: 0.100
Episode 6/2000 - Score: 0.0 - Epsilon: 0.100
Episode 7/2000 - Score: 0.0 - Epsilon: 0.100
Episode 8/2000 - Score: 1.0 - Epsilon: 0.100
Episode 9/2000 - Score: 0.0 - Epsilon: 0.100
Episode 10/2000 - Score: 2.0 - Epsilon: 0.100
Episode 11/2000 - Score: 0.0 - Epsilon: 0.100
Episode 12/2000 - Score: 2.0 - Epsilon: 0.100
Episode 13/2000 - Score: 2.0 - Epsilon: 0.100
Episode 14/2000 - Score: 0.0 - Epsilon: 0.100
Episode 15/2000 - Score: 3.0 - Epsilon: 0.100
Episode 16/2000 - Score: 2.0 - Epsilon: 0.100
Episode 17/2000 - Score: 0.0 - Epsilon: 0.100
Episode 18/2000 - Score: 3.0 - Epsilon: 0.100
Episode 19/2000 - Score: 0.0 - Epsilon: 0.100
Episode 20/2000 - Score: 2.0 - Epsilon: 0.100
Episode 21/2000 - Score: 1.0 - Epsilon: 0.100
Episode 22/2000 - Score: 3.0 - Epsilon: 0.1

In [None]:
# Load the saved weights (default model_path) and record one episode to a GIF file.
create_gif(gif_filename="dqn_agent_play.gif")


GIF saved as dqn_agent_play.gif


In [None]:
# Block this cell forever — presumably to keep the Colab runtime from idling out;
# confirm this is still wanted. Sleeping instead of spinning keeps the cell running
# without pinning a CPU core at 100%.
import time

while True:
    time.sleep(60)