# 🧠 Aviator RL Agent - Google Colab
Simulação e treinamento de um agente PPO para o jogo Aviator usando Aprendizado por Reforço.

In [None]:
!pip install stable-baselines3 gym torch matplotlib


## 🕹️ Ambiente Aviator

In [None]:
import numpy as np
import gym
from gym import spaces

class AviatorGame:
    """Minimal simulator for individual rounds of the Aviator crash game."""

    def simulate_round(self):
        """Draw the crash multiplier for one round.

        The multiplier is 1 plus an exponential sample (scale 1.5), capped
        at 100 and rounded to two decimal places.
        """
        raw_multiplier = np.random.exponential(scale=1.5) + 1
        capped_multiplier = min(raw_multiplier, 100)
        return round(capped_multiplier, 2)

    def play(self, cashout_multiplier):
        """Play one round with the given cash-out target.

        Returns a ``(crash_point, win, reward)`` tuple. The round is won when
        the crash point is at least the requested multiplier; the reward is
        the requested multiplier on a win and 0 otherwise.
        """
        crash_point = self.simulate_round()
        won = crash_point >= cashout_multiplier
        if won:
            payout = cashout_multiplier
        else:
            payout = 0
        return crash_point, won, payout

class AviatorEnv(gym.Env):
    """Gym environment in which the agent picks a cash-out multiplier per round.

    Observation: crash points of the last ``history_length`` rounds as a
    float32 vector (all ones right after a reset).
    Action: a one-element array holding the cash-out multiplier, bounded by
    ``action_space`` (1.01 to 5.0).
    Reward: the reward value returned by ``AviatorGame.play`` for the round.

    NOTE(review): this class follows the legacy Gym API (``reset`` returns
    only the observation, ``step`` returns a 4-tuple) -- confirm that the
    installed gym / stable-baselines3 versions still accept it.
    """

    def __init__(self):
        super().__init__()
        self.game = AviatorGame()
        self.history_length = 10
        self.action_space = spaces.Box(low=np.array([1.01]), high=np.array([5.0]), dtype=np.float32)
        self.observation_space = spaces.Box(low=1.0, high=100.0, shape=(self.history_length,), dtype=np.float32)
        self.history = self._blank_history()

    def _blank_history(self):
        """Return a fresh all-ones history in the observation space's dtype."""
        # float32 on purpose: a float64 array is not contained in a float32 Box.
        return np.ones(self.history_length, dtype=np.float32)

    def reset(self):
        """Clear the crash history and return the initial observation."""
        self.history = self._blank_history()
        # Hand out a copy so callers cannot mutate the internal state.
        return self.history.copy()

    def step(self, action):
        """Play one round, cashing out at the multiplier given by ``action``.

        ``action`` may be a scalar or any array-like; its first element is
        used and clamped to the bounds declared in ``action_space``.

        Returns ``(observation, reward, done, info)``; ``done`` is always
        False because the environment defines no terminal state.
        """
        requested = float(np.asarray(action, dtype=np.float64).reshape(-1)[0])
        lowest = float(self.action_space.low[0])
        highest = float(self.action_space.high[0])
        cashout = min(max(requested, lowest), highest)

        crash, win, reward = self.game.play(cashout)

        # Slide the window left by one and store the newest crash point last.
        self.history = np.roll(self.history, -1)
        self.history[-1] = crash
        done = False
        return self.history.copy(), float(reward), done, {}


## 🏋️‍♂️ Treinamento com PPO

In [None]:
from stable_baselines3 import PPO

# Build the environment and a PPO agent that uses the MLP policy.
env = AviatorEnv()
model = PPO(policy="MlpPolicy", env=env, verbose=1)

# Train the agent for 100k environment steps.
model.learn(total_timesteps=100_000)


## 🎮 Simulação do Agente

In [None]:
import matplotlib.pyplot as plt

# Per-round logs: chosen cash-out, observed crash point, reward received.
actions, crashes, rewards = [], [], []
obs = env.reset()

# Run the trained policy for 100 rounds.
for _round in range(100):
    action, _state = model.predict(obs)
    obs, reward, done, _info = env.step(action)
    rewards.append(reward)
    crashes.append(obs[-1])  # the newest crash point is the last history entry
    actions.append(float(action[0]))

# Figure 1: chosen cash-out multipliers against the actual crash points.
plt.figure(figsize=(10, 5))
plt.plot(actions, label="Cashout")
plt.plot(crashes, label="Crash", linestyle='--')
plt.title("Cashouts vs Crash Points")
plt.xlabel("Rodada")
plt.ylabel("Multiplicador")
plt.legend()
plt.grid()
plt.show()

# Figure 2: running total of the rewards collected over the rounds.
cumulative_rewards = np.cumsum(rewards)
plt.figure()
plt.plot(cumulative_rewards, color='green')
plt.title("Lucro acumulado")
plt.xlabel("Rodada")
plt.ylabel("Recompensa")
plt.grid()
plt.show()
