In [None]:
import gymnasium as gym
from gymnasium import spaces
from gymnasium.wrappers import RecordEpisodeStatistics
import numpy as np
import random
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
import os

In [2]:

class FlappyBirdEnv(gym.Env):
    def __init__(self):
        super(FlappyBirdEnv, self).__init__()
        
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=np.array([0, -10, 0, 0]), 
            high=np.array([400, 10, 300, 200]),
            dtype=np.float32)
        
        self.bird_y = 200 
        self.bird_velocity = 0
        self.pipe_x = 300 
        self.pipe_gap = random.randint(100, 300) 
        self.pipe_width = 50
        self.gravity = 1
        self.flap_strength = -10
        self.game_width = 300
        self.game_height = 400
        self.score = 0

    def reset(self):
        self.bird_y = 200
        self.bird_velocity = 0
        self.pipe_x = self.game_width
        self.pipe_gap = random.randint(100, 300)
        self.score = 0
        return np.array([self.bird_y, self.bird_velocity, self.pipe_x, self.pipe_gap], dtype=np.float32)

    def step(self, action):
        if action == 1:  
            self.bird_velocity = self.flap_strength
        
        self.bird_velocity += self.gravity
        self.bird_y += self.bird_velocity
        self.pipe_x -= 5
        if self.pipe_x < 0:
            self.pipe_x = self.game_width
            self.pipe_gap = random.randint(100, 300)
            self.score += 1 

        done = False
        if self.bird_y < 0 or self.bird_y > self.game_height:  
            done = True
        elif (self.pipe_x < 50 and 
              (self.bird_y < self.pipe_gap or self.bird_y > self.pipe_gap + 100)): 
            done = True

        reward = 1 if not done else -10
        state = np.array([self.bird_y, self.bird_velocity, self.pipe_x, self.pipe_gap], dtype=np.float32)
        return state, reward, done, {}

    def render(self, mode='human'):
        print(f"Bird Y: {self.bird_y}, Velocity: {self.bird_velocity}, Pipe X: {self.pipe_x}, Gap: {self.pipe_gap}")

env = FlappyBirdEnv()
state = env.reset()

for _ in range(100):
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    env.render()
    if done:
        break

Bird Y: 201, Velocity: 1, Pipe X: 295, Gap: 194
Bird Y: 203, Velocity: 2, Pipe X: 290, Gap: 194
Bird Y: 194, Velocity: -9, Pipe X: 285, Gap: 194
Bird Y: 185, Velocity: -9, Pipe X: 280, Gap: 194
Bird Y: 177, Velocity: -8, Pipe X: 275, Gap: 194
Bird Y: 168, Velocity: -9, Pipe X: 270, Gap: 194
Bird Y: 160, Velocity: -8, Pipe X: 265, Gap: 194
Bird Y: 153, Velocity: -7, Pipe X: 260, Gap: 194
Bird Y: 147, Velocity: -6, Pipe X: 255, Gap: 194
Bird Y: 142, Velocity: -5, Pipe X: 250, Gap: 194
Bird Y: 138, Velocity: -4, Pipe X: 245, Gap: 194
Bird Y: 135, Velocity: -3, Pipe X: 240, Gap: 194
Bird Y: 133, Velocity: -2, Pipe X: 235, Gap: 194
Bird Y: 124, Velocity: -9, Pipe X: 230, Gap: 194
Bird Y: 115, Velocity: -9, Pipe X: 225, Gap: 194
Bird Y: 107, Velocity: -8, Pipe X: 220, Gap: 194
Bird Y: 100, Velocity: -7, Pipe X: 215, Gap: 194
Bird Y: 91, Velocity: -9, Pipe X: 210, Gap: 194
Bird Y: 83, Velocity: -8, Pipe X: 205, Gap: 194
Bird Y: 76, Velocity: -7, Pipe X: 200, Gap: 194
Bird Y: 67, Velocity: -9,

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [None]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []  
        self.epsilon = 1.0  
        self.epsilon_min = 0.1  
        self.epsilon_decay = 0.995  
        self.gamma = 0.95 
        self.learning_rate = 0.001
        self.q_network = self._build_model()  
        self.target_network = self._build_model() 
        self.update_target_network()

    def _build_model(self):
        model = keras.Sequential([
            keras.layers.Dense(24, input_dim=self.state_size, activation='relu'),
            keras.layers.Dense(24, activation='relu'),
            keras.layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)  
        q_values = self.q_network.predict(state, verbose=0)  
        return np.argmax(q_values[0]) 

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.target_network.predict(next_state, verbose=0)[0])
            target_f = self.q_network.predict(state, verbose=0)
            target_f[0][action] = target
            self.q_network.fit(state, target_f, epochs=1, verbose=0)
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        if len(self.memory) > 2000:  
            self.memory.pop(0)

    def update_target_network(self):
        self.target_network.set_weights(self.q_network.get_weights())