In [1]:
!pip install gym gym-retro



You should consider upgrading via the 'C:\Users\jose_\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [1]:
# Importing retro to play Street Fighter using a ROM
import retro
import time

In [4]:
!python -m retro.import .

Importing StreetFighterIISpecialChampionEdition-Genesis
Importing StreetFighterIISpecialChampionEdition-Genesis
Imported 2 games


In [23]:
# Creating an environment for Street Fighter
env = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis')

In [26]:
# Closing the environment
env.close()

In [5]:
# Sample actions that are possible in the environment
env.action_space.sample()

array([1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0], dtype=int8)

In [25]:
# Reset game to stating state
obs = env.reset()
# Set flag to false
done = False

for game in range(1):
    while not done:
        if done:
            obs = env.reset()
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        time.sleep(0.01)
        print(reward)

AttributeError: 'int' object has no attribute 'sleep'

In [11]:
info

{'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60.0}


# Setup Enviroment

In [2]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.10.0.82-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.82


You should consider upgrading via the 'C:\Users\jose_\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [12]:
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from matplotlib import pyplot as plt

In [38]:
class StreetFighterEnv(Env):
    def __init__(self):
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
        self.previous_frame = None
        self.score = 0
        self.previous_health = None
        self.opponent_previous_health = None
        self.action_history = []

    def reset(self):
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        self.score = 0
        self.previous_health = 176
        self.opponent_previous_health = 176
        self.action_history = []
        return obs

    def preprocess(self, observation):
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def step(self, action):
        if len(self.action_history) >= 5 and all(np.array_equal(a, action) for a in self.action_history[-5:]):
            action = self.action_space.sample()

        self.action_history.append(action)
        if len(self.action_history) > 10:
            self.action_history.pop(0)

        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs

        current_health = info.get('health', self.previous_health)
        opponent_current_health = info.get('enemy_health', self.opponent_previous_health)

        reward = 0
        if current_health < self.previous_health:
            reward -= (self.previous_health - current_health) * 0.05  # Reduced penalty for receiving damage
        if opponent_current_health < self.opponent_previous_health:
            reward += (self.opponent_previous_health - opponent_current_health) * 0.2  # Increased reward for hitting

        if done:
            if opponent_current_health <= 0:
                reward += 500  # Increased reward for winning
            else:
                reward -= 50  # Reduced penalty for losing

        self.previous_health = current_health
        self.opponent_previous_health = opponent_current_health

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        self.game.render()

    def close(self):
        self.game.close()

In [52]:
env.close()

In [15]:
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random

In [53]:
class DQNAgent:
    def __init__(self, state_shape, num_actions):
        self.state_shape = state_shape
        self.num_actions = num_actions
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            layers.Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=self.state_shape),
            layers.Conv2D(64, (4, 4), strides=(2, 2), activation='relu'),
            layers.Conv2D(64, (3, 3), strides=(1, 1), activation='relu'),
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.Dense(self.num_actions, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.num_actions)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, verbose=0)  # Aquí se aplica verbose=0 para no imprimir
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

    def map_action(self, action_index):
        action = np.zeros(self.num_actions, dtype=int)
        action[action_index] = 1
        return action

# Entrenamiento del agente
env = StreetFighter()
agent = DQNAgent(state_shape=(84, 84, 1), num_actions=env.action_space.shape[0])
episodes = 5000000  # Cambiar según sea necesario

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, 84, 84, 1])
    for time in range(5000):  # Se podría ajustar el número de pasos máximo por episodio
        action_index = agent.act(state)
        action = agent.map_action(action_index)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, 84, 84, 1])
        agent.remember(state, action_index, reward, next_state, done)
        state = next_state
        if done:
            print(f"episode: {e}/{episodes}, score: {time}, e: {agent.epsilon:.2}")
            break
    if len(agent.memory) > 32:
        agent.replay(32)

    if (e + 1) % 100000 == 0:  # Guardar el modelo cada 100,000 episodios
        agent.save(f"model_{e+1}.h5")

