In [1]:
# Copyright 2023 LIN Yi. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os
import time 

import retro
from stable_baselines3 import PPO

from street_fighter_custom_wrapper import StreetFighterCustomWrapper
import numpy as np

import PIL
from tqdm import tqdm
import matplotlib.pyplot as plt
import gym
from IPython import display
import cv2

def show_render(env):
    """Draw the environment's current frame inline in the notebook.

    The frame is obtained from ``env.render(mode='rgb_array')`` and drawn on the
    current matplotlib figure, which is then pushed to the cell output.
    ``clear_output(wait=True)`` defers the clearing until new output arrives, so
    the next displayed frame replaces this one.

    Args:
        env: Environment exposing ``render(mode='rgb_array')``.
    """
    frame = env.render(mode='rgb_array')
    plt.imshow(frame)
    current_figure = plt.gcf()
    display.display(current_figure)
    display.clear_output(wait=True)
    
def save_image(image_array, filename):
    """Write an RGB frame to ``filename`` with OpenCV.

    Args:
        image_array: Image in RGB channel order; it is converted with
            ``cv2.COLOR_RGB2BGR`` before being handed to ``cv2.imwrite``.
        filename: Destination path passed unchanged to ``cv2.imwrite``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    image_bgr = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its boolean return value, so a
    # discarded result would let frames go missing without any error.
    if not cv2.imwrite(filename, image_bgr):
        raise OSError(f"Could not write image to {filename!r}")


# --- Evaluation settings ---
RESET_ROUND = True  # Whether to reset the round when the fight is over.
RENDERING = False  # Whether to render the game screen.

# Model file to load from MODEL_DIR. Keep a single assignment so the value in
# effect is unambiguous; edit the name here to evaluate another checkpoint.
# Checkpoints previously selected here:
#   ppo_ryu_500000_steps
#   ppo_ryu_2500000_steps
# Model "ppo_ryu_2500000_steps_updated" is capable of beating the final stage
# (Bison) of the game.
MODEL_NAME = r"ppo_ryu_5000000_steps"

# Model notes:
# ppo_ryu_2000000_steps_updated: Just beginning to overfit; generalizable but not quite capable.
# ppo_ryu_2500000_steps_updated: Approaching the final overfitted state; cannot dominate the first round but partially generalizable. High chance of beating the final stage.
# ppo_ryu_3000000_steps_updated: Near the final overfitted state; almost dominates the first round but barely generalizable.
# ppo_ryu_7000000_steps_updated: Overfitted; dominates the first round but not generalizable.

RANDOM_ACTION = False  # When True, sample random actions instead of loading a model.
NUM_EPISODES = 10  # Make sure NUM_EPISODES >= 3 if you set RESET_ROUND to False to see the whole final stage game.
MODEL_DIR = r"trained_models/"  # Directory containing the saved model files.

def make_env(game, state, players=1):
    """Return a zero-argument factory that builds the wrapped game environment.

    Nothing is created until the returned callable is invoked. At that point a
    retro environment is made with filtered actions and image observations,
    then wrapped in ``StreetFighterCustomWrapper`` using the module-level
    ``RESET_ROUND`` and ``RENDERING`` settings.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Saved state name forwarded to ``retro.make``.
        players: Number of players forwarded to ``retro.make`` (default 1).

    Returns:
        A callable taking no arguments that returns the wrapped environment.
    """
    def _create_env():
        retro_options = dict(
            game=game,
            state=state,
            players=players,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE,
        )
        raw_env = retro.make(**retro_options)
        return StreetFighterCustomWrapper(
            raw_env,
            reset_round=RESET_ROUND,
            rendering=RENDERING,
        )

    return _create_env

# gym-retro allows only one emulator instance per process, so an environment
# left over from an earlier run of this cell must be closed before a new one is
# made. On a fresh kernel `env` does not exist yet, hence the NameError guard.
try:
    env.close()
except NameError:
    pass

game = "StreetFighterIISpecialChampionEdition-Genesis"
env = make_env(game, state="Champion.Level12.RyuVsBison", players=2)()

# The trained policy is only needed when actions are not sampled at random.
if not RANDOM_ACTION:
    model = PPO.load(os.path.join(MODEL_DIR, MODEL_NAME), device='cuda')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Evaluation loop: play NUM_EPISODES rounds, save every rendered frame as a PNG
# under `save_folder`, then report the win rate and the average reward.
obs = env.reset()
done = False

num_episodes = NUM_EPISODES
episode_reward_sum = 0
num_victory = 0

# Length of the all-zero "do nothing" action sent during scene transitions.
action_space_per_player = 12

print("\nFighting Begins!\n")

save_folder = f'./outputs/level12_{MODEL_NAME}'

os.makedirs(save_folder, exist_ok=True)

image_id = 0
try:
    for _ in range(num_episodes):
        done = False

        if RESET_ROUND:
            obs = env.reset()

        total_reward = 0

        while not done:
            if RANDOM_ACTION:
                obs, reward, done, info = env.step(env.action_space.sample())
            else:
                action, _states = model.predict(obs)
                obs, reward, done, info = env.step(action)

            # Zero-padded frame index: no blanks in the file name, and plain
            # lexicographic order matches frame order.
            filename = os.path.join(save_folder, f'{image_id:08d}.png')
            image_id += 1
            image_array = env.render(mode='rgb_array')
            save_image(image_array, filename)

            if reward != 0:
                total_reward += reward
                print("Reward: {:.3f}, playerHP: {}, enemyHP:{}".format(reward, info['agent_hp'], info['enemy_hp']))

            # A negative HP value on either side is treated as the end of the round.
            if info['enemy_hp'] < 0 or info['agent_hp'] < 0:
                done = True

        if info['enemy_hp'] < 0:
            print("Victory!")
            num_victory += 1

        print("Total reward: {}\n".format(total_reward))
        episode_reward_sum += total_reward

        if not RESET_ROUND:
            # Inter-scene transition: send the no-op action until both HP values
            # are non-negative again.
            while info['enemy_hp'] < 0 or info['agent_hp'] < 0:
                obs, reward, done, info = env.step([0] * action_space_per_player)
                env.render()
finally:
    # Release the emulator even if an episode raised.
    env.close()

# Guard the averages so NUM_EPISODES == 0 does not raise ZeroDivisionError.
win_rate = 1.0 * num_victory / num_episodes if num_episodes else 0.0
average_reward = episode_reward_sum / num_episodes if num_episodes else 0.0

print("Winning rate: {}".format(win_rate))
if RANDOM_ACTION:
    print("Average reward for random action: {}".format(average_reward))
else:
    print("Average reward for {}: {}".format(MODEL_NAME, average_reward))


Fighting Begins!

Reward: 0.105, playerHP: 176, enemyHP:141
Reward: -0.031, playerHP: 145, enemyHP:141
Reward: -0.034, playerHP: 111, enemyHP:141
Reward: -0.021, playerHP: 90, enemyHP:141
Reward: -0.038, playerHP: 52, enemyHP:141
Reward: -0.044, playerHP: 8, enemyHP:141
Reward: 0.048, playerHP: 8, enemyHP:125
Reward: -0.040, playerHP: -1, enemyHP:125
Total reward: -0.05467350251439412

Reward: 0.114, playerHP: 176, enemyHP:138
Reward: 0.138, playerHP: 176, enemyHP:92
Reward: -0.035, playerHP: 141, enemyHP:92
Reward: 0.018, playerHP: 141, enemyHP:86
Reward: 0.117, playerHP: 141, enemyHP:47
Reward: -0.044, playerHP: 97, enemyHP:47
Reward: 0.018, playerHP: 97, enemyHP:41
Reward: -0.032, playerHP: 65, enemyHP:41
Reward: -0.027, playerHP: 38, enemyHP:41
Reward: 0.108, playerHP: 38, enemyHP:5
Reward: -0.035, playerHP: 3, enemyHP:5
Reward: -0.001, playerHP: -1, enemyHP:5
Total reward: 0.33880843133623395

Reward: 0.105, playerHP: 176, enemyHP:141
Reward: -0.035, playerHP: 141, enemyHP:141
Re

# Convert the saved images to a video

In [3]:
from image_to_video import image_files_to_video, image_list_to_video

# Write the video next to the frame folder, named "<folder name>_<fps>.mp4".
# os.path.split separates the parent directory from the folder name; normpath
# first drops any trailing slash so the folder name is never an empty string.
parent_folder, filename = os.path.split(os.path.normpath(save_folder))

fps = 5
video_name = os.path.join(parent_folder, f'{filename}_{fps}.mp4')

# "mp4v" is presumably the codec FourCC expected by image_files_to_video;
# confirm against image_to_video.
image_files_to_video(video_name, save_folder, "mp4v", fps)

Writing Video: 100%|██████████| 1824/1824 [00:06<00:00, 278.08it/s]
