In [1]:
# Standard Libraries
import os
import time

# Data Manipulation and Visualization Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

# Reinforcement Learning Libraries
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.callbacks import CheckpointCallback

# Custom Environment
from utils.balanced_env import BalancedStreetFighterEnv

CHECKPOINT_DIR = './logs/'
LOG_DIR = './logs/'
OPT_DIR = './opt/'
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(OPT_DIR, exist_ok=True)

env = BalancedStreetFighterEnv(save_video=True)
env.enable_save()  
env = Monitor(env, LOG_DIR)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, 4, channels_order='last')

model = PPO.load(os.path.join(CHECKPOINT_DIR, 'final_model.zip'), env=env)

Wrapping the env in a VecTransposeImage.


In [2]:
rewards = []         
num_games = 2    

for game in range(num_games):
    obs = env.reset()
    done = False
    while not done: 
        if done: 
            obs = env.reset()
        action = model.predict(obs)[0]
        obs, reward, done, info = env.step(action)
    print("------------------------------------------ game: ", game)           #Decir que guarde el video del juego
env.close()


Video saved as reports/videos/output_video_2.mp4
Data saved to ./data/reward_data\rewards_data_2.xlsx
------------------------------------------ game:  0
Video saved as reports/videos/output_video_3.mp4
Data saved to ./data/reward_data\rewards_data_3.xlsx
------------------------------------------ game:  1


In [5]:
# Load the data from the Excel file
file_name = 'data/reward_data/rewards_data_3.xlsx'
data = pd.read_excel(file_name)

# Create the figure for Aggressiveness Signal
fig = go.Figure()

# Plot the aggressiveness signal
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['aggressiveness_signal'],
    mode='lines+markers',
    name='Aggressiveness Signal',
    line=dict(color='blue'),
    marker=dict(size=5)
))

# Add vertical lines for match rewards
for i, reward in enumerate(data['match_reward']):
    if reward != 0:
        color = 'red' if reward < 0 else 'blue'
        fig.add_vline(x=i, line=dict(color=color, dash='dash'))

# Update layout for aggressiveness signal
fig.update_layout(
    title='Aggressiveness Signal Over Steps',
    xaxis_title='Steps',
    yaxis_title='Aggressiveness Signal',
    hovermode='x unified'
)

# Show the figure
fig.show()

# Create the figure for Normal Signal
fig = go.Figure()

# Plot the normal signal
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['normal_signal'],
    mode='lines+markers',
    name='Normal Signal',
    line=dict(color='green'),
    marker=dict(size=5)
))

# Add vertical lines for match rewards
for i, reward in enumerate(data['match_reward']):
    if reward != 0:
        color = 'red' if reward < 0 else 'blue'
        fig.add_vline(x=i, line=dict(color=color, dash='dash'))

# Update layout for normal signal
fig.update_layout(
    title='Normal Signal Over Steps',
    xaxis_title='Steps',
    yaxis_title='Normal Signal',
    hovermode='x unified'
)

# Show the figure
fig.show()

# Create the figure for Distance Reward
fig = go.Figure()

# Plot the distance reward
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['distance_reward'],
    mode='lines+markers',
    name='Distance Reward',
    line=dict(color='purple'),
    marker=dict(size=5)
))

# Add vertical lines for match rewards
for i, reward in enumerate(data['match_reward']):
    if reward != 0:
        color = 'red' if reward < 0 else 'blue'
        fig.add_vline(x=i, line=dict(color=color, dash='dash'))

# Update layout for distance reward
fig.update_layout(
    title='Distance Reward Over Steps',
    xaxis_title='Steps',
    yaxis_title='Distance Reward',
    hovermode='x unified'
)

# Show the figure
fig.show()

# Create the figure for Time Penalty
fig = go.Figure()

# Plot the time penalty
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['time_penalty'],
    mode='lines+markers',
    name='Time Penalty',
    line=dict(color='orange'),
    marker=dict(size=5)
))

# Add vertical lines for match rewards
for i, reward in enumerate(data['match_reward']):
    if reward != 0:
        color = 'red' if reward < 0 else 'blue'
        fig.add_vline(x=i, line=dict(color=color, dash='dash'))

# Update layout for time penalty
fig.update_layout(
    title='Time Penalty Over Steps',
    xaxis_title='Steps',
    yaxis_title='Time Penalty',
    hovermode='x unified'
)

# Show the figure
fig.show()

# Create the figure for Distance Reward and Time Penalty combined
fig = go.Figure()

# Plot distance reward
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['distance_reward'],
    mode='lines',
    name='Distance Reward',
    line=dict(color='purple')
))

# Plot time penalty
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['time_penalty'],
    mode='lines',
    name='Time Penalty',
    line=dict(color='orange')
))

# Add vertical lines for match rewards
for i, reward in enumerate(data['match_reward']):
    if reward != 0:
        color = 'red' if reward < 0 else 'blue'
        fig.add_vline(x=i, line=dict(color=color, dash='dash'))

# Update layout for combined graph
fig.update_layout(
    title='Distance Reward and Time Penalty Over Steps',
    xaxis_title='Steps',
    yaxis_title='Values',
    hovermode='x unified'
)

# Show the figure
fig.show()



In [4]:
# Load the data from the Excel file
file_name = 'data/reward_data/rewards_data_3.xlsx'
data = pd.read_excel(file_name)

# Initialize lists to store the accumulated rewards
accumulated_aggressiveness = []
accumulated_normal = []

# Variables to track the current match and accumulated values
current_match_reward = 0
current_aggressiveness = 0
current_normal = 0

# Iterate over the rows in the DataFrame
for index, row in data.iterrows():
    if row['match_reward'] != 0 or row['done']:
        # If a new match starts, save the accumulated rewards and reset counters
        if index > 0:
            accumulated_aggressiveness.append(current_aggressiveness)
            accumulated_normal.append(current_normal)
        
        # Reset counters for the new match
        current_aggressiveness = 0
        current_normal = 0

    # Accumulate rewards
    current_aggressiveness += row['aggressiveness_signal']
    current_normal += row['normal_signal']

# Append the last match accumulated rewards
accumulated_aggressiveness.append(current_aggressiveness)
accumulated_normal.append(current_normal)

# Create a DataFrame to store the results
results = pd.DataFrame({
    'Match': range(1, len(accumulated_aggressiveness) + 1),
    'Accumulated Aggressiveness': accumulated_aggressiveness,
    'Accumulated Normal': accumulated_normal
})

results

Unnamed: 0,Match,Accumulated Aggressiveness,Accumulated Normal
0,1,-23.911558,320.0
1,2,-219.665382,-152.5
2,3,0.122339,0.0
