In [1]:
%matplotlib inline
import os, sys, inspect
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

sys.path.append(os.getcwd())
from overdr1ve_env import Overdr1veEnv


In [2]:
def parse_type_bonus(cell: str):
    """Translate a "Type Bonus" cell into a ``(core_bonus, max_bonus)`` pair.

    The cell is expected to look like ``"<int> <label ...>"``. The leading
    integer is credited to the first slot when the label contains "core"
    (case-insensitive), otherwise to the second slot when it contains "max".
    Non-strings, blank cells, a first token that ``int()`` rejects, or a label
    with neither keyword all yield ``(0, 0)``.
    """
    text = cell.strip() if isinstance(cell, str) else ""
    if text == "":
        return 0, 0

    tokens = text.split()
    try:
        amount = int(tokens[0])
    except Exception:
        return 0, 0

    label = " ".join(tokens[1:]).lower()
    # "core" is checked first, so it wins if both keywords are present.
    for keyword, bonus in (("core", (amount, 0)), ("max", (0, amount))):
        if keyword in label:
            return bonus
    return 0, 0

def split_types(cell: str):
    """Split a "/"-separated cell into a set of trimmed, non-empty names.

    Non-string values, blank cells and the placeholder "-" all produce an
    empty set.
    """
    if not isinstance(cell, str):
        return set()
    text = cell.strip()
    if text in ("", "-"):
        return set()
    pieces = (piece.strip() for piece in text.split("/"))
    return {piece for piece in pieces if piece}


In [3]:
# # Parameters (disabled cell -- the same definitions are active in the later "Params" cell)
# tracks_csv = "data_csv/tracks.csv"
# cars_csv = "data_csv/cars.csv"
# upgrades_csv = "data_csv/upgrades.csv"

# ppo_model_path = "./checkpoints/ppo_overdr1ve"  # without .zip
# vecnorm_path = "./checkpoints/vecnorm_stats.pkl"

# EPISODES = 200
# AGENT_ID = 0
# SHUFFLE_TRACKS = True

# # Load DataFrames used by the offline simulator
# tracks_df = pd.read_csv(tracks_csv)
# cars_df = pd.read_csv(cars_csv)
# upgrades_df = pd.read_csv(upgrades_csv)

# tracks_df = tracks_df.copy()
# tracks_df["bonus_core"], tracks_df["bonus_max"] = zip(*tracks_df["Type Bonus"].map(parse_type_bonus))

# upgrades_df = upgrades_df.copy()
# upgrades_df["cond_set"] = upgrades_df["Track Type Condition"].map(split_types)

# POINTS = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]

In [3]:
# Offline simulator for Random/Greedy
# Shared NumPy Generator with a fixed seed. episode_tracks() and run_offline_mode()
# below both draw from it, so offline results depend on the order in which those
# functions are called after this cell runs.
rng = np.random.default_rng(12345)

def episode_tracks(df: pd.DataFrame, shuffle: bool):
    """Return the rows of ``df`` with a fresh 0..n-1 index, optionally shuffled.

    When ``shuffle`` is true, a seed is drawn from the module-level ``rng`` and
    used as ``random_state`` for a full-length ``DataFrame.sample``. Otherwise
    the rows keep their original order. ``df`` itself is not modified.
    """
    if not shuffle:
        return df.reset_index(drop=True)
    seed = int(rng.integers(0, 1_000_000))
    shuffled = df.sample(frac=1.0, random_state=seed)
    return shuffled.reset_index(drop=True)

def eval_action_points_on_track(track_row, agent_car_row, action_row, opponents_df, points=None):
    """Score one upgrade choice for the agent on a single track.

    Parameters
    ----------
    track_row : mapping / Series
        Must provide "Track Type", "Total Laps", "bonus_core" and "bonus_max".
        A missing (NaN/None) bonus counts as 0.
    agent_car_row : mapping / Series
        Must provide "Core Power" and "Max Laps" for the agent's car.
    action_row : mapping / Series
        Must provide "cond_set", "Core Power" and "Max Laps". The upgrade is
        applied when ``cond_set`` is empty or contains the track type.
    opponents_df : pandas.DataFrame
        Opponent cars with "Core Power" and "Max Laps" columns. Opponents get
        the track bonuses but no upgrade.
    points : sequence of numbers, optional
        Points awarded by finishing position (index 0 = first place).
        Defaults to the module-level ``POINTS`` table.

    Returns
    -------
    tuple
        ``(points_scored, finished, won)``. A car that cannot cover the
        required laps is a DNF: it scores 0 and neither finishes nor wins.
        A finisher placed beyond the end of the points table scores 0.
    """
    def _bonus(key):
        # Treat a missing bonus (NaN/None) as "no bonus".
        value = track_row[key]
        return 0 if pd.isna(value) else int(value)

    # track bonuses
    ttype = track_row["Track Type"]
    req_laps = int(track_row["Total Laps"])
    b_core = _bonus("bonus_core")
    b_max = _bonus("bonus_max")

    # agent base stats plus track bonus, plus the upgrade when its condition matches
    a_core = float(agent_car_row["Core Power"]) + b_core
    a_max = int(agent_car_row["Max Laps"]) + b_max
    cond = action_row["cond_set"]
    if (len(cond) == 0) or (ttype in cond):
        a_core += float(action_row["Core Power"])
        a_max += int(action_row["Max Laps"])
    a_dnf = a_max < req_laps

    # opponents
    o_core = opponents_df["Core Power"].to_numpy(float) + b_core
    o_max = opponents_df["Max Laps"].to_numpy(int) + b_max
    o_dnf = o_max < req_laps

    # The agent is appended last so its grid index is always n_cars - 1.
    grid_core = np.concatenate([o_core, np.array([a_core])])
    grid_dnf = np.concatenate([o_dnf, np.array([a_dnf])])
    n_cars = len(grid_core)

    # Deterministic tie-break: a tiny increasing offset means that, between
    # cars with equal core power, the later grid slot ranks ahead (so the
    # agent wins exact ties). Finishers always rank ahead of DNFs.
    noise = np.linspace(0, 1e-6, n_cars)
    order = sorted(range(n_cars), key=lambda i: (grid_dnf[i], -(grid_core[i] + noise[i])))
    pos = order.index(n_cars - 1) + 1

    if a_dnf:
        return 0, False, False

    # Resolve the table lazily so an explicit ``points`` never needs the global.
    table = POINTS if points is None else points
    # Positions past the end of the table score nothing instead of raising IndexError.
    pts = table[pos - 1] if pos <= len(table) else 0
    return pts, True, pos == 1

def run_offline_mode(mode, episodes=100, agent_id=0, shuffle=True):
    """Simulate a baseline policy offline and return one summary row per episode.

    Reads the module-level ``cars_df``, ``tracks_df``, ``upgrades_df`` and
    ``rng``. Each episode plays every track once (order from
    ``episode_tracks``); on each track the policy picks one action from
    ``upgrades_df`` plus a synthetic "No Upgrade" row, and the outcome is
    scored with ``eval_action_points_on_track``.

    Parameters
    ----------
    mode : {"Random", "Greedy"}
        "Random" draws the action uniformly from ``rng``; "Greedy" picks the
        first action with the highest points on the current track.
    episodes : int
        Number of episodes to simulate.
    agent_id : int
        Positional row of ``cars_df`` used as the agent; all other rows are
        opponents.
    shuffle : bool
        Whether the track order is reshuffled for every episode.

    Returns
    -------
    pandas.DataFrame
        Columns: Mode, Episode, Total Points, Finishes, Wins, Tracks,
        Finish Rate, Avg Points / Track.

    Raises
    ------
    ValueError
        If ``mode`` is neither "Random" nor "Greedy".
    """
    # Fail fast rather than only when the first track is reached.
    if mode not in ("Random", "Greedy"):
        raise ValueError("mode must be Random or Greedy in offline sim")

    # Select the agent and the opponents by *position* so both agree even when
    # cars_df does not carry a default 0..n-1 index.
    agent_car = cars_df.iloc[agent_id]
    is_opponent = np.ones(len(cars_df), dtype=bool)
    is_opponent[agent_id] = False
    opponents = cars_df.iloc[is_opponent].reset_index(drop=True)

    # include a synthetic "No Upgrade" that does nothing
    actions = pd.concat([
        pd.DataFrame([{ "Upgrade": "No Upgrade", "Core Power": 0.0, "Max Laps": 0, "Track Type Condition": "-", "cond_set": set() }]),
        upgrades_df
    ], ignore_index=True)

    rows = []
    for ep in range(episodes):
        tr_order = episode_tracks(tracks_df, shuffle)
        total_points, finishes, wins = 0.0, 0, 0
        for _, tr in tr_order.iterrows():
            if mode == "Random":
                a_idx = int(rng.integers(0, len(actions)))
                outcome = eval_action_points_on_track(tr, agent_car, actions.iloc[a_idx], opponents)
            else:
                # Greedy: keep the first action with the highest points. The
                # evaluation is deterministic, so the best outcome is reused
                # rather than recomputed.
                outcome = None
                for _, action in actions.iterrows():
                    candidate = eval_action_points_on_track(tr, agent_car, action, opponents)
                    if outcome is None or candidate[0] > outcome[0]:
                        outcome = candidate

            pts, finished, won = outcome
            total_points += pts
            finishes += int(finished)
            wins += int(won)

        tracks_played = len(tr_order)
        rows.append({
            "Mode": mode,
            "Episode": ep + 1,
            "Total Points": total_points,
            "Finishes": finishes,
            "Wins": wins,
            "Tracks": tracks_played,
            # max(1, ...) guards against an empty track table
            "Finish Rate": finishes / max(1, tracks_played),
            "Avg Points / Track": total_points / max(1, tracks_played),
        })
    return pd.DataFrame(rows)


In [4]:
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor
import inspect, os, pandas as pd

# ---- helpers to normalize vecenv API differences (gym vs gymnasium) ----
def vec_reset(venv):
    """Reset ``venv`` and always hand back an ``(obs, info)`` pair.

    A 2-tuple result is assumed to already be ``(obs, info)`` and is returned
    unchanged; anything else is treated as a bare observation and paired with
    an empty info dict.
    """
    result = venv.reset()
    already_paired = isinstance(result, tuple) and len(result) == 2
    return result if already_paired else (result, {})

def vec_step(venv, action):
    """Step ``venv`` and normalise the result to a 6-tuple.

    Returns ``(obs, reward, terminated, truncated, info, done)``.

    * 5-tuple result ``(obs, reward, terminated, truncated, info)``: ``done``
      is the element-wise OR of ``terminated`` and ``truncated``.
    * Otherwise the result is unpacked as ``(obs, reward, done, info)``:
      ``done`` is cast to a boolean array, ``terminated`` is that same array
      and ``truncated`` is all-False with the same shape.
    """
    result = venv.step(action)

    if not (isinstance(result, tuple) and len(result) == 5):
        # old API: (obs, reward, done, info)
        obs, reward, raw_done, info = result
        done = np.asarray(raw_done, dtype=bool)
        no_truncation = np.zeros_like(done, dtype=bool)
        return obs, reward, done, no_truncation, info, done

    # new API: terminated and truncated are reported separately
    obs, reward, terminated, truncated, info = result
    return obs, reward, terminated, truncated, info, np.logical_or(terminated, truncated)

# ---- env factory that tolerates your constructor signature ----
def make_env_any_signature(tracks_csv, cars_csv, upgrades_csv, agent_car_id=0, shuffle_tracks=True):
    """Build a zero-argument env factory that adapts to Overdr1veEnv's constructor.

    The constructor signature is inspected once and one of three calling
    conventions is chosen:

    1. ``track_df`` / ``car_df`` (DataFrames),
    2. ``tracks_df`` / ``cars_df`` (DataFrames),
    3. ``tracks_csv`` / ``cars_csv`` (paths).

    Optional keyword arguments are forwarded only when the constructor
    declares a parameter of that name, so an older or newer env version is
    never handed a keyword it does not accept.

    Parameters
    ----------
    tracks_csv, cars_csv : str
        Paths to the track and car tables.
    upgrades_csv : str or None
        Path to the upgrade table. In the DataFrame conventions a missing
        path/file is passed to the env as ``None``.
    agent_car_id : int
        Forwarded as ``agent_car_id`` when supported.
    shuffle_tracks : bool
        Forwarded as ``shuffle_tracks_each_reset`` when supported.

    Returns
    -------
    callable
        A thunk that creates the env wrapped in ``Monitor`` (the form
        ``DummyVecEnv`` expects).

    Raises
    ------
    TypeError
        Raised by the thunk when none of the three conventions matches.
    """
    # Deferred import so the env module is only needed when a factory is built.
    # Note: a plain import does NOT reload an already-imported module; use
    # importlib.reload or %autoreload to pick up edits to overdr1ve_env.
    from overdr1ve_env import Overdr1veEnv
    params = set(inspect.signature(Overdr1veEnv.__init__).parameters.keys())

    def _only_supported(candidates):
        # Keep just the keyword arguments the constructor actually declares.
        return {name: value for name, value in candidates.items() if name in params}

    def _load_upgrades():
        # Upgrades are optional: a missing path or file becomes None.
        if upgrades_csv and os.path.exists(upgrades_csv):
            return pd.read_csv(upgrades_csv)
        return None

    def _thunk():
        # Case 1: expects DataFrames named track_df/car_df/upgrade_df
        if {"track_df", "car_df"}.issubset(params):
            track_df = pd.read_csv(tracks_csv)  # read once; also sizes episode_len
            kwargs = dict(track_df=track_df, car_df=pd.read_csv(cars_csv))
            if "upgrade_df" in params:
                kwargs["upgrade_df"] = _load_upgrades()
            kwargs.update(_only_supported({
                "episode_len": len(track_df),
                "opponent_mode": "static",
                "penalty_dnf": 0.0,
                "agent_car_id": agent_car_id,
                "shuffle_tracks_each_reset": shuffle_tracks,
            }))
        # Case 2: refactored DF-based version
        elif {"tracks_df", "cars_df"}.issubset(params):
            kwargs = dict(tracks_df=pd.read_csv(tracks_csv), cars_df=pd.read_csv(cars_csv))
            if "upgrades_df" in params:
                kwargs["upgrades_df"] = _load_upgrades()
            kwargs.update(_only_supported({
                "agent_car_id": agent_car_id,
                "shuffle_tracks_each_reset": shuffle_tracks,
            }))
        # Case 3: CSV-path-based version
        elif {"tracks_csv", "cars_csv"}.issubset(params):
            kwargs = dict(tracks_csv=tracks_csv, cars_csv=cars_csv)
            kwargs.update(_only_supported({
                "upgrades_csv": upgrades_csv,
                "agent_car_id": agent_car_id,
                "shuffle_tracks_each_reset": shuffle_tracks,
            }))
        else:
            raise TypeError(f"Unrecognized Overdr1veEnv constructor params: {sorted(params)}")
        return Monitor(Overdr1veEnv(**kwargs))
    return _thunk

# ---- patched PPO eval using the normalized helpers ----
def run_ppo_env_eval(tracks_csv, cars_csv, upgrades_csv,
                     ppo_model_path="./checkpoints/ppo_overdr1ve",
                     vecnorm_path="./checkpoints/vecnorm_stats.pkl",
                     agent_id=0, shuffle_tracks=True, episodes=100):
    """Roll out a saved PPO policy in the env and return one summary row per episode.

    Parameters
    ----------
    tracks_csv, cars_csv, upgrades_csv : str
        Data files handed to ``make_env_any_signature``.
    ppo_model_path : str
        Path of the saved model *without* the ".zip" suffix.
    vecnorm_path : str
        Optional VecNormalize statistics. When the file exists it is loaded in
        evaluation mode (no stat updates, rewards left unnormalised); when it
        does not, a warning is emitted and the raw env is used.
    agent_id : int
        Forwarded to the env factory as the agent car id.
    shuffle_tracks : bool
        Forwarded to the env factory.
    episodes : int
        Number of evaluation episodes.

    Returns
    -------
    pandas.DataFrame
        Columns: Mode ("PPO"), Episode, Total Points, Finishes, Wins, Tracks,
        Finish Rate, Avg Points / Track. "Total Points" is the sum of the
        per-step env reward; finishes and wins are derived from the step info
        keys "agent_dnf" and "position".

    Raises
    ------
    FileNotFoundError
        If ``ppo_model_path + ".zip"`` does not exist.
    """
    import warnings

    # Fail fast before any environment is constructed.
    if not os.path.exists(ppo_model_path + ".zip"):
        raise FileNotFoundError(f"Missing PPO model file: {ppo_model_path}.zip")

    venv = DummyVecEnv([make_env_any_signature(tracks_csv, cars_csv, upgrades_csv, agent_id, shuffle_tracks)])
    try:
        # Load VecNormalize if available
        if os.path.exists(vecnorm_path):
            # NOTE: VecNormalize.load unpickles the file -- only load stats you trust.
            venv = VecNormalize.load(vecnorm_path, venv)
            venv.training = False      # freeze running statistics during evaluation
            venv.norm_reward = False   # report raw rewards
        else:
            warnings.warn(
                f"VecNormalize stats not found at {vecnorm_path}; evaluating on "
                "unnormalized observations. Results are unreliable if the model "
                "was trained with VecNormalize.",
                stacklevel=2,
            )

        model = PPO.load(ppo_model_path, env=venv, device="cpu")

        rows = []
        for ep in range(episodes):
            obs, _ = vec_reset(venv)
            total_points = 0.0
            finishes = wins = tracks_played = 0

            episode_over = False
            while not episode_over:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, _, _, info_step, done = vec_step(venv, action)

                # The vec env returns per-env arrays/lists; only one env is used here.
                step_info = info_step[0] if isinstance(info_step, (list, tuple)) else info_step
                total_points += float(np.asarray(reward)[0])
                finished = not bool(step_info.get("agent_dnf", False))
                if finished:
                    finishes += 1
                    # A missing "position" falls back to 99, i.e. never a win.
                    if int(step_info.get("position", 99)) == 1:
                        wins += 1
                tracks_played += 1
                episode_over = bool(np.asarray(done)[0])

            rows.append({
                "Mode": "PPO",
                "Episode": ep + 1,
                "Total Points": total_points,
                "Finishes": finishes,
                "Wins": wins,
                "Tracks": tracks_played,
                "Finish Rate": finishes / max(1, tracks_played),
                "Avg Points / Track": total_points / max(1, tracks_played),
            })
    finally:
        # Release the environment even if loading or the rollout fails.
        venv.close()

    return pd.DataFrame(rows)


In [5]:
# --- Evaluation parameters (adjust if needed) ---
tracks_csv, cars_csv, upgrades_csv = (
    "data_csv/tracks.csv",
    "data_csv/cars.csv",
    "data_csv/upgrades.csv",
)
ppo_model_path = "./checkpoints/ppo_overdr1ve"
vecnorm_path = "./checkpoints/vecnorm_stats.pkl"

EPISODES, AGENT_ID, SHUFFLE_TRACKS = 200, 0, True

# Points awarded by finishing position (index 0 = first place).
POINTS = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]

# --- Tables consumed by the offline (Random/Greedy) simulator ---
tracks_df = pd.read_csv(tracks_csv)
cars_df = pd.read_csv(cars_csv)
upgrades_df = pd.read_csv(upgrades_csv)

# Derived columns: numeric track bonuses and the set of track types each upgrade applies to.
_core_bonus, _max_bonus = zip(*tracks_df["Type Bonus"].map(parse_type_bonus))
tracks_df = tracks_df.assign(bonus_core=_core_bonus, bonus_max=_max_bonus)
upgrades_df = upgrades_df.assign(cond_set=upgrades_df["Track Type Condition"].map(split_types))

# --- PPO policy, evaluated inside the environment ---
df_ppo = run_ppo_env_eval(
    tracks_csv,
    cars_csv,
    upgrades_csv,
    ppo_model_path=ppo_model_path,
    vecnorm_path=vecnorm_path,
    agent_id=AGENT_ID,
    shuffle_tracks=SHUFFLE_TRACKS,
    episodes=EPISODES,
)

# --- Offline baselines (Random first, then Greedy: both draw from the shared rng) ---
df_random = run_offline_mode("Random", episodes=EPISODES, agent_id=AGENT_ID, shuffle=SHUFFLE_TRACKS)
df_greedy = run_offline_mode("Greedy", episodes=EPISODES, agent_id=AGENT_ID, shuffle=SHUFFLE_TRACKS)

# --- Combine all per-episode rows and summarise per mode ---
summary = pd.concat([df_random, df_greedy, df_ppo], ignore_index=True)

mode_agg = (
    summary
    .groupby("Mode")
    .agg(
        Total_Points_Mean=pd.NamedAgg(column="Total Points", aggfunc="mean"),
        Total_Points_Std=pd.NamedAgg(column="Total Points", aggfunc="std"),
        Avg_Points_per_Track=pd.NamedAgg(column="Avg Points / Track", aggfunc="mean"),
        Finish_Rate=pd.NamedAgg(column="Finish Rate", aggfunc="mean"),
        Wins=pd.NamedAgg(column="Wins", aggfunc="mean"),
    )
    .round(3)
)

print(mode_agg.to_string())


        Total_Points_Mean  Total_Points_Std  Avg_Points_per_Track  Finish_Rate    Wins
Mode                                                                                  
Greedy            300.000             0.000                25.000        1.000  12.000
PPO               237.590            12.429                19.799        1.000   5.665
Random            183.595            24.852                15.300        0.953   3.045


In [6]:
# Per-mode means (and std of total points) over all evaluated episodes,
# rounded to 3 decimals and shown as a rich table.
_mode_columns = {
    "Total_Points_Mean": ("Total Points", "mean"),
    "Total_Points_Std": ("Total Points", "std"),
    "Avg_Points_per_Track": ("Avg Points / Track", "mean"),
    "Finish_Rate": ("Finish Rate", "mean"),
    "Wins": ("Wins", "mean"),
}
mode_agg = summary.groupby("Mode").agg(**_mode_columns).round(3)
mode_agg


Unnamed: 0_level_0,Total_Points_Mean,Total_Points_Std,Avg_Points_per_Track,Finish_Rate,Wins
Mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Greedy,300.0,0.0,25.0,1.0,12.0
PPO,237.59,12.429,19.799,1.0,5.665
Random,183.595,24.852,15.3,0.953,3.045


In [None]:
# Per-mode statistics backing the four bar charts below.
agg = (
    summary
    .groupby("Mode")
    .agg(
        mean_points=("Total Points", "mean"),
        std_points=("Total Points", "std"),
        finish=("Finish Rate", "mean"),
        wins=("Wins", "mean"),
        apt=("Avg Points / Track", "mean"),
    )
    .reset_index()
)

x = np.arange(len(agg))

# One entry per figure: (column, y-label, title, extra bar kwargs, y-limits or None)
charts = [
    ("mean_points", "Total Points (mean ± std)", "Total Points by Mode",
     {"yerr": agg["std_points"], "capsize": 4}, None),
    ("finish", "Finish Rate", "Finish Rate by Mode", {}, (0, 1)),
    ("wins", "Wins", "Wins by Mode", {}, None),
    ("apt", "Avg Points / Track", "Avg Points / Track by Mode", {}, None),
]

for column, y_label, title, bar_kwargs, y_limits in charts:
    plt.figure(figsize=(6,4))
    plt.bar(x, agg[column], **bar_kwargs)
    if y_limits is not None:
        plt.ylim(*y_limits)
    plt.xticks(x, agg["Mode"])
    plt.ylabel(y_label)
    plt.title(title)
    plt.show()
