In [13]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

calculate normalized score

In [14]:
import pandas as pd

def calculate_normalized_score(raw_scores, score_random, score_expert):
    # print(f"raw_scores: {raw_scores}, score_random: {score_random}, score_expert: {score_expert}")
    return 100 * ((raw_scores - score_random) / (score_expert - score_random))

def process_experiment_results(base_path, experiment_prefix, game):
    seeds = ["123", "132", "321"]
    normalized_scores = []

    # Game specific random and expert scores
    score_random = {
        'Breakout': 1.7, 'Qbert': 163.9, 'Pong': -20.7, 'Seaquest': 68.4, 'Hero': 1027.0, 'KungFuMaster': 258.5
    }
    score_expert = {
        'Breakout': 30.5, 'Qbert': 13455.0, 'Pong': 14.6, 'Seaquest': 42054.7, 'Hero': 30826.4, 'KungFuMaster': 22736.3
    }

    for seed in seeds:
        file_path = f"{base_path}{experiment_prefix}{seed}/summary.csv"
        if pd.read_csv(file_path).empty:
            print(f"Warning: No data in {file_path}")
            continue

        # load the best epoch data
        data = pd.read_csv(file_path)
        best_epoch = data['evaluation/eval_return'].idxmax()
        raw_score = data.loc[best_epoch]['evaluation/eval_return']

        # # load the average data of all epochs
        # data = pd.read_csv(file_path)
        # raw_score = data['evaluation/eval_return'].mean()
        
        # Calculate normalized score
        normalized_score = calculate_normalized_score(raw_score, score_random[game], score_expert[game])
        normalized_scores.append(normalized_score)

    if normalized_scores:
        return pd.Series(normalized_scores).agg(['mean', 'std'])
    else:
        return None


In [15]:
base_path = "~/msc-project/atari/output/atari_10/"

experiments = [
    ("dmamba_breakout", "Breakout"),
    ("dtrans_breakout", "Breakout"),
    ("dmamba_qbert", "Qbert"),
    ("dtrans_qbert", "Qbert"),
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
    # ("dmamba_pong", "Pong"),
    # ("dtrans_pong", "Pong"),
    # ("dmamba_seaquest", "Seaquest"),
    # ("dtrans_seaquest", "Seaquest"),
]

for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    results = results.round(2)  # Round to 2 decimal places
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


Game: Breakout 
Model: Decision Mamba
mean    367.13
std      75.09
dtype: float64

Game: Breakout 
Model: Decision Transformer
mean    309.14
std      97.41
dtype: float64

Game: Qbert 
Model: Decision Mamba
mean    26.93
std      1.20
dtype: float64

Game: Qbert 
Model: Decision Transformer
mean    36.98
std     11.38
dtype: float64

Game: Hero 
Model: Decision Mamba
mean    7.77
std     0.99
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    30.37
std      4.47
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    5.29
std     0.89
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    29.41
std      6.48
dtype: float64



In [16]:
base_path = "~/msc-project/atari/output/atari_30/"

experiments = [
    ("dmamba_breakout", "Breakout"),
    ("dtrans_breakout", "Breakout"),
    ("dmamba_qbert", "Qbert"),
    ("dtrans_qbert", "Qbert"),
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
    # ("dmamba_pong", "Pong"),
    # ("dtrans_pong", "Pong"),
    # ("dmamba_seaquest", "Seaquest"),
    # ("dtrans_seaquest", "Seaquest"),
]

for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    results = results.round(2)  # Round to 2 decimal places
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


Game: Breakout 
Model: Decision Mamba
mean    390.16
std      50.43
dtype: float64

Game: Breakout 
Model: Decision Transformer
mean    274.65
std      70.69
dtype: float64

Game: Qbert 
Model: Decision Mamba
mean    23.07
std     10.61
dtype: float64

Game: Qbert 
Model: Decision Transformer
mean    12.05
std     11.95
dtype: float64

Game: Hero 
Model: Decision Mamba
mean    6.84
std     0.28
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    30.12
std      4.24
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    7.42
std     0.69
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    10.49
std      6.49
dtype: float64



In [17]:
base_path = "~/msc-project/atari/output/atari_50/"

experiments = [
    ("dmamba_breakout", "Breakout"),
    ("dtrans_breakout", "Breakout"),
    ("dmamba_qbert", "Qbert"),
    ("dtrans_qbert", "Qbert"),
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
    # ("dmamba_pong", "Pong"),
    # ("dtrans_pong", "Pong"),
    # ("dmamba_seaquest", "Seaquest"),
    # ("dtrans_seaquest", "Seaquest"),
]

for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    results = results.round(2)  # Round to 2 decimal places
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


Game: Breakout 
Model: Decision Mamba
mean    343.63
std     106.93
dtype: float64

Game: Breakout 
Model: Decision Transformer
mean    206.48
std      33.53
dtype: float64

Game: Qbert 
Model: Decision Mamba
mean    22.73
std      2.31
dtype: float64

Game: Qbert 
Model: Decision Transformer
mean    11.48
std      6.11
dtype: float64

Game: Hero 
Model: Decision Mamba
mean    7.58
std     1.58
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    29.31
std      8.40
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    5.15
std     3.51
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    11.77
std     10.34
dtype: float64



In [18]:
base_path = "~/msc-project/atari/output/atari_10_action_fusion/"

experiments = [
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
]

for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    results = results.round(2)  # Round to 2 decimal places
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


FileNotFoundError: [Errno 2] No such file or directory: '/home/keyan/msc-project/atari/output/atari_10_action_fusion/dmamba_hero123/summary.csv'

In [None]:
base_path = "~/msc-project/atari/output/atari_10_no_af/"

experiments = [
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
]

for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    results = results.round(2)  # Round to 2 decimal places
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


Game: Hero 
Model: Decision Mamba
mean    7.77
std     0.99
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    30.37
std      4.47
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    5.29
std     0.89
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    29.41
std      6.48
dtype: float64

