## Calculate Normalized Score

In [2]:
import pandas as pd

def calculate_normalized_score(raw_scores, score_random, score_expert):
    """Rescale raw game scores relative to two reference scores.

    A raw score equal to ``score_random`` maps to 0 and a raw score equal to
    ``score_expert`` maps to 100; other values are scaled linearly.

    Args:
        raw_scores: Raw score, or any object supporting element-wise
            arithmetic (e.g. a pandas Series of scores).
        score_random: Reference score that should map to 0.
        score_expert: Reference score that should map to 100.

    Returns:
        The normalized score(s), in the same shape as ``raw_scores``.
    """
    gain_over_random = raw_scores - score_random
    expert_gap = score_expert - score_random
    return 100 * (gain_over_random / expert_gap)

def process_experiment_results(base_path, experiment_prefix, game, seeds=None, aggregate=False):
    """Collect the best normalized evaluation score of each seed of an experiment.

    For every seed, ``{base_path}{experiment_prefix}{seed}/summary.csv`` is read,
    the highest value in its ``evaluation/eval_return`` column is taken as the
    run's raw score, and that score is normalized against the random and expert
    reference scores of ``game``.

    Args:
        base_path: Directory prefix of the experiment outputs (must end with a
            path separator, since it is concatenated as-is).
        experiment_prefix: Run-directory name without the seed suffix,
            e.g. ``"dmamba_breakout"``.
        game: Key into the reference-score tables below, e.g. ``"Breakout"``.
        seeds: Iterable of seed suffixes to load. Defaults to the five seeds
            ``["123", "132", "321", "231", "312"]``; pass the first three for
            the 3-seed setting.
        aggregate: If False (default), return the per-seed scores. If True,
            return their mean and standard deviation instead.

    Returns:
        A string. With ``aggregate=False`` it holds one rounded normalized
        score per line (pandas' empty-Series text if no run had data). With
        ``aggregate=True`` it holds the rounded ``mean`` and ``std`` rows, or
        the function returns ``None`` if no run had data.

    Raises:
        KeyError: If ``game`` is not in the reference tables, or a summary file
            lacks the ``evaluation/eval_return`` column.
    """
    if seeds is None:
        seeds = ["123", "132", "321", "231", "312"]     # Use 5 seeds

    # Game scores by random walk and human
    score_random = {
        'Breakout': 1.7, 'Qbert': 163.9, 'Pong': -20.7, 'Seaquest': 68.4,
        'Hero': 1027.0, 'KungFuMaster': 258.5, 'Alien': 227.8, 'RoadRunner': 11.5,
        'BattleZone': 2360.0, 'BankHeist': 14.0, 'FishingDerby': -92.0, 'SpaceInvaders': 148,
    }
    score_expert = {
        'Breakout': 30.5, 'Qbert': 13455.0, 'Pong': 14.6, 'Seaquest': 42054.7,
        'Hero': 30826.4, 'KungFuMaster': 22736.3, 'Alien': 7127.7, 'RoadRunner': 7845.0,
        'BattleZone': 37187.5, 'BankHeist': 753.0, 'FishingDerby': -39.0, 'SpaceInvaders': 1669.0,
    }

    # The baselines do not depend on the seed, so look them up once; an
    # unknown game fails here before any file is read.
    random_baseline = score_random[game]
    expert_baseline = score_expert[game]

    normalized_scores = []
    for seed in seeds:
        file_path = f"{base_path}{experiment_prefix}{seed}/summary.csv"

        # Parse the file once and reuse the frame for both the emptiness
        # check and the score lookup.
        data = pd.read_csv(file_path)
        if data.empty:
            print(f"Warning: No data in {file_path}")
            continue

        # Best evaluation return of the run; max() ignores NaN entries.
        raw_score = data['evaluation/eval_return'].max()
        if pd.isna(raw_score):
            # Rows exist but none holds an evaluation return: treat as no data.
            print(f"Warning: No data in {file_path}")
            continue

        normalized_scores.append(
            calculate_normalized_score(raw_score, random_baseline, expert_baseline)
        )

    # An explicit dtype keeps round()/agg() well-defined when every run was skipped.
    scores = pd.Series(normalized_scores, dtype=float)

    if aggregate:
        if scores.empty:
            return None
        return scores.agg(['mean', 'std']).round(2).to_string(index=True)

    return scores.round(2).to_string(index=False)

### Results of Initial Experiments
- 5 random seeds
  - outlier seeds are removed by hand, where necessary, before computing the mean and std
- 4 Games: Breakout, Qbert, Hero and KungFuMaster
- context length: 10, 30 and 50

#### Calculate Normalized Scores, context length = 10

In [226]:
# Root directory of the runs evaluated in this cell.
base_path = "~/msc-project/atari/output/atari_10/"

# (run-directory prefix, game) pairs: every game is paired with both models,
# Decision Mamba ("dmamba") first and Decision Transformer ("dtrans") second.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in ("Breakout", "Qbert", "Hero", "KungFuMaster")
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Breakout 
Model: Decision Mamba
280.56
414.58
406.25
334.38
279.86

Game: Breakout 
Model: Decision Transformer
199.31
343.06
385.07
225.00
184.37

Game: Qbert 
Model: Decision Mamba
26.19
26.29
28.32
26.00
25.89

Game: Qbert 
Model: Decision Transformer
49.50
34.20
27.24
18.74
18.69

Game: Hero 
Model: Decision Mamba
8.65
7.97
6.69
6.67
8.51

Game: Hero 
Model: Decision Transformer
31.87
33.90
25.34
36.45
30.57

Game: KungFuMaster 
Model: Decision Mamba
5.26
4.41
6.19
9.04
6.68

Game: KungFuMaster 
Model: Decision Transformer
33.33
21.94
32.97
 3.88
 6.15



##### Remove Outliers and Calculate Mean and Std of Normalized Scores

In [12]:
import pandas as pd

# Normalized scores per game and model, entered by hand. Lists with fewer
# than five values have had the seeds judged to be outliers left out.
data = {
    'Breakout': {
        'Decision Mamba': [280.56, 414.58, 406.25, 334.38, 279.86],
        'Decision Transformer': [199.31, 343.06, 385.07, 225.00]
    },
    'Qbert': {
        'Decision Mamba': [26.19, 26.29, 28.32, 26.00, 25.89],
        'Decision Transformer': [34.20, 27.24, 18.74]
    },
    'Hero': {
        'Decision Mamba': [8.65, 7.97, 6.69, 6.67, 8.51],
        'Decision Transformer': [31.87, 33.90, 25.34, 36.45, 30.57]
    },
    'KungFuMaster': {
        'Decision Mamba': [5.26, 4.41, 6.19, 9.04, 6.68],
        'Decision Transformer': [33.33, 21.94, 32.97]
    }
}

# Mean and sample standard deviation (pandas default) for every
# (game, model) pair, rounded to two decimals.
results = {}
for game, per_model in data.items():
    for model, scores in per_model.items():
        series = pd.Series(scores)
        results[(game, model)] = {
            'mean': round(series.mean(), 2),
            'std': round(series.std(), 2),
        }

# One header line and one statistics line per pair, followed by a blank line.
for key, stats in results.items():
    game, model = key
    print(
        f"Game: {game} | Model: {model}",
        f"Mean: {stats['mean']}, Std: {stats['std']}",
        "",
        sep="\n",
    )


Game: Breakout | Model: Decision Mamba
Mean: 343.13, Std: 65.35

Game: Breakout | Model: Decision Transformer
Mean: 288.11, Std: 89.98

Game: Qbert | Model: Decision Mamba
Mean: 26.54, Std: 1.01

Game: Qbert | Model: Decision Transformer
Mean: 26.73, Std: 7.74

Game: Hero | Model: Decision Mamba
Mean: 7.7, Std: 0.96

Game: Hero | Model: Decision Transformer
Mean: 31.63, Std: 4.16

Game: KungFuMaster | Model: Decision Mamba
Mean: 6.32, Std: 1.75

Game: KungFuMaster | Model: Decision Transformer
Mean: 29.41, Std: 6.47



#### Calculate Normalized Scores, context length = 30

In [228]:
# Root directory of the runs evaluated in this cell.
base_path = "~/msc-project/atari/output/atari_30/"

# (run-directory prefix, game) pairs: every game is paired with both models,
# Decision Mamba ("dmamba") first and Decision Transformer ("dtrans") second.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in ("Breakout", "Qbert", "Hero", "KungFuMaster")
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Breakout 
Model: Decision Mamba
373.26
446.88
350.35
319.79
433.68

Game: Breakout 
Model: Decision Transformer
205.21
346.53
272.22
 88.19
181.60

Game: Qbert 
Model: Decision Mamba
28.88
29.50
10.82
20.32
25.70

Game: Qbert 
Model: Decision Transformer
25.55
 2.85
 7.74
 5.14
 4.01

Game: Hero 
Model: Decision Mamba
6.76
6.61
7.15
6.62
7.61

Game: Hero 
Model: Decision Transformer
34.42
30.00
25.95
22.63
27.23

Game: KungFuMaster 
Model: Decision Mamba
6.86
7.21
8.19
7.04
4.28

Game: KungFuMaster 
Model: Decision Transformer
 6.86
17.98
 6.64
17.27
 7.44



##### Remove Outliers and Calculate Mean and Std of Normalized Scores

In [1]:
import pandas as pd

# Normalized scores per game and model, entered by hand. Lists with fewer
# than five values have had the seeds judged to be outliers left out.
data = {
    'Breakout': {
        'Decision Mamba': [373.26, 446.88, 350.35, 319.79, 433.68],
        'Decision Transformer': [205.21, 346.53, 272.22, 181.60]
    },
    'Qbert': {
        'Decision Mamba': [28.88, 29.50, 20.32, 25.70],
        'Decision Transformer': [2.85, 7.74, 5.14, 4.01]
    },
    'Hero': {
        'Decision Mamba': [6.76, 6.61, 7.15, 6.62, 7.61],
        'Decision Transformer': [34.42, 30.00, 25.95, 22.63, 27.23]
    },
    'KungFuMaster': {
        'Decision Mamba': [6.86, 7.21, 8.19, 7.04, 4.28],
        'Decision Transformer': [17.98, 17.27, 7.44]
    }
}

# Mean and sample standard deviation (pandas default) for every
# (game, model) pair, rounded to two decimals.
results = {}
for game, per_model in data.items():
    for model, scores in per_model.items():
        series = pd.Series(scores)
        results[(game, model)] = {
            'mean': round(series.mean(), 2),
            'std': round(series.std(), 2),
        }

# One header line and one statistics line per pair, followed by a blank line.
for key, stats in results.items():
    game, model = key
    print(
        f"Game: {game} | Model: {model}",
        f"Mean: {stats['mean']}, Std: {stats['std']}",
        "",
        sep="\n",
    )


Game: Breakout | Model: Decision Mamba
Mean: 384.79, Std: 54.29

Game: Breakout | Model: Decision Transformer
Mean: 251.39, Std: 74.14

Game: Qbert | Model: Decision Mamba
Mean: 26.1, Std: 4.2

Game: Qbert | Model: Decision Transformer
Mean: 4.94, Std: 2.09

Game: Hero | Model: Decision Mamba
Mean: 6.95, Std: 0.43

Game: Hero | Model: Decision Transformer
Mean: 28.05, Std: 4.44

Game: KungFuMaster | Model: Decision Mamba
Mean: 6.72, Std: 1.46

Game: KungFuMaster | Model: Decision Transformer
Mean: 14.23, Std: 5.89



#### Calculate Normalized Scores, context length = 50

In [5]:
# Root directory of the runs evaluated in this cell.
base_path = "~/msc-project/atari/output/atari_50/"

# (run-directory prefix, game) pairs: every game is paired with both models,
# Decision Mamba ("dmamba") first and Decision Transformer ("dtrans") second.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in ("Breakout", "Qbert", "Hero", "KungFuMaster")
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Breakout 
Model: Decision Mamba
336.81
453.82
240.28
413.54
257.29

Game: Breakout 
Model: Decision Transformer
244.44
194.10
180.90
146.18
276.04

Game: Qbert 
Model: Decision Mamba
24.48
23.60
20.12
21.21
23.56

Game: Qbert 
Model: Decision Transformer
14.12
15.83
 4.48
 1.59
 3.41

Game: Hero 
Model: Decision Mamba
9.41
6.70
6.63
9.08
6.65

Game: Hero 
Model: Decision Transformer
22.80
38.79
26.32
23.68
16.08

Game: KungFuMaster 
Model: Decision Mamba
6.15
8.06
1.25
8.06
6.64

Game: KungFuMaster 
Model: Decision Transformer
23.50
 7.84
 3.97
 4.32
 8.77



##### Remove Outliers and Calculate Mean and Std of Normalized Scores

In [11]:
import pandas as pd

# Normalized scores per game and model, entered by hand. Lists with fewer
# than five values have had the seeds judged to be outliers left out.
data = {
    'Breakout': {
        'Decision Mamba': [336.81, 453.82, 413.54],
        'Decision Transformer': [244.44, 194.10, 276.04]
    },
    'Qbert': {
        'Decision Mamba': [24.48, 23.60, 20.12, 21.21, 23.56],
        'Decision Transformer': [14.12, 15.83, 4.48]
    },
    'Hero': {
        'Decision Mamba': [9.41, 6.70, 6.63, 9.08, 6.65],
        'Decision Transformer': [22.80, 38.79, 26.32, 23.68]
    },
    'KungFuMaster': {
        'Decision Mamba': [6.15, 8.06, 8.06, 6.64],
        'Decision Transformer': [7.84, 4.32, 8.77]
    }
}

# Mean and sample standard deviation (pandas default) for every
# (game, model) pair, rounded to two decimals.
results = {}
for game, per_model in data.items():
    for model, scores in per_model.items():
        series = pd.Series(scores)
        results[(game, model)] = {
            'mean': round(series.mean(), 2),
            'std': round(series.std(), 2),
        }

# One header line and one statistics line per pair, followed by a blank line.
for key, stats in results.items():
    game, model = key
    print(
        f"Game: {game} | Model: {model}",
        f"Mean: {stats['mean']}, Std: {stats['std']}",
        "",
        sep="\n",
    )


Game: Breakout | Model: Decision Mamba
Mean: 401.39, Std: 59.44

Game: Breakout | Model: Decision Transformer
Mean: 238.19, Std: 41.33

Game: Qbert | Model: Decision Mamba
Mean: 22.59, Std: 1.84

Game: Qbert | Model: Decision Transformer
Mean: 11.48, Std: 6.12

Game: Hero | Model: Decision Mamba
Mean: 7.69, Std: 1.42

Game: Hero | Model: Decision Transformer
Mean: 27.9, Std: 7.41

Game: KungFuMaster | Model: Decision Mamba
Mean: 7.23, Std: 0.98

Game: KungFuMaster | Model: Decision Transformer
Mean: 6.98, Std: 2.35



# Action Reduction (discarded)

In [8]:
# Root directory of the runs evaluated in this cell.
# NOTE(review): the "Action Reduction with Reverse Mapping" cell reads the
# same directory but shows different numbers - confirm which runs were on
# disk when each cell was executed.
base_path = "~/msc-project/atari/output/atari_10_new/"

# (run-directory prefix, game) pairs: both models for each of the two games.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in ("Hero", "KungFuMaster")
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Hero 
Model: Decision Mamba
mean    6.46
std     0.09
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    6.69
std     0.03
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    0.79
std     1.09
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    2.56
std     2.47
dtype: float64



### Action Reduction with Reverse Mapping

In [6]:
# Root directory of the runs evaluated in this cell.
base_path = "~/msc-project/atari/output/atari_10_new/"

# (run-directory prefix, game) pairs: both models for each of the two games.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in ("Hero", "KungFuMaster")
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Hero 
Model: Decision Mamba
mean    7.94
std     1.34
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    26.80
std      3.94
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    2.35
std     1.51
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    4.68
std     0.60
dtype: float64



# Action Fusion

## Simple Action Fusion

In [3]:
# Set the run directory explicitly so this cell does not depend on a
# `base_path` left in the kernel by whichever cell ran before it.
# NOTE(review): this restores the value that was commented out here; confirm
# it is the directory that holds the Simple Action Fusion runs.
base_path = "~/msc-project/atari/output/atari_10_new/"

# (run-directory prefix, game) pairs: both models for each of the two games.
experiments = [
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Hero 
Model: Decision Mamba
mean    7.07
std     0.73
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    18.72
std      3.67
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    3.64
std     0.95
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    3.27
std     3.06
dtype: float64



## Non-frequency-based Action Fusion

In [4]:
# Set the run directory explicitly so this cell does not depend on a
# `base_path` left in the kernel by whichever cell ran before it.
# NOTE(review): this restores the value that was commented out here; confirm
# it is the directory that holds the Non-frequency-based Action Fusion runs.
base_path = "~/msc-project/atari/output/atari_10_new/"

# (run-directory prefix, game) pairs: both models for each of the two games.
experiments = [
    ("dmamba_hero", "Hero"),
    ("dtrans_hero", "Hero"),
    ("dmamba_kungfumaster", "KungFuMaster"),
    ("dtrans_kungfumaster", "KungFuMaster"),
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    results = process_experiment_results(base_path, experiment_prefix, game)
    model_name = "Decision Mamba" if "dmamba" in experiment_prefix else "Decision Transformer"
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()

Game: Hero 
Model: Decision Mamba
mean    7.26
std     1.21
dtype: float64

Game: Hero 
Model: Decision Transformer
mean    16.06
std      0.74
dtype: float64

Game: KungFuMaster 
Model: Decision Mamba
mean    2.79
std     0.33
dtype: float64

Game: KungFuMaster 
Model: Decision Transformer
mean    1.55
std     0.86
dtype: float64



# Extended Experiments of 12 Games
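- 3 random seeds (`123`, `132`, `321`): `process_experiment_results` must be run with this 3-seed list instead of the 5-seed one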

In [3]:
# Root directory of the runs evaluated in this cell.
# NOTE(review): the section heading says 3 seeds, but the seed list is chosen
# inside process_experiment_results - confirm it was set to the 3-seed list
# when this cell was run.
base_path = "~/msc-project/atari/output/atari_10_extend/"

# (run-directory prefix, game) pairs: every one of the 12 games is paired with
# both models, Decision Mamba ("dmamba") first and Decision Transformer
# ("dtrans") second.
experiments = [
    (f"{model_tag}_{title.lower()}", title)
    for title in (
        "Breakout", "Qbert", "Hero", "KungFuMaster",
        "Pong", "Seaquest", "Alien", "RoadRunner",
        "BattleZone", "BankHeist", "FishingDerby", "SpaceInvaders",
    )
    for model_tag in ("dmamba", "dtrans")
]

# Report what process_experiment_results returns for each pair.
for experiment_prefix, game in experiments:
    if "dmamba" in experiment_prefix:
        model_name = "Decision Mamba"
    else:
        model_name = "Decision Transformer"
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {model_name}")
    print(results)
    print()


Game: Breakout 
Model: Decision Mamba
mean    367.13
std      75.09

Game: Breakout 
Model: Decision Transformer
mean    309.14
std      97.41

Game: Qbert 
Model: Decision Mamba
mean    26.93
std      1.20

Game: Qbert 
Model: Decision Transformer
mean    36.98
std     11.38

Game: Hero 
Model: Decision Mamba
mean    7.77
std     0.99

Game: Hero 
Model: Decision Transformer
mean    30.37
std      4.47

Game: KungFuMaster 
Model: Decision Mamba
mean    5.29
std     0.89

Game: KungFuMaster 
Model: Decision Transformer
mean    29.41
std      6.48

Game: Pong 
Model: Decision Mamba
mean    64.31
std     53.24

Game: Pong 
Model: Decision Transformer
mean    71.58
std     26.82

Game: Seaquest 
Model: Decision Mamba
mean    2.77
std     0.35

Game: Seaquest 
Model: Decision Transformer
mean    2.05
std     0.43

Game: Alien 
Model: Decision Mamba
mean    11.74
std      2.16

Game: Alien 
Model: Decision Transformer
mean    12.42
std      1.51

Game: RoadRunner 
Model: Decision Mamba
mean