In [61]:
import pandas as pd
import numpy as np
import os

In [62]:
def process_experiment_data(base_path, experiment_prefix, seeds=("123", "213", "321")):
    """Average the per-epoch metrics of one experiment across its seed runs.

    Each run is read from ``<base_path>/<experiment_prefix><seed>/summary.csv``.
    Runs whose summary file is missing or holds no rows are skipped with a
    printed warning, so one absent seed does not abort the whole experiment.

    Parameters
    ----------
    base_path : str
        Directory that contains one sub-directory per run.
    experiment_prefix : str
        Run directory name without the seed suffix.
    seeds : iterable of str, optional
        Seed suffixes to load. Defaults to ``("123", "213", "321")``.

    Returns
    -------
    pandas.DataFrame or None
        Column-wise mean of the concatenated runs grouped by ``epoch``
        (``epoch`` becomes the index), or ``None`` when no run could be loaded.
    """
    data_frames = []

    # Load the summary.csv of every seed that actually produced data.
    for seed in seeds:
        file_path = os.path.join(base_path, f"{experiment_prefix}{seed}", "summary.csv")
        try:
            df = pd.read_csv(file_path)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Missing file or zero-byte file: treat as "no data" for this seed.
            df = None
        if df is None or df.empty:
            print(f"Warning: No data in {file_path}")
            continue
        data_frames.append(df)

    if not data_frames:
        return None

    # Stack all runs and average rows that share the same epoch.
    combined_data = pd.concat(data_frames)
    return combined_data.groupby('epoch').mean()


### Average results over 3 seeds

In [63]:
# Location of the per-run output folders and the experiments to aggregate.
base_path = "~/msc-project/atari/output/atari_1/"
experiment_types = [
    "dmamba_breakout",
    "dtrans_breakout",
    "dmamba_qbert",
    "dtrans_qbert",
]  # add more as needed

experiment_results = {}

for experiment in experiment_types:
    result = process_experiment_data(base_path, experiment)
    if result is None:
        print(f"No data processed for {experiment}")
        continue
    # Keep the seed-averaged frame in memory and save it as avg_<experiment>.csv
    # in the current working directory.
    experiment_results[experiment] = result
    result.to_csv(f"avg_{experiment}.csv")

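### Normalized score

Normalized score = 100 × (raw − random) / (expert − random), computed from the last row of each seed's `summary.csv` and reported as mean and std over seeds.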

In [66]:
import pandas as pd

def calculate_normalized_score(raw_scores, score_random, score_expert):
    """Rescale raw scores so the random baseline maps to 0 and the expert to 100.

    Parameters
    ----------
    raw_scores : number or array-like supporting arithmetic
        Score(s) to normalize.
    score_random : number
        Baseline score that should map to 0.
    score_expert : number
        Reference score that should map to 100.

    Returns
    -------
    Same kind as ``raw_scores``
        ``100 * (raw_scores - score_random) / (score_expert - score_random)``.
    """
    gain_over_random = raw_scores - score_random
    expert_margin = score_expert - score_random
    return 100 * gain_over_random / expert_margin

def process_experiment_results(base_path, experiment_prefix, game, seeds=("123", "213", "321")):
    """Compute mean and std of the normalized final score across seed runs.

    For every seed the file ``<base_path>/<experiment_prefix><seed>/summary.csv``
    is read once, the ``evaluation/eval_return`` value of its last row is
    normalized with the random/expert reference scores of ``game``, and the
    per-seed values are aggregated. Seeds whose file is missing or holds no
    rows are skipped with a printed warning.

    Parameters
    ----------
    base_path : str
        Directory that contains one sub-directory per run (a trailing
        separator is optional).
    experiment_prefix : str
        Run directory name without the seed suffix.
    game : str
        One of ``'Breakout'``, ``'Qbert'``, ``'Pong'``, ``'Seaquest'``.
    seeds : iterable of str, optional
        Seed suffixes to load. Defaults to ``("123", "213", "321")``.

    Returns
    -------
    pandas.Series or None
        Series indexed by ``'mean'`` and ``'std'``, or ``None`` when no seed
        produced a score.

    Raises
    ------
    KeyError
        If ``game`` has no reference scores.
    """
    # Game specific random and expert scores
    score_random = {
        'Breakout': 1.7, 'Qbert': 163.9, 'Pong': -20.7, 'Seaquest': 68.4
    }
    score_expert = {
        'Breakout': 30.5, 'Qbert': 13455.0, 'Pong': 14.6, 'Seaquest': 42054.7
    }
    # Fail early with a readable message instead of a bare KeyError mid-loop.
    if game not in score_random or game not in score_expert:
        raise KeyError(f"Unknown game {game!r}; expected one of {sorted(score_random)}")

    normalized_scores = []
    for seed in seeds:
        # os.path.join works with or without a trailing separator on base_path.
        file_path = os.path.join(base_path, f"{experiment_prefix}{seed}", "summary.csv")
        try:
            summary = pd.read_csv(file_path)  # read once, reuse below
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Missing file or zero-byte file: treat as "no data" for this seed.
            summary = None
        if summary is None or summary.empty:
            print(f"Warning: No data in {file_path}")
            continue

        # Last row of the file is taken as the final epoch
        # (assumes rows are written in epoch order — TODO confirm).
        raw_score = summary['evaluation/eval_return'].iloc[-1]

        normalized_scores.append(
            calculate_normalized_score(raw_score, score_random[game], score_expert[game])
        )

    if not normalized_scores:
        return None
    return pd.Series(normalized_scores).agg(['mean', 'std'])


In [67]:
# Example usage: print the normalized score (mean and std over seeds) for
# every model/game pair.
base_path = "~/msc-project/atari/output/atari_1/"

# (run-directory prefix, game name) for each experiment to report
experiments_to_report = [
    ("dmamba_breakout", "Breakout"),
    ("dtrans_breakout", "Breakout"),
    ("dmamba_qbert", "Qbert"),
    ("dtrans_qbert", "Qbert"),
]

for experiment_prefix, game in experiments_to_report:
    results = process_experiment_results(base_path, experiment_prefix, game)
    print(f"Game: {game} \nModel: {experiment_prefix}")
    if results is None:
        # process_experiment_results returns None when no seed produced a score;
        # calling .round() on it would raise AttributeError.
        print("No data available")
    else:
        results = results.round(2)  # Round to 2 decimal places
        print(results)
    print()


Game: Breakout 
Model: dmamba_breakout
mean    175.81
std      36.96
dtype: float64

Game: Breakout 
Model: dtrans_breakout
mean    195.25
std      36.98
dtype: float64

Game: Qbert 
Model: dmamba_qbert
mean    17.21
std     13.38
dtype: float64

Game: Qbert 
Model: dtrans_qbert
mean    3.78
std     2.02
dtype: float64

