In [2]:
import wandb
import pandas as pd
import os
from tqdm import tqdm

def download_wandb_logs(entity, project, output_dir="wandb_logs"):
    """
    Download all logs from a W&B project.
    
    Args:
        entity (str): W&B entity/username
        project (str): W&B project name
        output_dir (str): Directory to save the logs
    """
    # Initialize W&B API
    api = wandb.Api()
    
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Get all runs from the project
    runs = api.runs(f"{entity}/{project}")
    
    print(f"Found {len(runs)} runs in project {project}")
    
    for run in tqdm(runs):
        # Create a directory for each run
        run_dir = os.path.join(output_dir, run.name)
        os.makedirs(run_dir, exist_ok=True)
        
        # Download run history as CSV
        history_df = pd.DataFrame(run.history())
        history_df.to_csv(os.path.join(run_dir, "metrics.csv"), index=False)
        
        # Download config as JSON
        config_df = pd.DataFrame([run.config])
        config_df.to_json(os.path.join(run_dir, "config.json"), orient="records")
        
        # Download any files stored in the run
        for file in run.files():
            try:
                file.download(root=run_dir)
            except Exception as e:
                print(f"Error downloading {file.name} from run {run.name}: {e}")
        
        # Save summary metrics
        summary_df = pd.DataFrame([run.summary._json_dict])
        summary_df.to_csv(os.path.join(run_dir, "summary.csv"), index=False)

if __name__ == "__main__":
    # Example usage
    entity = "hmludwig"
    project = "species-2024-hao-final-3"
    
    download_wandb_logs(entity, project)

Found 27 runs in project species-2024-hao-final-3


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [02:08<00:00,  4.76s/it]


In [9]:
import os
import yaml
import shutil
from collections import defaultdict
from pathlib import Path

def get_region_from_heightmap(heightmap_path):
    """Extract region name from heightmap path."""
    for region in ['austria', 'italia', 'slovenia']:
        if region in heightmap_path.lower():
            return region
    return None

def organize_logs_by_pairs(base_dir="wandb_logs"):
    """
    Organize logs by algo-heightmap pairs and copy output.log files with appropriate names.
    """
    # Dictionary to store runs by algo-heightmap pairs
    pairs_dict = defaultdict(list)
    
    # Process each run directory
    for run_dir in os.listdir(base_dir):
        run_path = os.path.join(base_dir, run_dir)
        
        # Skip if not a directory
        if not os.path.isdir(run_path):
            continue
            
        config_path = os.path.join(run_path, "config.yaml")
        if not os.path.exists(config_path):
            print(f"Warning: No config.yaml found in {run_dir}")
            continue
            
        # Read config file
        with open(config_path, 'r') as f:
            try:
                config = yaml.safe_load(f)
                # Extract heightmap and algo values, handling nested structure
                heightmap = config.get('heightmap', {}).get('value', '')
                algo = config.get('algo', {}).get('value', '')
                
                if heightmap and algo:
                    # Get the region from heightmap path
                    region = get_region_from_heightmap(heightmap)
                    if not region:
                        print(f"Warning: Could not determine region from heightmap {heightmap}")
                        continue
                        
                    pair_key = f"{algo}_{region}"
                    output_log = os.path.join(run_path, "output.log")
                    
                    if os.path.exists(output_log):
                        pairs_dict[pair_key].append((run_dir, output_log, region))
                    else:
                        print(f"Warning: No output.log found in {run_dir}")
                else:
                    print(f"Warning: Missing heightmap or algo in config for {run_dir}")
                    
            except yaml.YAMLError as e:
                print(f"Error reading config.yaml in {run_dir}: {e}")
                continue
    
    # Create directories for each pair and copy files
    for pair_key, run_files in pairs_dict.items():
        # Create pair directory
        pair_dir = os.path.join(base_dir, pair_key)
        os.makedirs(pair_dir, exist_ok=True)
        
        print(f"\nProcessing {pair_key} - {len(run_files)} runs found")
        
        # Copy output.log files with region-based names
        for i, (run_name, log_path, region) in enumerate(run_files):
            new_name = f"output_{region}_{i+1}.log"
            dest_path = os.path.join(pair_dir, new_name)
            
            try:
                shutil.copy2(log_path, dest_path)
                print(f"Copied {run_name} -> {new_name}")
            except Exception as e:
                print(f"Error copying {log_path}: {e}")
        
        if len(run_files) != 3:
            print(f"Warning: {pair_key} has {len(run_files)} runs (expected 3)")

if __name__ == "__main__":
    organize_logs_by_pairs()


Processing PSO_austria - 3 runs found
Copied divine-sea-2 -> output_austria_1.log
Copied silver-sponge-20 -> output_austria_2.log
Copied fluent-surf-11 -> output_austria_3.log

Processing CMAES_italia - 3 runs found
Copied trim-haze-7 -> output_italia_1.log
Copied ethereal-cloud-25 -> output_italia_2.log
Copied decent-wind-16 -> output_italia_3.log

Processing DE_italia - 3 runs found
Copied peachy-sky-27 -> output_italia_1.log
Copied rural-bird-9 -> output_italia_2.log
Copied woven-snowflake-18 -> output_italia_3.log

Processing CMAES_slovenia - 3 runs found
Copied earnest-river-4 -> output_slovenia_1.log
Copied azure-dew-22 -> output_slovenia_2.log
Copied breezy-dust-13 -> output_slovenia_3.log

Processing DE_austria - 3 runs found
Copied resilient-mountain-12 -> output_austria_1.log
Copied azure-dew-3 -> output_austria_2.log
Copied sleek-cloud-21 -> output_austria_3.log

Processing PSO_italia - 3 runs found
Copied graceful-darkness-26 -> output_italia_1.log
Copied graceful-dust-8 -

In [10]:
import os
import re
import csv
from pathlib import Path

def extract_cmaes_table(log_content):
    """Extract the CMAES table from log content and return as list of rows."""
    # Find the table header
    header_match = re.search(r'====+\n(n_gen.*?)\n====+\n', log_content, re.DOTALL)
    if not header_match:
        return None, None
    
    header = header_match.group(1).strip()
    headers = [h.strip() for h in header.split('|')]
    
    # Find all table rows (looking for lines with numbers and pipe symbols)
    table_pattern = r'\s*\d+\s*\|\s*\d+\s*\|.*?\n'
    rows = re.findall(table_pattern, log_content)
    
    if not rows:
        return None, None
        
    # Process each row
    processed_rows = []
    for row in rows:
        # Split by pipe and strip whitespace
        values = [cell.strip() for cell in row.split('|')]
        processed_rows.append(values)
        
    return headers, processed_rows

def process_logs(base_dir="wandb_logs"):
    """Process all log files and extract CMAES tables to CSVs."""
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.startswith('output_') and file.endswith('.log'):
                log_path = os.path.join(root, file)
                csv_path = log_path.rsplit('.', 1)[0] + '.csv'
                
                try:
                    with open(log_path, 'r') as f:
                        content = f.read()
                        
                    headers, rows = extract_cmaes_table(content)
                    
                    if headers and rows:
                        print(f"Extracting table from {log_path} to {csv_path}")
                        with open(csv_path, 'w', newline='') as f:
                            writer = csv.writer(f)
                            writer.writerow(headers)
                            writer.writerows(rows)
                    else:
                        print(f"No CMAES table found in {log_path}")
                        
                except Exception as e:
                    print(f"Error processing {log_path}: {e}")

if __name__ == "__main__":
    process_logs()

Extracting table from wandb_logs/PSO_slovenia/output_slovenia_2.log to wandb_logs/PSO_slovenia/output_slovenia_2.csv
Extracting table from wandb_logs/PSO_slovenia/output_slovenia_1.log to wandb_logs/PSO_slovenia/output_slovenia_1.csv
Extracting table from wandb_logs/PSO_slovenia/output_slovenia_3.log to wandb_logs/PSO_slovenia/output_slovenia_3.csv
Extracting table from wandb_logs/DE_slovenia/output_slovenia_2.log to wandb_logs/DE_slovenia/output_slovenia_2.csv
Extracting table from wandb_logs/DE_slovenia/output_slovenia_1.log to wandb_logs/DE_slovenia/output_slovenia_1.csv
Extracting table from wandb_logs/DE_slovenia/output_slovenia_3.log to wandb_logs/DE_slovenia/output_slovenia_3.csv
Extracting table from wandb_logs/PSO_italia/output_italia_3.log to wandb_logs/PSO_italia/output_italia_3.csv
Extracting table from wandb_logs/PSO_italia/output_italia_2.log to wandb_logs/PSO_italia/output_italia_2.csv
Extracting table from wandb_logs/PSO_italia/output_italia_1.log to wandb_logs/PSO_ital

In [11]:
import os
import pandas as pd
import glob
from pathlib import Path

def combine_metrics(base_dir="wandb_logs"):
    """
    Combine f_min and f_avg from all CSV files in each algo_map directory.
    Creates a summary CSV with all runs side by side.
    """
    # Walk through the base directory
    for root, dirs, files in os.walk(base_dir):
        csv_files = [f for f in files if f.endswith('.csv') and f.startswith('output_')]
        
        # Skip if no CSV files found
        if not csv_files:
            continue
            
        # Get algo and map from directory name
        dir_name = os.path.basename(root)
        if '_' not in dir_name:  # Skip if not an algo_map directory
            continue
            
        print(f"\nProcessing directory: {dir_name}")
        
        # Initialize DataFrames dictionary for this directory
        runs_data = {}
        
        # Process each CSV file
        for csv_file in sorted(csv_files):
            try:
                # Read the CSV
                df = pd.read_csv(os.path.join(root, csv_file))
                
                # Extract run number from filename
                run_num = csv_file.split('_')[-1].replace('.csv', '')
                
                # Store f_min and f_avg with run number
                runs_data[f'f_min_run_{run_num}'] = df['f_min'].values
                runs_data[f'f_avg_run_{run_num}'] = df['f_avg'].values
                
            except Exception as e:
                print(f"Error processing {csv_file}: {e}")
                continue
        
        if runs_data:
            # Create combined DataFrame
            combined_df = pd.DataFrame(runs_data)
            
            # Add generation number
            combined_df.insert(0, 'generation', range(1, len(combined_df) + 1))
            
            # Save combined data
            output_file = os.path.join(root, f'{dir_name}_combined.csv')
            combined_df.to_csv(output_file, index=False)
            print(f"Created combined file: {output_file}")
            
            # Calculate and save statistics
            stats_df = pd.DataFrame()
            stats_df['generation'] = combined_df['generation']
            
            # Calculate mean and std for f_min and f_avg across runs
            f_min_cols = [col for col in combined_df.columns if col.startswith('f_min')]
            f_avg_cols = [col for col in combined_df.columns if col.startswith('f_avg')]
            
            stats_df['f_min_mean'] = combined_df[f_min_cols].mean(axis=1)
            stats_df['f_min_std'] = combined_df[f_min_cols].std(axis=1)
            stats_df['f_avg_mean'] = combined_df[f_avg_cols].mean(axis=1)
            stats_df['f_avg_std'] = combined_df[f_avg_cols].std(axis=1)
            
            # Save statistics
            stats_file = os.path.join(root, f'{dir_name}_stats.csv')
            stats_df.to_csv(stats_file, index=False)
            print(f"Created statistics file: {stats_file}")

if __name__ == "__main__":
    combine_metrics()


Processing directory: PSO_slovenia
Created combined file: wandb_logs/PSO_slovenia/PSO_slovenia_combined.csv
Created statistics file: wandb_logs/PSO_slovenia/PSO_slovenia_stats.csv

Processing directory: DE_slovenia
Created combined file: wandb_logs/DE_slovenia/DE_slovenia_combined.csv
Created statistics file: wandb_logs/DE_slovenia/DE_slovenia_stats.csv

Processing directory: PSO_italia
Created combined file: wandb_logs/PSO_italia/PSO_italia_combined.csv
Created statistics file: wandb_logs/PSO_italia/PSO_italia_stats.csv

Processing directory: DE_italia
Created combined file: wandb_logs/DE_italia/DE_italia_combined.csv
Created statistics file: wandb_logs/DE_italia/DE_italia_stats.csv

Processing directory: CMAES_slovenia
Created combined file: wandb_logs/CMAES_slovenia/CMAES_slovenia_combined.csv
Created statistics file: wandb_logs/CMAES_slovenia/CMAES_slovenia_stats.csv

Processing directory: DE_austria
Created combined file: wandb_logs/DE_austria/DE_austria_combined.csv
Created stat

In [12]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns

def create_performance_plots(base_dir="wandb_logs"):
    """
    Create combined plots for f_avg and f_min, using consistent colors for each algorithm
    across all maps.
    """
    # Set style for publication-quality plots
    plt.style.use('seaborn-v0_8-paper')
    sns.set_palette("deep")
    plt.rcParams.update({
        'font.size': 11,
        'font.family': 'serif',
        'axes.labelsize': 12,
        'axes.titlesize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10,
        'figure.figsize': (8, 5),
        'figure.dpi': 300,
        'lines.linewidth': 2
    })

    # First pass: collect all unique algorithms
    all_algorithms = set()
    map_data = {}
    
    for root, _, files in os.walk(base_dir):
        # Skip .ipynb_checkpoints directories
        if '.ipynb_checkpoints' in root:
            continue
            
        for file in files:
            if file.endswith('_stats.csv'):
                algo_map = os.path.basename(root)
                if '_' not in algo_map:
                    continue
                
                # Split on last underscore to handle algorithms with underscores in their names
                algo = '_'.join(algo_map.split('_')[:-1])
                map_name = algo_map.split('_')[-1]
                
                if map_name not in map_data:
                    map_data[map_name] = {}
                
                # Read stats file
                df = pd.read_csv(os.path.join(root, file))
                map_data[map_name][algo] = df
                all_algorithms.add(algo)

    # Create consistent color mapping
    algorithms = sorted(list(all_algorithms))  # Sort for consistency
    colors = sns.color_palette("deep", n_colors=len(algorithms))
    algo_colors = dict(zip(algorithms, colors))

    # Create combined plots for each map
    for map_name, algo_data in map_data.items():
        # Create output directory
        output_dir = os.path.join(base_dir, 'plots')
        os.makedirs(output_dir, exist_ok=True)
        
        # Create combined plot
        plt.figure(figsize=(8, 5))
        
        for algo, data in sorted(algo_data.items()):  # Sort algorithms for consistent legend order
            generations = data['generation']
            color = algo_colors[algo]
            
            # Plot f_min with solid line
            plt.plot(generations, data['f_min_mean'], 
                    label=f'{algo} (Best)',
                    color=color,
                    linewidth=2)
            plt.fill_between(generations,
                           data['f_min_mean'] - data['f_min_std'],
                           data['f_min_mean'] + data['f_min_std'],
                           color=color,
                           alpha=0.1)
            
            # Plot f_avg with dashed line
            plt.plot(generations, data['f_avg_mean'], 
                    label=f'{algo} (Avg)',
                    color=color,
                    linestyle='--',
                    linewidth=1.5)
            plt.fill_between(generations,
                           data['f_avg_mean'] - data['f_avg_std'],
                           data['f_avg_mean'] + data['f_avg_std'],
                           color=color,
                           alpha=0.1)
        
        plt.xlabel('Generation')
        plt.ylabel('Fitness')
        if map_name == "italia":
            map_name = "italy"
        plt.title(f'{map_name.capitalize()}')
        plt.grid(True, linestyle='--', alpha=0.7)
        
        # Adjust legend to be more compact
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        
        # Save combined plot in both formats
        plt.savefig(os.path.join(output_dir, f'combined_fitness_{map_name}.png'), 
                   bbox_inches='tight', 
                   dpi=300)
        plt.savefig(os.path.join(output_dir, f'combined_fitness_{map_name}.pdf'), 
                   bbox_inches='tight', 
                   format='pdf')
        plt.close()
        
        print(f"Created combined performance plot for {map_name}")

if __name__ == "__main__":
    create_performance_plots()

Created combined performance plot for slovenia
Created combined performance plot for italy
Created combined performance plot for austria


In [13]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns

def create_combined_performance_plot(base_dir="wandb_logs"):
    """
    Create a single figure with 1x3 subplots for all maps, sharing y-axis and having a single legend.
    """
    # Set style for publication-quality plots
    plt.style.use('seaborn-v0_8-paper')
    sns.set_palette("deep")
    plt.rcParams.update({
        'font.size': 11,
        'font.family': 'serif',
        'axes.labelsize': 12,
        'axes.titlesize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10,
    })

    # First pass: collect all unique algorithms and data
    all_algorithms = set()
    map_data = {}
    
    for root, _, files in os.walk(base_dir):
        # Skip .ipynb_checkpoints directories
        if '.ipynb_checkpoints' in root:
            continue
            
        for file in files:
            if file.endswith('_stats.csv'):
                algo_map = os.path.basename(root)
                if '_' not in algo_map:
                    continue
                
                algo = '_'.join(algo_map.split('_')[:-1])
                map_name = algo_map.split('_')[-1]
                
                if map_name not in map_data:
                    map_data[map_name] = {}
                
                # Read stats file
                df = pd.read_csv(os.path.join(root, file))
                map_data[map_name][algo] = df
                all_algorithms.add(algo)

    # Create color mapping
    algorithms = sorted(list(all_algorithms))
    colors = sns.color_palette("deep", n_colors=len(algorithms))
    algo_colors = dict(zip(algorithms, colors))

    # Create figure with subplots
    fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=False)  # Changed sharey to False
    fig.subplots_adjust(wspace=0.15)  # Adjusted spacing between subplots

    # Define the order of maps
    map_order = ['slovenia', 'austria', 'italia']
    
    # Store handles and labels for legend
    legend_handles = []
    legend_labels = []

    # Plot each map
    for idx, map_name in enumerate(map_order):
        if map_name not in map_data:
            continue
            
        ax = axes[idx]
        
        for algo, data in sorted(map_data[map_name].items()):
            generations = data['generation']
            color = algo_colors[algo]
            
            # Plot f_min with solid line
            line_min = ax.plot(generations, data['f_min_mean'],
                             color=color,
                             linewidth=2)
            ax.fill_between(generations,
                          data['f_min_mean'] - data['f_min_std'],
                          data['f_min_mean'] + data['f_min_std'],
                          color=color,
                          alpha=0.1)
            
            # Plot f_avg with dashed line
            line_avg = ax.plot(generations, data['f_avg_mean'],
                             color=color,
                             linestyle='--',
                             linewidth=1.5)
            ax.fill_between(generations,
                          data['f_avg_mean'] - data['f_avg_std'],
                          data['f_avg_mean'] + data['f_avg_std'],
                          color=color,
                          alpha=0.1)
            
            # Add to legend only for the first subplot
            if idx == 0:
                legend_handles.extend([line_min[0], line_avg[0]])
                legend_labels.extend([f'{algo} (Best)', f'{algo} (Avg)'])

        ax.set_xlabel('Generation')
        if idx == 0:
            ax.set_ylabel('Fitness')
        if map_name == "italia":
            map_name = "italy"
        ax.set_title(f'{map_name.capitalize()}')
        ax.grid(True, linestyle='--', alpha=0.7)

    # Add single legend to the right of the plots
    fig.legend(legend_handles, legend_labels,
              bbox_to_anchor=(1.02, 0.5),
              loc='center left',
              borderaxespad=0)

    # Adjust layout to prevent legend overlap
    plt.tight_layout()
    
    # Create output directory and save plots
    output_dir = os.path.join(base_dir, 'plots')
    os.makedirs(output_dir, exist_ok=True)
    
    plt.savefig(os.path.join(output_dir, 'combined_maps.png'),
                bbox_inches='tight',
                dpi=300)
    plt.savefig(os.path.join(output_dir, 'combined_maps.pdf'),
                bbox_inches='tight',
                format='pdf')
    plt.close()
    
    print("Created combined plot for all maps")

if __name__ == "__main__":
    create_combined_performance_plot()

Created combined plot for all maps
