In [1]:
import yaml
import pandas as pd
import pathlib
import os

# Create a table diff from two run yamls

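Purpose: Quickly see how two arbitrary run setup YAML files differ by comparing the values stored under each top-level key. Only keys whose values differ are reported; a key that is missing from one of the files is shown as `NOT USED` on that side.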

In [2]:
import yaml
import pandas as pd


def compare_yaml_dicts_to_dataframe(dict1_path: str, dict2_path: str, output_path: str) -> pd.DataFrame:
    """
    Compare two YAML run setup files and return the keys whose values differ.

    The first file is treated as the reference run and the second as the
    subject run. Only top-level keys are compared; each value is compared
    as a whole. A key that is missing from one file is reported with the
    placeholder 'NOT USED' on that side.

    Besides returning the result, the differences are written to
    '<output_path>/setup_diff_<stem1>_VS_<stem2>.csv', where the stems are
    the input file names without extension and with any 'run_setup_'
    substring removed.

    Args:
      dict1_path (str or pathlib.Path): Path to the reference YAML file.
      dict2_path (str or pathlib.Path): Path to the subject YAML file.
      output_path (str or pathlib.Path): Directory the CSV is written to.
          It is not created by this function.

    Returns:
      pd.DataFrame: One row per differing key, with the columns 'key',
                   'reference_run' and 'subject_run'.

    """

    # pathlib.Path() accepts strings as well as existing Path objects on any
    # platform, so no type check is needed before converting
    dict1_path = pathlib.Path(dict1_path)
    dict2_path = pathlib.Path(dict2_path)

    # we treat one run as a reference against which we compare a second, "subject" run
    run_1 = 'reference_run'
    run_2 = 'subject_run'

    # open the yamls; an empty file loads as None, which we treat as "no keys"
    with open(dict1_path) as f:
        dict1 = yaml.safe_load(f) or {}
    with open(dict2_path) as f:
        dict2 = yaml.safe_load(f) or {}

    # concat on axis=1 to get a record for each key, regardless of which
    # side (reference or subject) it comes from
    df = pd.concat(
        [pd.Series(dict1), pd.Series(dict2)],
        keys=[run_1, run_2],
        axis=1)

    df.index = df.index.set_names('key')

    # a key may not be present on both subject and reference sides - we call
    # those values NOT USED. Not done in place: filling a numeric column with
    # a string in place is a deprecated silent upcast in recent pandas.
    # NOTE: a key explicitly set to null in a YAML file is also a missing
    # value to pandas, so it is shown as NOT USED as well.
    df = df.fillna('NOT USED')

    # filter where there is a difference in values for the same key
    df = df[df[run_1] != df[run_2]].reset_index()

    # write to csv
    output_file = f'setup_diff_{dict1_path.stem}_VS_{dict2_path.stem}.csv'.replace(
        'run_setup_', '')

    print(f'Writing file to {output_file}')
    df.to_csv(pathlib.Path(output_path, output_file))
    return df

## Set paths

In [5]:
# Root of the shared models drive: "M:/" on Windows (os.name == "nt"),
# "/Volumes/Data/Models" everywhere else.
# NOTE: the Windows root must be "M:/" rather than a bare "M:". A bare drive
# letter is drive-relative in pathlib, so Path("M:") / "urban_modeling" gives
# "M:urban_modeling", which resolves against the current directory on M:.
m_drive = pathlib.Path("M:/") if os.name == "nt" else pathlib.Path("/Volumes/Data/Models")

# Home directory of the current user
home_dir = pathlib.Path.home()

In [6]:
# Directory on the models drive that receives the diff CSVs

output_path = m_drive / 'urban_modeling' / 'baus' / 'PBA50Plus' / 'diffs'
output_path

PosixPath('/Volumes/Data/Models/urban_modeling/baus/PBA50Plus/diffs')

## Run some diffs


In [8]:
# A: both run setup files given as pathlib paths built from the models drive

yaml_1 = pathlib.Path(
    m_drive, 'urban_modeling', 'baus', 'PBA50Plus', 'PBA50Plus_NoProject',
    'PBA50Plus_NoProject_v10_zn_znmod_upd',
    'run_setup_PBA50Plus_NoProject_v10_zn_znmod_upd.yaml')
yaml_2 = pathlib.Path(
    m_drive, 'urban_modeling', 'baus', 'PBA50Plus', 'PBA50Plus_NoProject_v11',
    'run_setup_PBA50Plus_NoProject_v11.yaml')

# yaml_1 is the reference run, yaml_2 the subject run
df_differences = compare_yaml_dicts_to_dataframe(yaml_1, yaml_2, output_path)

Writing file to setup_diff_PBA50Plus_NoProject_v10_zn_znmod_upd_VS_PBA50Plus_NoProject_v11.csv


In [9]:
# B: both run setup files given as plain strings

# alternative reference run (Draft Blueprint v6), kept for convenience:
#yaml_1 = '/Volumes/Data/Models/urban_modeling/baus/PBA50Plus/PBA50Plus_DraftBlueprint/PBA50Plus_Draft_Blueprint_v6/run_setup_PBA50Plus_Draft_Blueprint_v6.yaml'
yaml_1 = '/Volumes/Data/Models/urban_modeling/baus/PBA50Plus/PBA50Plus_DraftBlueprint/PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix/run_setup_PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix.yaml'
yaml_2 = '/Volumes/Data/Models/urban_modeling/baus/PBA50Plus/PBA50Plus_DraftBlueprint/PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix_altseed_v2/run_setup_PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix_altseed_v2.yaml'

# yaml_1 is the reference run, yaml_2 the subject run
df_differences = compare_yaml_dicts_to_dataframe(yaml_1, yaml_2, output_path)

Writing file to setup_diff_PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix_VS_PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix_altseed_v2.csv


In [10]:
# Display the differing keys from the most recent comparison assigned to df_differences
df_differences

Unnamed: 0,key,reference_run,subject_run
0,run_name,PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix,PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix_al...
1,annotation,"[Deed restriction fix, H6 pipeline fix, TM dem...","[Deed restriction fix, H6 pipeline fix, TM dem..."
