In [None]:
# Add the parent directory to the module search path.
# NOTE(review): presumably this is what makes the `emeval` package used in the
# following cells importable when the notebook runs from a subdirectory - confirm.
import sys
sys.path.append("..")

In [None]:
from emeval.input.spec_details import FileSpecDetails

# Spec details object that the PhoneView in the next cell is built from.
# NOTE(review): the arguments look like (data location, author email, spec id);
# confirm against the FileSpecDetails constructor.
sd = FileSpecDetails("data/", "shankari@eecs.berkeley.edu", "sfba_trial_3")

In [None]:
from emeval.input.phone_view import PhoneView

# Build the PhoneView from the spec details; its `map()` is what gets
# tabularized further down.
pv = PhoneView(sd)

In [None]:
import pandas as pd
from typing import Dict, Any, Callable


def traverse_pv_map(node: Dict[str, Any], func: Callable[..., None]):
    """
    Helper function for recursively traversing a PhoneView map.

    Walks `node` depth-first in iteration order. Dicts are descended into,
    lists are expanded element by element, and every other value is treated
    as a terminal node and reported via `func(key, val)`.

    List elements that are not dicts (scalars, DataFrames, nested lists) are
    handled under the key of the enclosing list rather than being traversed
    as if they were dicts.

    :param node: the (sub-)map to traverse
    :param func: callback invoked as `func(key, val)` for every terminal
        node; its return value is ignored
    """
    def visit(key, val):
        if isinstance(val, dict):
            traverse_pv_map(val, func)
        elif isinstance(val, list):
            # list elements have no key of their own, so they inherit
            # the key that the list itself is stored under
            for item in val:
                visit(key, item)
        else:
            # Do something at a terminal node
            func(key, val)

    for key, val in node.items():
        visit(key, val)

    
def tabularize_pv_map(pv_map: Dict[str, Dict]) -> Dict[str, pd.DataFrame]:
    """
    Converts a PhoneView map into a dict of Pandas DataFrames
    with uniquely-identifiable (i.e. fully queryable) rows.

    The map is traversed twice with `traverse_pv_map`:

    1. Every DataFrame found at a terminal node is collected under its key
       (DataFrames sharing a key are concatenated), and the "primary key" is
       determined as the set of non-DataFrame terminal keys that do not
       collide with a column name of any collected DataFrame.
    2. Every primary key element is prepended as a column to each collected
       DataFrame that does not already have it.

    Progress is reported with `print`.

    NOTE(review): each primary key column is filled with the first value
    encountered for that key and applied to all rows of the DataFrame,
    including rows that were concatenated from other parts of the map
    (later values are skipped by the `key not in df.columns` guard).
    Confirm this is intended before relying on these columns to tell
    rows from different branches apart.

    :param pv_map: the PhoneView map to tabularize
    :return: dict from each key under which a DataFrame was found to the
        combined DataFrame with the primary key columns prepended
    :raises AssertionError: if the final sanity check finds a primary key
        element missing from a resulting DataFrame
    """
    # keeps track of all pd.DataFrame objects found in pv_map
    df_map = dict()

    # Terminal (non-DataFrame) keys and DataFrame column names are collected
    # separately and only combined after the traversal, so that the primary
    # key does not depend on the order in which nodes are visited.
    terminal_keys = set()
    df_columns = set()

    def populate_df_map_and_pkey(key, val):
        """
        Called by `traverse_pv_map` to find all Pandas DataFrames
        in the PhoneView map and gather the primary key candidates.
        """
        if not isinstance(val, pd.DataFrame):
            terminal_keys.add(key)
            return
        df_columns.update(val.columns)
        if key not in df_map:
            print(f"found Pandas DataFrame at {key=} with {val.shape[0]} rows")
            df_map[key] = val
        else:
            df_map[key] = pd.concat([df_map[key], val], ignore_index=True)
            print(f"appended {val.shape[0]} rows to {key} which now has {df_map[key].shape[0]} rows")

    print("finding pd.DataFrame objects in PhoneView map and populating primary key...")
    print(10 * "-")
    traverse_pv_map(pv_map, populate_df_map_and_pkey)

    # keeps track of the "primary key"
    # i.e. the columns that will be prepended to each value in df_map;
    # names that are already a column of some DataFrame are excluded
    pkey = terminal_keys - df_columns

    print(10 * "-")
    print(f"determined full primary key to be {pkey}")
    print(10 * "=")

    def prepend_pkey_to_dfs(key, val):
        """
        Called by `traverse_pv_map` to prepend the primary key and
        its corresponding values to each Pandas DataFrame in df_map.
        """
        if key in pkey:
            for label, df in df_map.items():
                if key not in df.columns:
                    # assign() returns a new DataFrame, so the DataFrames
                    # held by pv_map are not modified; the column selection
                    # moves the new column to the front
                    df_map[label] = (
                        df.assign(
                            **{key: val}
                        )[[key] + [c for c in df.columns if c != key]]
                    )
                    print(f"added column {key} to {label}")

    print("prepending primary key to each Pandas DataFrame...")
    print(10 * "-")
    traverse_pv_map(pv_map, prepend_pkey_to_dfs)

    # sanity check:
    # ensure every primary key element is a column in each df_map value
    for df in df_map.values():
        assert all(p in df.columns for p in pkey)

    return df_map
            
    
# Tabularize the whole PhoneView map; the result maps each key under which a
# DataFrame was found to its combined DataFrame.
df_map = tabularize_pv_map(pv.map())

In [None]:
from IPython.display import display

# Show every tabularized DataFrame, preceded by the label it is stored under.
for label, df in df_map.items():
    print(label)
    display(df)