In [None]:
import pandas as pd
import functools

## Fixing associated filenames from BatchExport

We assume that the Filename column of the batch-exported AnnulusContourPoints.csv contains the name of the mrb file.

We have a naming convention for mrb files: `{redcap identifier}-{echo number}_suffix.mrb` (e.g. `14800-00003-01_CFP_Validated_AC`)

We want to remove the suffix here, so we will just split by `_` and use the first element.

In [None]:
# Input CSVs (absolute paths on the mounted Google Drive volume)
points_in_csv = "/Volumes/GoogleDrive/Shared drives/Shape-Kitware-PERK/Data/HLHS/Comprehensive_Fontan_Data/AnnulusContourPoints_HLHS_orig.csv"
demo_in_csv = "/Volumes/GoogleDrive/Shared drives/Shape-Kitware-PERK/Data/HLHS/Comprehensive_Fontan_Data/Demographics_NoPHI_orig.csv"

# Output files use relative names, so they are written to the notebook's working directory
points_out_csv, demo_out_csv = "AnnulusContourPoints.csv", "Demographics.csv"

# Read both input tables into DataFrames
points_df, demo_df = (pd.read_csv(csv_path) for csv_path in (points_in_csv, demo_in_csv))

In [None]:
# Preview the first rows to see what the raw Filename values look like
points_df.head()

In [None]:
# Keep only the part of Filename before the first "_", which drops the suffix.
# The vectorised .str accessor gives the same result as a per-row split for
# strings, and propagates missing values (NaN) instead of raising
# AttributeError on them.
points_df["Filename"] = points_df["Filename"].str.split("_").str[0]
points_df.head()

In [None]:
# Preview the demographics table to see what it looks like and to identify its Filename column.
# Observation: the Filename within the demographics is just the redcap identifier.
demo_df.head()

In [None]:
# Sorted distinct Filename values of the points table (l1) and the demographics table (l2)
l1, l2 = (sorted(table["Filename"].unique()) for table in (points_df, demo_df))

In [None]:
# Compare the two sorted identifier lists with plain list equality, which checks
# both the length and every element. Folding pairwise results with `==` would
# report "same" whenever the number of mismatches is even, and pairing the lists
# element by element would silently ignore extra entries in the longer list.
if l1 == l2:
    print("The lists l1 and l2 are the same")
else:
    print("The lists l1 and l2 are not the same")
    # List the identifiers that have no counterpart, to make the mismatch actionable.
    print("Only in l1:", sorted(set(l1) - set(l2)))
    print("Only in l2:", sorted(set(l2) - set(l1)))

In [None]:
# Write the points table (with the shortened Filename values) to points_out_csv;
# index=False keeps the DataFrame index out of the file.
points_df.to_csv(points_out_csv, index=False)

## Replacing TR severity with numerical information

In [None]:
# Column holding the TR severity label, and the numeric code assigned to each label.
group_colum_name = "TR_Severity"
groups = {
    'Trivial': 0,
    'Mild': 1,
    'Moderate': 2,
    'Severe': 3,
}

# Validate the actual labels against the mapping keys. Comparing only the number
# of distinct values would let a misspelled label through (ending in a bare
# KeyError) and would reject a dataset in which one grade simply does not occur.
# Missing values (NaN) are reported here as unexpected as well.
unexpected = set(demo_df[group_colum_name].unique()) - set(groups)
assert not unexpected, (
    f"Unexpected {group_colum_name} values: {sorted(unexpected, key=str)}"
)

# Every label is known at this point, so map() yields an integer column.
demo_df[group_colum_name] = demo_df[group_colum_name].map(groups)

In [None]:
# Cross-check that the TR severity column now holds numerical values
demo_df.head()

In [None]:
# Write the demographics table to demo_out_csv; index=False keeps the
# DataFrame index out of the file.
demo_df.to_csv(demo_out_csv, index=False)