In [15]:
from glob import glob
import xml.etree.ElementTree as ET
import pandas as pd

In [2]:
# Locate every AMP / VMP XML file one directory level below ../dmd.
# Note: without recursive=True, glob treats "**" like a plain "*".
amps = glob("../dmd/**/f_amp2*.xml")
vmps = glob("../dmd/**/f_vmp2*.xml")

# Parse each file once and keep its root element, keyed by the file path.
amps_parsed = dict((path, ET.parse(path).getroot()) for path in amps)
vmps_parsed = dict((path, ET.parse(path).getroot()) for path in vmps)

In [3]:
def parse_dmd_version(dmd_string):
    """Split a release name into its numeric version and datestamp.

    The name is split on "_"; the third field must have the form
    ``major.minor.point`` and the fourth field is returned verbatim.

    Parameters
    ----------
    dmd_string : str
        Underscore-separated release name with at least four fields.

    Returns
    -------
    dict
        ``{"major": int, "minor": int, "point": int, "datestamp": str}``.

    Raises
    ------
    IndexError
        If the name has too few "_"-separated fields.
    ValueError
        If the version field is not three "."-separated integers.
    """
    fields = dmd_string.split("_")
    major_txt, minor_txt, point_txt = fields[2].split(".")
    # Convert the numbers before touching the datestamp field so that a
    # malformed version is reported ahead of a missing datestamp.
    version = dict(
        zip(("major", "minor", "point"), map(int, (major_txt, minor_txt, point_txt)))
    )
    version["datestamp"] = fields[3]
    return version

def parse_mp_version(xmp_filename):
    """Return the text after the last "_" in a file name, with ".xml" removed.

    Every occurrence of ".xml" is stripped before splitting. If the name
    contains no underscore, the whole stripped name is returned.
    """
    stem = xmp_filename.replace(".xml", "")
    # Only the trailing piece is needed, so split once from the right.
    return stem.rsplit("_", 1)[-1]

In [4]:
# For every AMP file, derive the release version from its parent folder name
# and the file version from the file name itself.
# Paths are split on "/", so "/"-separated paths are assumed.
amp_versions = {
    path: {
        "v_dmd": parse_dmd_version(segments[-2]),
        "v_amp": parse_mp_version(segments[-1]),
    }
    for path in amps
    for segments in (path.split("/"),)
}

# Same bookkeeping for the VMP files.
vmp_versions = {
    path: {
        "v_dmd": parse_dmd_version(segments[-2]),
        "v_vmp": parse_mp_version(segments[-1]),
    }
    for path in vmps
    for segments in (path.split("/"),)
}

In [5]:
# Order the file paths by (major, minor, point) of their release version.
# The datestamp is not part of the key; ties keep their dict insertion order.
amps_sorted = sorted(
    amp_versions,
    key=lambda path: tuple(
        amp_versions[path]["v_dmd"][part] for part in ("major", "minor", "point")
    ),
)
vmps_sorted = sorted(
    vmp_versions,
    key=lambda path: tuple(
        vmp_versions[path]["v_dmd"][part] for part in ("major", "minor", "point")
    ),
)

In [23]:
# One DataFrame per AMP file: each child of the root's first element becomes
# a row (tag -> text of its sub-elements), indexed by the APID column.
amp_dfs = {
    path: pd.DataFrame(
        [{field.tag: field.text for field in record} for record in root[0]]
    ).set_index("APID")
    for path, root in amps_parsed.items()
}

In [25]:
# One DataFrame per VMP file: each child of the root's first element becomes
# a row (tag -> text of its sub-elements), indexed by the VPID column.
vmp_dfs = {
    path: pd.DataFrame(
        [{field.tag: field.text for field in record} for record in root[0]]
    ).set_index("VPID")
    for path, root in vmps_parsed.items()
}

In [80]:
def sequential_changes(sorted_list,df_dict,pk):
    """Diff each frame against its predecessor in ``sorted_list``.

    Parameters
    ----------
    sorted_list : list
        Keys of ``df_dict`` in comparison order; element ``i`` is compared
        with element ``i + 1``.
    df_dict : dict
        Maps each key to a DataFrame already indexed by the record
        identifier. Index values are assumed to be unique within a frame.
    pk : str
        Name of the identifier column. Not used by the computation (the
        frames are matched on their index); kept so existing calls work.

    Returns
    -------
    dict
        Keyed by the later key of each consecutive pair. Each value is a
        dict with three DataFrames:

        * ``"new"``     - rows whose id exists only in the later frame
        * ``"removed"`` - rows whose id exists only in the earlier frame
        * ``"changed"`` - one row per id present in both frames whose values
          differ, with the earlier values suffixed ``_old`` and the later
          values suffixed ``_new``
    """
    out_changes = {}
    for older_key, newer_key in zip(sorted_list, sorted_list[1:]):
        older = df_dict[older_key]
        newer = df_dict[newer_key]
        changes = {
            "new": newer[~newer.index.isin(older.index)],
            "removed": older[~older.index.isin(newer.index)],
        }

        # Restrict both frames to the shared ids, in the same row order, so
        # they can be compared cell by cell.
        kept_old = older[older.index.isin(newer.index)]
        kept_new = newer.loc[kept_old.index]

        # Compare over the union of columns: a column present in only one
        # frame shows up as missing in the other and counts as a change
        # wherever it holds a value.
        all_columns = kept_old.columns.union(kept_new.columns, sort=False)
        lhs = kept_old.reindex(columns=all_columns)
        rhs = kept_new.reindex(columns=all_columns)

        # Two cells match when they are equal or both missing (None/NaN never
        # compare equal with ==, so that case is handled explicitly).
        unchanged_cells = (lhs == rhs) | (lhs.isna() & rhs.isna())

        # Comparing row against row for the same id - rather than
        # de-duplicating a concatenation of both frames - yields each changed
        # id exactly once and cannot be fooled by a different record that
        # happens to hold identical values.
        ix_changed = kept_old.index[~unchanged_cells.all(axis=1).to_numpy()]

        changes["changed"] = kept_old.loc[ix_changed].merge(
            kept_new.loc[ix_changed],
            left_index=True,
            right_index=True,
            suffixes=("_old", "_new"),
        )
        out_changes[newer_key] = changes
    return out_changes


In [81]:
# Release-to-release differences for the AMP frames, in version order.
amp_changes = sequential_changes(
    sorted_list=amps_sorted,
    df_dict=amp_dfs,
    pk="APID",
)

In [82]:
# Release-to-release differences for the VMP frames, in version order.
vmp_changes = sequential_changes(
    sorted_list=vmps_sorted,
    df_dict=vmp_dfs,
    pk="VPID",
)