In [1]:
from glob import glob
import xml.etree.ElementTree as ET
import pandas as pd
from IPython.display import display, Markdown

In [2]:
amps = glob("../dmd/**/f_amp2*.xml")
vmps = glob("../dmd/**/f_vmp2*.xml")
amps_parsed = {a:ET.parse(a).getroot() for a in amps}
vmps_parsed = {v:ET.parse(v).getroot() for v in vmps}

In [3]:
def parse_dmd_version(dmd_string):
    parts = dmd_string.split("_")
    major, minor, point = parts[2].split(".")
    return {
        "major":int(major),
        "minor":int(minor),
        "point":int(point),
        "datestamp":parts[3]
    }

def parse_mp_version(xmp_filename):
    return xmp_filename.replace(".xml","").split("_")[-1]

In [4]:
amp_versions = {a:
    {"v_dmd": parse_dmd_version(a.split("/")[-2]),
    "v_amp": parse_mp_version(a.split("/")[-1])
    } for a in amps}

vmp_versions = {v:
    {"v_dmd": parse_dmd_version(v.split("/")[-2]),
    "v_vmp": parse_mp_version(v.split("/")[-1])
    } for v in vmps}

In [5]:
amps_sorted = [a[0] for a in sorted(amp_versions.items(),key= lambda x: (x[1]['v_dmd']['major'],x[1]['v_dmd']['minor'],x[1]['v_dmd']['point']))]
vmps_sorted = [a[0] for a in sorted(vmp_versions.items(),key= lambda x: (x[1]['v_dmd']['major'],x[1]['v_dmd']['minor'],x[1]['v_dmd']['point']))]

In [6]:
amp_dfs = {a[0]: pd.DataFrame.from_dict([{e.tag :e.text for e in l} for l in a[1][0]]).set_index('APID') for a in amps_parsed.items()}

In [7]:
vmp_dfs = {v[0]: pd.DataFrame.from_dict([{e.tag :e.text for e in l} for l in v[1][0]]).set_index('VPID') for v in vmps_parsed.items()}

In [8]:
def sequential_changes(sorted_list,df_dict,pk):
    out_changes = {}
    for i in range(0,len(sorted_list)-1):
        changes = {}
        df1 = df_dict[sorted_list[i]]
        df2 = df_dict[sorted_list[i+1]]
        changes["new"] = df2[~df2.index.isin(df1.index)]
        changes["removed"] = df1[~df1.index.isin(df2.index)]
        df1=df1[df1.index.isin(df2.index)]
        df2=df2[df2.index.isin(df1.index)]
        ix_changed = pd.concat([df1,df2]).drop_duplicates(keep=False).index
        changes["changed"] = df1.loc[ix_changed].merge(df2.loc[ix_changed],left_index=True,right_index=True,suffixes=("_old","_new"))
        out_changes[sorted_list[i+1]] = changes
    return out_changes


In [9]:
amp_changes = sequential_changes(amps_sorted,amp_dfs,"APID")

In [10]:
vmp_changes = sequential_changes(vmps_sorted,vmp_dfs,"VPID")

In [11]:
for k,v in amp_changes.items():
    f = k.split('/')[-1]
    for c,df in v.items():
        df.to_csv(f"../changes/{f}.{c}.csv")

In [12]:
for k,v in vmp_changes.items():
    f = k.split('/')[-1]
    for c,df in v.items():
        df.to_csv(f"../changes/{f}.{c}.csv")

In [13]:
df_amp_versions = pd.DataFrame.from_dict([{"dmd_file":k}|v['v_dmd'] for k,v in amp_versions.items()])
df_amp_versions['datestamp'] = pd.to_datetime(df_amp_versions['datestamp'])
for c in ["new","removed","changed"]:
    df= pd.DataFrame([(a,len(v[c].index)) for a,v in amp_changes.items()],columns=["dmd_file",c]) 
    display(Markdown(f"## AMP changes: {c} items since previous"))
    display(df.merge(df_amp_versions,on="dmd_file")[["major","minor","point","datestamp",c]])

## AMP changes: new items since previous

Unnamed: 0,major,minor,point,datestamp,new
0,4,1,0,2022-04-11 00:00:01,0
1,4,2,0,2022-04-18 00:00:01,8
2,4,3,0,2022-04-25 00:00:01,16
3,5,0,0,2022-05-02 00:00:01,16
4,5,1,0,2022-05-09 00:00:01,37
5,5,2,0,2022-05-16 00:00:01,22
6,5,3,0,2022-05-23 00:00:01,29
7,5,4,0,2022-05-30 00:00:01,49
8,6,0,0,2022-06-06 00:00:01,144
9,6,1,0,2022-06-13 00:00:01,17


## AMP changes: removed items since previous

Unnamed: 0,major,minor,point,datestamp,removed
0,4,1,0,2022-04-11 00:00:01,0
1,4,2,0,2022-04-18 00:00:01,0
2,4,3,0,2022-04-25 00:00:01,0
3,5,0,0,2022-05-02 00:00:01,0
4,5,1,0,2022-05-09 00:00:01,0
5,5,2,0,2022-05-16 00:00:01,0
6,5,3,0,2022-05-23 00:00:01,0
7,5,4,0,2022-05-30 00:00:01,0
8,6,0,0,2022-06-06 00:00:01,0
9,6,1,0,2022-06-13 00:00:01,0


## AMP changes: changed items since previous

Unnamed: 0,major,minor,point,datestamp,changed
0,4,1,0,2022-04-11 00:00:01,0
1,4,2,0,2022-04-18 00:00:01,116
2,4,3,0,2022-04-25 00:00:01,84
3,5,0,0,2022-05-02 00:00:01,44
4,5,1,0,2022-05-09 00:00:01,3304
5,5,2,0,2022-05-16 00:00:01,272
6,5,3,0,2022-05-23 00:00:01,88
7,5,4,0,2022-05-30 00:00:01,168
8,6,0,0,2022-06-06 00:00:01,488
9,6,1,0,2022-06-13 00:00:01,188


In [14]:
df_vmp_versions = pd.DataFrame.from_dict([{"dmd_file":k}|v['v_dmd'] for k,v in vmp_versions.items()])
df_vmp_versions['datestamp'] = pd.to_datetime(df_vmp_versions['datestamp'])
for c,subc in [("new","new"),("new","changed VMP ID"),("removed",""),("changed","")]:
    items = vmp_changes.items()
    if c=="new":
        if subc == "new":
            l=[(a,len(v[c][v[c].VPIDPREV.isna()].index)) for a,v in items]
        if subc == "changed VMP ID":
            l=[(a,len(v[c][v[c].VPIDPREV.notna()].index)) for a,v in items]
    elif c=="removed":
        l = l=[(a,len(v[c][~v[c].index.isin(v["new"].VPIDPREV)].index)) for a,v in items]
    else :
        l = [(a,len(v[c].index)) for a,v in items]
    df = pd.DataFrame(l,columns=["dmd_file",c]) 
    df = df.merge(df_vmp_versions,on="dmd_file")[["major","minor","point","datestamp",c]]
    display(Markdown(f"## VMP changes: {subc if subc else c} items since previous"))
    display(df)

## VMP changes: new items since previous

Unnamed: 0,major,minor,point,datestamp,new
0,4,2,0,2022-04-18 00:00:01,3
1,4,3,0,2022-04-25 00:00:01,2
2,5,0,0,2022-05-02 00:00:01,4
3,5,1,0,2022-05-09 00:00:01,11
4,5,2,0,2022-05-16 00:00:01,1
5,5,3,0,2022-05-23 00:00:01,11
6,5,4,0,2022-05-30 00:00:01,5
7,6,0,0,2022-06-06 00:00:01,23
8,6,1,0,2022-06-13 00:00:01,4
9,6,2,0,2022-06-20 00:00:01,16


## VMP changes: changed VMP ID items since previous

Unnamed: 0,major,minor,point,datestamp,new
0,4,2,0,2022-04-18 00:00:01,0
1,4,3,0,2022-04-25 00:00:01,0
2,5,0,0,2022-05-02 00:00:01,0
3,5,1,0,2022-05-09 00:00:01,0
4,5,2,0,2022-05-16 00:00:01,0
5,5,3,0,2022-05-23 00:00:01,0
6,5,4,0,2022-05-30 00:00:01,0
7,6,0,0,2022-06-06 00:00:01,0
8,6,1,0,2022-06-13 00:00:01,0
9,6,2,0,2022-06-20 00:00:01,0


## VMP changes: removed items since previous

Unnamed: 0,major,minor,point,datestamp,removed
0,4,2,0,2022-04-18 00:00:01,0
1,4,3,0,2022-04-25 00:00:01,0
2,5,0,0,2022-05-02 00:00:01,0
3,5,1,0,2022-05-09 00:00:01,0
4,5,2,0,2022-05-16 00:00:01,0
5,5,3,0,2022-05-23 00:00:01,0
6,5,4,0,2022-05-30 00:00:01,0
7,6,0,0,2022-06-06 00:00:01,0
8,6,1,0,2022-06-13 00:00:01,0
9,6,2,0,2022-06-20 00:00:01,0


## VMP changes: changed items since previous

Unnamed: 0,major,minor,point,datestamp,changed
0,4,2,0,2022-04-18 00:00:01,16
1,4,3,0,2022-04-25 00:00:01,12
2,5,0,0,2022-05-02 00:00:01,52
3,5,1,0,2022-05-09 00:00:01,12
4,5,2,0,2022-05-16 00:00:01,56
5,5,3,0,2022-05-23 00:00:01,0
6,5,4,0,2022-05-30 00:00:01,40
7,6,0,0,2022-06-06 00:00:01,44
8,6,1,0,2022-06-13 00:00:01,12
9,6,2,0,2022-06-20 00:00:01,16
