# Examples of Using the pymdwizard Library to Batch-Update XML Metadata

* Change all instances of an author's name
* Change dates to reflect those stored in external spreadsheet
* Update project webpage
* Reset title to match that stored in "Citation" field

:boom:

In [1]:
import glob
import pandas as pd
from lxml import etree
import sys
import os
sys.path.append("C:/Users/ehbaker/Documents/Python/scriptsFromOthers/fort-pymdwizard") #path to pymdwizard library
from pymdwizard.core.xml_utils import XMLRecord, XMLNode

In [2]:
# --- Inputs ---------------------------------------------------------------
# Directory holding the XML metadata records (also made the working directory below)
xml_fldr=r"Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata"
# Spreadsheet whose values are written into the XML records
tbl_pth=r"Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_AuthorInfo\Authors_perGlacier.xlsx"
# Glob pattern selecting which XML files get edited
xml_pattern="*RawGPR.xml"

# --- Setup ----------------------------------------------------------------
os.chdir(xml_fldr)

# Load the lookup table from the spreadsheet
tbl=pd.read_excel(tbl_pth)

# Full paths of every XML file in the folder that matches the pattern
xmls=glob.glob("\\".join([xml_fldr, xml_pattern]))

In [3]:
# Preview the first rows of the table to check its columns before using it
tbl.head()

Unnamed: 0,Glacier,GroupedNames,Authors,Years,Notes
0,Wolverine,Wolverine,"McNeil, Sass, Babcock, McGrath, Candela, Baker...",2013-2016,
1,Valdez,Valdez,"Wolken, Arendt, Candela, Baker, Whorton, Gusme...",2013-2015,
2,"Susitna, West Fork, East Fork, Maclaren, and E...",Upper Susitna Basin Glaciers,"Wolken, Candela, Baker, Whorton, Gusmeroli",2012-2015,
3,Taku,Taku,"McNeil, Sass, Candela, Baker, O'Neel",2013-2015,
4,Surprise,Surprise,"Baker, Sass, Burgess",2014,


In [4]:
#Edit all dates, to reflect those in spreadsheet
# For each record: parse the glacier name out of the citation text, look up its
# year range in `tbl`, then rewrite the citation text, the title, and the
# begin/end dates so they all agree with the spreadsheet.
for fl in xmls:
    print(fl)

    metd=XMLRecord(fl) #read in xml file
    citeinfo=metd.metadata.idinfo.citation.citeinfo

    #Edit dates in the "Citation" section
    og_txt=citeinfo.othercit.text #store original citation text
    txt_keep=og_txt.split("Alaska, ")[0]+"Alaska, " #keep everything up to and including "Alaska, "
    #Glacier name = text between "Data, " and ", Alaska", with any trailing " Glacier" removed
    glacier=og_txt.split("Data, ")[1].split(", Alaska")[0].split(" Glacier")[0]

    #Look up the years for this glacier; stop with a readable message if the name is not in the table
    year_matches=tbl.loc[tbl["Glacier"]==glacier, "Years"]
    if year_matches.empty:
        raise ValueError("No row in table with Glacier == %r (parsed from %s)" % (glacier, fl))
    years=str(year_matches.values[0]) #Extract years in column, convert to string

    new_txt=txt_keep+years+"." #Citation with correct years.
    citeinfo.othercit.text=new_txt #overwrite "othercit" citation with new dates

    #Edit dates in the "title" section
    #Keep "Alaska, " in front of the years, exactly as done for the citation above.
    #Dropping it made the title disagree with the citation and made a re-run of
    #this cell append the years a second time (no "Alaska, " left to split on).
    new_title=citeinfo.title.text.split("Alaska, ")[0]+"Alaska, "+years
    citeinfo.title.text=new_title

    #edit beginning and end dates
    #A single year ("2014") is used for both ends; a range ("2013-2016") supplies both
    year_parts=years.split("-")
    begdate=year_parts[0]+"0101"
    enddate=(year_parts[1] if len(year_parts)>1 else year_parts[0])+"1231"

    rngdates=metd.metadata.idinfo.timeperd.timeinfo.rngdates
    rngdates.begdate.text=begdate
    rngdates.enddate.text=enddate

    metd.save()

Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\EasternChugachGlaciersRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\EklutnaGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\GilkeyGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\GulkanaGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\KenaiGlaciersRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\LakeGeorgeGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\LemonCreekGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\ScottGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\SurpriseGlacierRawGPR.xml
Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMet

In [5]:
#Update project webpage
# Write the project data page URL into the online-link element of each
# record's larger-work citation, then save the record back to disk.
project_url="https://staging-www.usgs.gov/climate_landuse/clu_rd/glacierstudies/data.asp"
for xml_path in xmls:
    record=XMLRecord(xml_path)
    larger_work=record.metadata.idinfo.citation.citeinfo.lworkcit.citeinfo
    larger_work.onlink.text=project_url
    record.save()

In [6]:
#Edit Name for given person
# Replace every originator whose text is exactly `replace_name` with `new_name`
# in each record, then save the record.
replace_name="Erin Whorton"
new_name="Erin N. Whorton"
for fl in xmls:
    metd=XMLRecord(fl)
    origins=metd.metadata.idinfo.citation.citeinfo.origin
    # NOTE(review): pymdwizard appears to expose a tag that occurs only once as a
    # single XMLNode rather than a list - confirm against xml_utils. Wrapping it
    # lets records with a single originator be processed like the others.
    if isinstance(origins, XMLNode):
        origins=[origins]
    for name in origins:
        if name.text==replace_name: #replace old name with new name
            name.text=new_name
    metd.save()

In [7]:
#Update pubdate
# Set the publication date of every record to `pubdate`, then save it.
pubdate="201707"
for fl in xmls:
    metd=XMLRecord(fl)
    # Write through `.text`, as every other cell does. Assigning the string to
    # `citeinfo.pubdate` itself rebinds the Python attribute instead of editing
    # the element's text, so the new date would not be written to the XML.
    metd.metadata.idinfo.citation.citeinfo.pubdate.text=pubdate
    metd.save()

In [8]:
#Update Long All-Author List:
# Replace the originator list of each record's larger-work citation with the
# list of `origin` entries stored in people.xml.
all_author_path=r"Q:\Project Data\GlacierData\GPR\_ASC_RawDataRelease_StagingArea\_XMLMetadata\people.xml"
# Must read in text file as string to convert to node; the `with` block makes
# sure the file handle is closed once the text has been read.
with open(all_author_path, 'r') as people_file:
    all_author=XMLNode(people_file.read())

for fl in xmls:
    metd=XMLRecord(fl) #read xml
    larger_work=metd.metadata.idinfo.citation.citeinfo.lworkcit.citeinfo
    larger_work.clear_children('origin') #Clear contents of current author list
    for author in all_author.people.origin: #add authors from xml list back in to the larger work cited 'origin' tag
        larger_work.add_child(author)
    metd.save()

In [9]:
#Reset title of dataset to match that in Citation
# The title is taken from the citation text: the segment that follows "2017, ",
# cut off at the first "." after it.
for xml_path in xmls:
    record=XMLRecord(xml_path)
    citeinfo=record.metadata.idinfo.citation.citeinfo
    after_year=citeinfo.othercit.text.split("2017, ")[1]
    title_from_citation=after_year.partition(".")[0]
    citeinfo.title.text=title_from_citation
    record.save()