In [13]:
import sys, os, subprocess, argparse, time
import json as json
import xml.etree.ElementTree as ET
import xml.dom.minidom as mindom
from xml.sax.saxutils import unescape
#json and xml modules are already standard at python >2.6
#Sangbaek Lee, Jan. 30 2019
#keep every digit in epochtime in the stamps
#do not use 3rd party libraries if possible e.g. pip

In [14]:
#rss follows xml format with very specific tags: title, link, description(required) + some optional tags (e.g. media, pubDate, guid, url,...)
#
class rss_object:
    """Attach the standard RSS child tags to an existing XML element.

    ``title``, ``link`` and ``description`` sub-elements are always appended
    to ``tag_main`` (in that order).  ``pubDate``, ``guid`` and ``url`` are
    appended, in that order, only when a value other than ``None`` is given.
    Every element created is also stored on the instance as ``tag_<name>``.

    The description is passed through ``xml.sax.saxutils.unescape`` before
    being stored as the element text.
    """

    def __init__(self, tag_main, title='Test',
                 link='https://clas12mon.jlab.org/status/decoding/',
                 description='description', pubDate=None, guid=None, url=None):
        self.type = "rss_object"
        self.tag_main = tag_main

        def attach(name):
            # Create <name> under tag_main and remember it as self.tag_<name>.
            node = ET.SubElement(self.tag_main, name)
            setattr(self, "tag_" + name, node)
            return node

        # Mandatory RSS tags.
        attach("title").text = title
        attach("link").text = link
        description_node = attach("description")
        description_node.text = unescape(description)

        # Optional RSS tags: only emitted when the caller supplied a value.
        for name, value in (("pubDate", pubDate), ("guid", guid), ("url", url)):
            if value is not None:
                attach(name).text = value

In [15]:
#Classes for making the very first rss file.
#Initial Setup of essential components for an RSS feed.
#Call it rss main
class rss_main(rss_object):
    """Create a brand-new RSS 2.0 document.

    Builds ``<rss version="2.0"><channel>...</channel></rss>`` and fills the
    channel with the mandatory ``title``/``link``/``description`` tags through
    ``rss_object.__init__``.

    Parameters
    ----------
    title, link, description : text of the channel-level tags.
    save : when exactly ``True`` the document is written right after creation.
    fileout : default output path, remembered as ``self.fileout``.
    """

    def __init__(self, title="Test: converting a json to an rss file",
                 link='https://clas12mon.jlab.org/status/decoding/',
                 description='main description', save=False, fileout='out.xml'):
        self.tag_namespace = ET.Element('rss')
        self.tag_namespace.set("version", "2.0")
        self.tag_channel = ET.SubElement(self.tag_namespace, "channel")
        # sub objects under channel
        rss_object.__init__(self, self.tag_channel, title, link, description)
        # The base initialiser sets type to "rss_object"; assign the label of
        # this class afterwards so it is not overwritten.
        self.type = "rss_main"
        # Always remember the default output path so that savexml() can be
        # called without an argument.
        self.fileout = fileout
        if save is True:
            self.savexml(fileout)

    def savexml(self, fileout=None):
        """Pretty-print the document and write it to ``fileout``.

        Falls back to ``self.fileout`` when ``fileout`` is ``None``.  The
        pretty-printed text is kept in ``self.prettyprint``; ``&quot;`` is
        replaced by a literal double quote and the text is run through
        ``unescape`` before being written.
        """
        if fileout is None:
            fileout = self.fileout
        self.dom = mindom.parseString(ET.tostring(self.tag_namespace))
        self.prettyprint = self.dom.toprettyxml()
        self.prettyprint = self.prettyprint.replace("&quot;", "\"")
        # 'with' guarantees the file is closed even if the write fails.
        with open(fileout, "w") as outfile:
            outfile.write(unescape(self.prettyprint))

    def add_item(self, title='item_title',
                 link='https://clas12mon.jlab.org/status/decoding/',
                 description='item_description', pubDate=None, guid=None,
                 url=None, save=False, fileout='out.xml'):
        """Append one ``<item>`` to the channel.

        The new element is stored as ``self.tag_item`` and its ``rss_object``
        wrapper as ``self.item``; ``self.dom`` and ``self.prettyprint`` are
        refreshed.  When ``save`` is exactly ``True`` the document is written
        to ``fileout``.
        """
        self.tag_item = ET.SubElement(self.tag_channel, "item")
        self.item = rss_object(self.tag_item, title, link, description,
                               pubDate, guid, url)
        self.dom = mindom.parseString(ET.tostring(self.tag_namespace))
        self.prettyprint = self.dom.toprettyxml()
        if save is True:
            # savexml is a method; the bare name would raise NameError.
            self.savexml(fileout)

In [16]:
def nodoublelinebreak(string):
    """Collapse repeated blank lines at indentation depths 0 to 3.

    For each depth d in 0..3 (processed in increasing order) every doubled
    occurrence of ``'\\n' + '\\t' * d`` is reduced, one occurrence at a time
    from the left, until no doubled occurrence remains.  The cleaned string is
    returned.
    """
    for depth in range(4):
        line_start = '\n' + '\t' * depth
        doubled = line_start + line_start
        while doubled in string:
            # Drop the first half of the left-most doubled occurrence.
            string = string.replace(doubled, line_start, 1)
    return string

In [17]:
#Modify an rss file existing and add some items
class rss_modify(rss_object):
    """Load an existing RSS file and optionally append items to it.

    Parameters
    ----------
    title, link, description, pubDate, guid, url :
        fields of the item added when ``item_add`` is exactly ``True``.
    save : when exactly ``True`` the document is written after loading.
    item_add : when exactly ``True`` one item is appended on construction.
    filein : path of the RSS file to parse.
    fileout : output path; defaults to ``filein`` when ``None``.
    """

    def __init__(self, title='item_title',
                 link='https://clas12mon.jlab.org/status/decoding/',
                 description='item_description', pubDate=None, guid=None,
                 url=None, save=False, item_add=False, filein='in.xml',
                 fileout=None):
        self.type = "rss_modify"
        self.tree = ET.parse(filein)
        self.tag_namespace = self.tree.getroot()
        # The channel is taken to be the first child of the root element.
        self.tag_channel = self.tag_namespace[0]
        self.filein = filein
        # Resolve the output path up front so every later call sees it.
        self.fileout = filein if fileout is None else fileout
        serialized = ET.tostring(self.tag_namespace)
        if not isinstance(serialized, str):
            # Python 3 returns bytes here; str() on bytes would yield "b'...'".
            serialized = serialized.decode("utf-8")
        self.prettyprint = serialized
        if item_add is True:
            # add_item creates the <item> itself; creating one here as well
            # would append the same item twice.
            self.add_item(title, link, description, pubDate, guid, url)
        if save is True:
            self.savexml(self.fileout)

    def _tidy(self, text):
        """Remove doubled blank lines and the blank line before ``</rss>``."""
        text = nodoublelinebreak(str(text))
        position = text.find('\n\t\n</rss>')
        if position > 0:
            # Drop the two characters '\n\t' that precede '\n</rss>'.
            text = text[:position] + text[position + 2:]
        return text

    def savexml(self, fileout=None):
        """Write the current document text to ``fileout``.

        Falls back to ``self.fileout`` when ``fileout`` is ``None``.  Escaped
        CDATA markers are removed, ``&lt;TABLE ... &lt;/TABLE&gt;`` is wrapped
        in a CDATA section, and the text is run through ``unescape`` before
        being written.  The processed text is kept in ``self.prettyprint``.
        """
        if fileout is None:
            fileout = self.fileout
        text = self._tidy(self.prettyprint)
        text = text.replace("&quot;", "\"")
        text = text.replace('&lt;![CDATA[', '')
        text = text.replace(']]&gt;', '')
        text = text.replace("&lt;TABLE", "<![CDATA[<TABLE")
        text = text.replace("&lt;/TABLE&gt;", "</TABLE>]]>")
        self.prettyprint = text
        # Open only once the text is ready, so a processing error cannot leave
        # a truncated file behind; 'with' closes the file on any exit path.
        with open(fileout, "w") as outfile:
            outfile.write(unescape(text))

    def add_item(self, title='item_title',
                 link='https://clas12mon.jlab.org/status/decoding/',
                 description='item_description', pubDate=None, guid=None,
                 url=None, save=False, fileout=None):
        """Append one ``<item>`` to the channel and refresh ``self.prettyprint``.

        The new element is stored as ``self.tag_item`` and its ``rss_object``
        wrapper as ``self.item``.  When ``save`` is exactly ``True`` the
        document is written to ``fileout`` (``self.fileout`` if ``None``).
        """
        self.tag_item = ET.SubElement(self.tag_channel, "item")
        self.item = rss_object(self.tag_item, title, link, description,
                               pubDate, guid, url)
        self.dom = mindom.parseString(ET.tostring(self.tag_namespace))
        self.prettyprint = self._tidy(self.dom.toprettyxml())
        if fileout is None:
            fileout = self.fileout
        if save is True:
            # savexml is a method; the bare name would raise NameError.
            self.savexml(fileout)

In [18]:
#jsonfilename= 'convert_this_to_XML.json'
# Path of the JSON input file that is converted to RSS.
jsonfilename='rga-decode1_R3432x25_x1300.json'
# The handle is opened for reading here and closed in a later cell, after parsing.
jsonfile = open(jsonfilename,'r')

In [19]:
# validate/parse a json file 
def json_data(myjsonfile):
    """Parse an open JSON file object.

    Returns the decoded Python object, or ``False`` when the content is not
    valid JSON (callers test the result with ``is False``).
    """
    try:
        parsed = json.load(myjsonfile)
    except ValueError:
        # json signals malformed input with ValueError (JSONDecodeError is a
        # subclass of it on Python 3).
        return False
    return parsed

In [20]:
# Parse the input file; json_data returns False when the content is not JSON.
data = json_data(jsonfile)
jsonfile.close()
if data is False:
    # sys.exit(message) reports the problem on stderr and exits with a
    # non-zero status; the bare exit() helper is meant for interactive shells.
    sys.exit('The input file is not a proper json configuration. Check the input file.')
# The record is expected to be wrapped in a top-level list; unwrap it.
# A top-level object (dict) is used as it is.
if isinstance(data, list):
    if not data:
        sys.exit('The input file is not a proper json configuration. Check the input file.')
    data = data[0]

In [21]:
#<?xml-stylesheet type="text/css/" href="clas12_workflow_style.css" ?>
# Opening of the HTML table carried inside the item description: the CDATA
# start marker, the <TABLE> tag, the header row, and the opening <TR> of the
# single data row that is filled in afterwards.
rss_description = (
    '<![CDATA[<TABLE border style="border-collapse: collapse;">'
    '<TR><TH>workflow name</TH><TH>jobs</TH>'
    '<TH>succeeded</TH><TH>success</TH><TH>attempts</TH>'
    '<TH>phase</TH><TH>dispatched</TH><TH>depend</TH>'
    '<TH>active</TH><TH>update date</TH><TH>current date</TH></TR><TR>'
)

In [22]:
# Keys of the workflow record, in the same order as the table header columns.
specific_keys = ['workflow_name','jobs','succeeded','success','attempts','phase','dispatched','auger_depend','auger_active','update_ts','current_ts']
for key in specific_keys:
    value = data.get(key)
    if key == 'success':
        # Success rate in percent.  Use float arithmetic (Python 2 would
        # otherwise truncate to 0 or 1), scale to 0-100, and avoid dividing
        # by a missing or zero job count.
        succeeded = data.get('succeeded') or 0
        jobs = data.get('jobs') or 0
        percent = 100.0 * succeeded / jobs if jobs else 0.0
        cell = '%2.1f' % percent + ' %'
    elif value is None:
        # Missing entries (including missing timestamps) are shown as 0.
        cell = '0'
    elif key.endswith('_ts'):
        # Timestamps are epoch milliseconds: show the local time plus the
        # millisecond remainder so that no digit of the stamp is lost.
        # NOTE(review): the "EST" label is hard-coded while localtime() follows
        # the machine's time zone -- confirm this is intended.
        cell = time.strftime("%a, %d %b %Y %H:%M:%S EST", time.localtime(value/1000.)) + '\t + %03d ms' % (value % 1000)
    else:
        cell = str(value)
    rss_description += '<TD>' + cell + '</TD>'
# Close the data row, the table and the CDATA section.
rss_description += '</TR></TABLE>]]>'

In [24]:
# Create a fresh feed, add one item describing the workflow and write it out.
main = rss_main(title="CLAS 12 Workflow")
first_item_fields = dict(
    title=str(data["workflow_name"]),
    description=rss_description,
    # update_ts is in epoch milliseconds; pubDate is rendered from gmtime.
    pubDate=time.strftime("%a, %d %b %Y %H:%M:%S GMT",
                          time.gmtime(data["update_ts"]/1000)),
)
main.add_item(**first_item_fields)
main.savexml('test.xml')

In [25]:
# Load test.xml, append one more item built from the same record and
# write the result back to test.xml.
b = rss_modify(filein='test.xml')
# b.tag_channel[3][2].text ='new description'
appended_item_fields = dict(
    title=str(data["workflow_name"]),
    description=rss_description,
    # update_ts is in epoch milliseconds; pubDate is rendered from gmtime.
    pubDate=time.strftime("%a, %d %b %Y %H:%M:%S GMT",
                          time.gmtime(data["update_ts"]/1000)),
)
b.add_item(**appended_item_fields)
b.savexml('test.xml')

In [6]:
# # The JSON file wraps its content in square brackets (single-element lists) that carry no information.
# # The following function strips those redundant brackets: while the value is a list of length 1, it is replaced by its only element.

# def list_to_data(data):
#     dummy=data
#     while type(dummy) is list and len(dummy)==1:
#         if len(dummy)==1:
#             dummy = dummy[0]         
#     return dummy

# data = list_to_data(data)
# # i=0
# # data = dummy
# # print 'There are %d unneccessary square bracket(s).'%i
# # print 'The default is to ignore these brackets. Turn the flag on for BLAHBLAH(TODO)'

In [133]:
# # initial setup
# xmlmain = xml_main()
# rss_description = '<![CDATA['
# # sort the keys and items in alphabetical order
# # Main Loop
# for keys, values in sorted(data.items()):
#     if type(values) is list:
#         for subkeysvalues in values:
#             xmlmain.add_item()
#             subkeys = str(subkeysvalues.keys()[0])
#             xmlmain.add_subitem('name',keys+' - '+subkeys)
#             rss_description = rss_description + '<p>'+keys+' - '+subkeys +'<ul>'
#             subvalues = list_to_data(subkeysvalues.values())
#             if type(subvalues) is list:
#                 string='<![CDATA['
#                 for text in subvalues:
#                     string=string+text+"\n"
#                     rss_description= rss_description+'<li>' + str(text)+'</li>'
#                 string = string + ']]>'
#                 xmlmain.add_subitem('value',str(string))
#             else:
#                 xmlmain.add_subitem('value',str(subvalues))
#                 rss_description= rss_description+'<li>' + str(subvalues)+'</li>'
#             rss_description = rss_description+ '</ul></p>'
#     else:
#         xmlmain.add_item()
#         milliseconds = None
#         if keys[-3:]=="_ts":
#             milliseconds = values%1000
#             xmlmain.add_subitem('milliseconds','%03d'%milliseconds)
#             values=str(time.strftime("%a, %d %b %Y %H:%M:%S EST", time.localtime(values/1000.)))+'\t + %03d ms'%milliseconds
#         values= str(values)
#         xmlmain.add_subitem('name',keys)
#         xmlmain.add_subitem('value',values)
#         rss_description= rss_description+ '<p>'+keys +'<ul><li>' + values+'</li></ul></p>'
# rss_description= rss_description + ']]>'
# xmlmain.savexml("trial.xml")

In [13]:
# # Create one web summary from main(xml) with style(xsl) file
# #https://www.w3schools.com/xml/xml_xslt.asp
# #https://www.geeksforgeeks.org/displaying-xml-using-xslt/
# #https://www.w3schools.com/xml/xml_namespaces.asp
# class xml_object:
#     def __init__(self,tag_main, tag_sub_title="subtag_title", tag_sub_text="subtag_text"):
#         self.type='xml_object'
#         self.tag_sub_title = tag_sub_title
#         self.tag_sub_text = tag_sub_text
#         self.tag_main = tag_main
#         self.tag_sub = ET.SubElement(self.tag_main,tag_sub_title)
#         self.tag_sub.text = tag_sub_text

# #create one dictionary into one xml file
# #xsl file is versatile
# class xml_main(xml_object):
#     def __init__(self, save=False, fileout='out.xml'):
#         self.type = "xml_main"
#         self.fileout=fileout        
#         self.tag_workflow = ET.Element("CLAS_12_WORKFLOW")
#         self.dom=mindom.parseString(ET.tostring(self.tag_workflow))
# #         self.dom=mindom.parseString(ET.tostring(self.tag_namespace))#,encoding="UTF-8"))
#         self.prettyprint=self.dom.toprettyxml()
#         if save is True:
#             self.savexml(self.fileout)
    
#     def savexml(self,fileout=None):
#         if fileout==None:
#             fileout=self.fileout
#         outfile = open(fileout,"w")
#         count=0
#         for i in range(0,len(self.prettyprint)):
#             if count >0 :
#                 break
#             if '\n'==self.prettyprint[i]:
#                 self.prettyprint= self.prettyprint[0:i]+'\n<?xml-stylesheet type=\"text/xsl\" href=\"clas_workflow_style.xsl\" ?>\n'+self.prettyprint[i+1:]
#                 count +=1
#         outfile.write(unescape(self.prettyprint))
#         outfile.close()
# # add xml object
#     def add_item(self):
#         self.tag_item = ET.SubElement(self.tag_workflow,"items")
        
#     def add_subitem(self, tag_sub_title="subtag_title", tag_sub_text="subtag_text",save=False, fileout=None):
#         self.item = xml_object( self.tag_item,tag_sub_title, tag_sub_text)    
#         self.dom=mindom.parseString(ET.tostring(self.tag_workflow,encoding="UTF-8"))
#         self.prettyprint=self.dom.toprettyxml()
#         if fileout is None:
#             fileout=self.fileout
#         if save is True:
#             savexml(fileout)