# importing data in xml format

In [1]:
import gzip
import pandas as pd
import xml.etree.ElementTree as ET

input = gzip.open('all ptm types_AAC.xml.gz', 'r')
#tree = ET.parse('P05141.xml')
tree = ET.parse(input)
root = tree.getroot()

print (root.tag)
print (root.attrib)

{http://uniprot.org/uniprot}uniprot
{'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': 'http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd'}


In [2]:
# Iterate through the entries in the XML file and print the accession number, 
#protein name, gene name, organism name, ptm and sequence.

class UniprotEntryParser(object):
       # Class to parse a single entry in the Uniprot XML file. 
       # The entry is passed as an ElementTree object.
       # The class has methods to extract the accession number, protein name, gene name, organism name, ptm and sequence.

       def __init__(self, entry):
           self.entry = entry
           self.ns = {'uniprot': 'http://uniprot.org/uniprot'}
           self.name = None
           self.accessions = None
           self.uniprotId = None
           self.gene = None
           self.organism = None
           self.sequence = None
           self.references= None
           self.ptm = []


       def get_accession(self):
              # Get the accession number
              try:
                     self.accessions =  [accession.text for accession in self.entry.findall('{http://uniprot.org/uniprot}accession')],
              except:
                     self.accessions =  None
              return self.accessions

       def get_name(self):

              # Get the protein name
              try:    
                     #self.name =  self.entry.find('{http://uniprot.org/uniprot}name').text
                     self.name =  self.entry.find('uniprot:protein/uniprot:recommendedName/uniprot:fullName', self.ns).text
              except:
                     self.name =  None
              return self.name


       def get_gene(self):
              try:
                     #self.gene = self.entry.find('{http://uniprot.org/uniprot}gene/name[@type="primary"]').text
                     self.gene = self.entry.find('uniprot:gene/uniprot:name[@type="primary"]', self.ns).text
              except:
                     self.gene = None
              return self.gene

       def get_organism(self):
              try:
                     #self.organism = self.entry.find('{http://uniprot.org/uniprot}organism/name[@type="scientific"]').text
                     self.organism = self.entry.find('uniprot:organism/uniprot:name[@type="scientific"]', self.ns).text
              except: 
                     self.organism = None
              return self.organism
       
       def get_sequence(self):
              try:
                     self.sequence = self.entry.find('uniprot:sequence', self.ns).text
              except:
                     self.sequence = None
              return self.sequence
       
       def get_uniprotId(self):
              try:
                     self.uniprotId = self.entry.find('uniprot:accession', self.ns).text
              except:
                     self.uniprotId = None
              return self.uniprotId


       def get_references(self):
              self.references = []
              for ref in self.entry.findall('uniprot:reference', self.ns):
                        reference = {}
                        key = ref.get('key')
                        reference['key'] = key
                        citation_type = ref.find('uniprot:citation', self.ns).get('type')
                        reference["citation_type"] = citation_type

                        #print(i, key, citation_type, ref)
                        if citation_type == 'journal article':
                                try:
                                        reference['journal'] = ref.find('uniprot:citation', self.ns).get('name')
                                except:
                                        reference['journal'] = None
                                try:
                                        reference['date'] = ref.find('uniprot:citation', self.ns).get('date')
                                except:
                                        reference['date'] = None
                                try:
                                        reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                except:
                                        reference['title'] = None
                                try:
                                        reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', self.ns)]
                                except:
                                        reference['authors'] = None
                                try:
                                        reference['pubmedId'] = ref.find('uniprot:citation/uniprot:dbReference[@type="PubMed"]', self.ns).get('id')
                                except:
                                        reference['pubmedId'] = None
                                try:
                                        reference['doi'] = ref.find('uniprot:citation/uniprot:dbReference[@type="DOI"]', self.ns).get('id')
                                except:
                                        reference['doi'] = None
                        if citation_type == 'submission':  
                                try: 
                                        reference['db'] = ref.find('uniprot:citation', self.ns).get('db')
                                except:
                                        reference['db'] = None
                                try:
                                        reference['date'] = ref.find('uniprot:citation', self.ns).get('name')
                                except:
                                        reference['date'] = None
                                try:
                                        reference['scope'] = [scope.text for scope in ref.findall('uniprot:scope', self.ns)]
                                except:
                                        reference['scope'] = None
                                try:
                                        reference['source'] = [source.text for source in ref.findall('uniprot:source', self.ns)]
                                except:
                                        reference['source'] = None
                        if citation_type == 'patent':
                                   try:
                                          reference['number'] = ref.find('uniprot:citation', self.ns).get('number')
                                   except:
                                          reference['number'] = None
                                   try:
                                          reference['country'] = ref.find('uniprot:citation', self.ns).get('country')
                                   except:
                                          reference['country'] = None
                                   try:
                                          reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                   except:
                                          reference['title'] = None
                        if citation_type == 'thesis':
                                   try:
                                          reference['institution'] = ref.find('uniprot:citation', self.ns).get('institution')
                                   except:
                                          reference['institution'] = None
                                   try:
                                          reference['date'] = ref.find('uniprot:citation', self.ns).get('date')
                                   except:
                                          reference['date'] = None
                                   try:
                                          reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                   except:
                                          reference['title'] = None
                                   try:
                                          reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', self.ns)]
                                   except:
                                          reference['authors'] = None
                        if citation_type == 'unpublished observations':
                                   try:
                                          reference['date'] = ref.find('uniprot:citation', self.ns).get('date')
                                   except:
                                          reference['date'] = None
                                   try:
                                          reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                   except:
                                          reference['title'] = None
                                   try:
                                          reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', self.ns)]
                                   except:
                                          reference['authors'] = None
                        if citation_type == 'online journal article':
                                   try:
                                          reference['journal'] = ref.find('uniprot:citation', self.ns).get('name')
                                   except:
                                          reference['journal'] = None
                                   try:
                                          reference['date'] = ref.find('uniprot:citation', self.ns).get('date')
                                   except:
                                          reference['date'] = None
                                   try:
                                          reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                   except:
                                          reference['title'] = None
                                   try:
                                          reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', self.ns)]
                                   except:
                                          reference['authors'] = None
                                   try:
                                          reference['pubmedId'] = ref.find('uniprot:citation/uniprot:dbReference[@type="PubMed"]', self.ns).get('id')
                                   except:
                                          reference['pubmedId'] = None
                                   try:
                                          reference['doi'] = ref.find('uniprot:citation/uniprot:dbReference[@type="DOI"]', self.ns).get('id')
                                   except:
                                          reference['doi'] = None

                        if citation_type == 'book':
                                   try:
                                          reference['title'] = ref.find('uniprot:citation/uniprot:title', self.ns).text
                                   except:
                                          reference['title'] = None
                                   try:
                                          reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', self.ns)]
                                   except:
                                          reference['authors'] = None
                                   try:
                                          reference['publisher'] = ref.find('uniprot:citation/uniprot:dbReference[@type="Publisher"]', self.ns).get('id')
                                   except:
                                          reference['publisher'] = None
                                   try:
                                          reference['isbn'] = ref.find('uniprot:citation/uniprot:dbReference[@type="ISBN"]', self.ns).get('id')
                                   except:
                                          reference['isbn'] = None
                                          
                        self.references.append(reference)
                     

              return self.references

       
       
       
       
       def get_ptm(self):
              try:
                     #self.ptm = self.entry.find('uniprot:feature[@type="modified residue"]', self.ns).text
                     for feature in self.entry.findall('uniprot:feature[@type="modified residue"]', self.ns):
                            # Get the position and description of the modification
                            position = feature.find('uniprot:location/uniprot:position', self.ns).get('position')
                            description = feature.get('description')
                            evidence = feature.get('evidence')   
                            self.ptm.append({'Position': position, 'Description': description, 'Evidence': evidence})
              except:
                     self.ptm = None
              return self.ptm
       

      

In [3]:
for i, entry in enumerate(root.findall('{http://uniprot.org/uniprot}entry')):
    UEP = UniprotEntryParser(entry)
    print (i, UEP.get_references())

0 [{'key': '1', 'citation_type': 'journal article', 'journal': 'J. Biol. Chem.', 'date': '1990', 'title': 'The human fibroblast adenine nucleotide translocator gene. Molecular cloning and sequence.', 'authors': ['Ku D.-H.', 'Kagan J.', 'Chen S.-T.', 'Chang C.-D.', 'Baserga R.', 'Wurzel J.'], 'pubmedId': '2168878', 'doi': '10.1016/s0021-9258(17)46187-4'}, {'key': '2', 'citation_type': 'journal article', 'journal': 'J. Biol. Chem.', 'date': '1987', 'title': 'Molecular cloning of a cDNA for a human ADP/ATP carrier which is growth-regulated.', 'authors': ['Battini R.', 'Ferrari S.', 'Kaczmarek L.', 'Calabretta B.', 'Chen S.T.', 'Baserga R.'], 'pubmedId': '3031073', 'doi': '10.1016/s0021-9258(18)61355-9'}, {'key': '3', 'citation_type': 'journal article', 'journal': 'Nucleic Acids Res.', 'date': '1996', 'title': 'Ordered shotgun sequencing of a 135 kb Xq25 YAC containing ANT2 and four possible genes, including three confirmed by EST matches.', 'authors': ['Chen C.N.', 'Su Y.', 'Baybayan P.',

In [4]:
def get_entry_dict(entry):
    # Get the entry dictionary
    UEP = UniprotEntryParser(entry)
    

    entry_dict = {'accession': UEP.get_accession(),\
                   'name': UEP.get_name(),\
                    'gene': UEP.get_gene(),\
                    'organism': UEP.get_organism(),\
                    'sequence': UEP.get_sequence(),\
                    'uniprotId': UEP.get_uniprotId(),\
                    'ptm': UEP.get_ptm(),\
                    'references': UEP.get_references()}
    
    return entry_dict

In [5]:
entries = [entry for entry in root.findall('{http://uniprot.org/uniprot}entry')]
entry_dicts = [get_entry_dict(entry) for entry in entries]

In [6]:
entry_dicts[0]

{'accession': (['P05141', 'B2RCV1', 'O43350'],),
 'name': 'ADP/ATP translocase 2',
 'gene': 'SLC25A5',
 'organism': 'Homo sapiens',
 'sequence': 'MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVRIPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASGGAAGATSLCFVYPLDFARTRLAADVGKAGAEREFRGLGDCLVKIYKSDGIKGLYQGFNVSVQGIIIYRAAYFGIYDTAKGMLPDPKNTHIVISWMIAQTVTAVAGLTSYPFDTVRRRMMMQSGRKGTDIMYTGTLDCWRKIARDEGGKAFFKGAWSNVLRGMGGAFVLVLYDEIKKYT',
 'uniprotId': 'P05141',
 'ptm': [{'Position': '1',
   'Description': 'N-acetylmethionine',
   'Evidence': '25'},
  {'Position': '2',
   'Description': 'N-acetylthreonine; in ADP/ATP translocase 2, N-terminally processed',
   'Evidence': '18 19 25 27 29'},
  {'Position': '7', 'Description': 'Phosphoserine', 'Evidence': '5'},
  {'Position': '23', 'Description': 'N6-malonyllysine', 'Evidence': '12'},
  {'Position': '43', 'Description': 'N6-succinyllysine', 'Evidence': '4'},
  {'Position': '52',
   'Description': 'N6,N6,N6-trimethyllysine; alternate',

In [7]:
for item in entry_dicts:
    print(item['references'])

[{'key': '1', 'citation_type': 'journal article', 'journal': 'J. Biol. Chem.', 'date': '1990', 'title': 'The human fibroblast adenine nucleotide translocator gene. Molecular cloning and sequence.', 'authors': ['Ku D.-H.', 'Kagan J.', 'Chen S.-T.', 'Chang C.-D.', 'Baserga R.', 'Wurzel J.'], 'pubmedId': '2168878', 'doi': '10.1016/s0021-9258(17)46187-4'}, {'key': '2', 'citation_type': 'journal article', 'journal': 'J. Biol. Chem.', 'date': '1987', 'title': 'Molecular cloning of a cDNA for a human ADP/ATP carrier which is growth-regulated.', 'authors': ['Battini R.', 'Ferrari S.', 'Kaczmarek L.', 'Calabretta B.', 'Chen S.T.', 'Baserga R.'], 'pubmedId': '3031073', 'doi': '10.1016/s0021-9258(18)61355-9'}, {'key': '3', 'citation_type': 'journal article', 'journal': 'Nucleic Acids Res.', 'date': '1996', 'title': 'Ordered shotgun sequencing of a 135 kb Xq25 YAC containing ANT2 and four possible genes, including three confirmed by EST matches.', 'authors': ['Chen C.N.', 'Su Y.', 'Baybayan P.', '

### Create Mapper

In [8]:
key2pmid = {}
for entry in entry_dicts:
    for reference in entry['references']:
        if reference['citation_type'] == 'journal article':
            #key2pmid[reference['key']] = reference['pubmedId']
            key2pmid[reference['key']] = reference

        elif reference['citation_type'] == 'submission':
            #key2pmid[reference['key']] = reference['db']
            key2pmid[reference['key']] = reference

In [9]:
key2pmid

{'1': {'key': '1',
  'citation_type': 'journal article',
  'journal': 'FEBS Lett.',
  'date': '2005',
  'title': 'A fourth ADP/ATP carrier isoform in man: identification, bacterial expression, functional characterization and tissue distribution.',
  'authors': ['Dolce V.', 'Scarcia P.', 'Iacopetta D.', 'Palmieri F.'],
  'pubmedId': '15670820',
  'doi': '10.1016/j.febslet.2004.12.034'},
 '2': {'key': '2',
  'citation_type': 'journal article',
  'journal': 'Dev. Biol.',
  'date': '2007',
  'title': 'Compartmentalization of a unique ADP/ATP carrier protein SFEC (sperm flagellar energy carrier, AAC4) with glycolytic enzymes in the fibrous sheath of the human sperm flagellar principal piece.',
  'authors': ['Kim Y.-H.',
   'Haidl G.',
   'Schaefer M.',
   'Egner U.',
   'Mandal A.',
   'Herr J.C.'],
  'pubmedId': '17137571',
  'doi': '10.1016/j.ydbio.2006.10.004'},
 '3': {'key': '3',
  'citation_type': 'journal article',
  'journal': 'Genome Res.',
  'date': '2001',
  'title': 'Towards a ca

In [10]:
def splitAndMap(longstring):
    mapped = []
    nlist = longstring.split(' ')
    for item in nlist:
        mapped.append(key2pmid[item])
    return mapped
    

In [11]:
DATA = []
for entry in entry_dicts:
    idata = {}
    idata["accession"] = entry['accession']
    idata["entry"] = entry['name']
    idata["gene"] = entry['gene']
    idata["organism"] = entry['organism']
    idata["sequence"] = entry['sequence']
    idata["uniprotId"] = entry['uniprotId']
    for item in entry['ptm']:
        data = idata.copy()
        data["Position"] = item['Position']
        data["Description"] = item['Description']
        try:
            data["Evidence"] = splitAndMap(item['Evidence'])
        except:
            data["Evidence"] = None
        DATA.append(data)

In [12]:
DATA[0]

{'accession': (['P05141', 'B2RCV1', 'O43350'],),
 'entry': 'ADP/ATP translocase 2',
 'gene': 'SLC25A5',
 'organism': 'Homo sapiens',
 'sequence': 'MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVRIPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASGGAAGATSLCFVYPLDFARTRLAADVGKAGAEREFRGLGDCLVKIYKSDGIKGLYQGFNVSVQGIIIYRAAYFGIYDTAKGMLPDPKNTHIVISWMIAQTVTAVAGLTSYPFDTVRRRMMMQSGRKGTDIMYTGTLDCWRKIARDEGGKAFFKGAWSNVLRGMGGAFVLVLYDEIKKYT',
 'uniprotId': 'P05141',
 'Position': '1',
 'Description': 'N-acetylmethionine',
 'Evidence': [{'key': '25',
   'citation_type': 'journal article',
   'journal': 'JIMD Rep.',
   'date': '2015',
   'title': 'Two novel mutations in the SLC25A4 gene in a patient with mitochondrial myopathy.',
   'authors': ['Koerver-Keularts I.M.',
    'de Visser M.',
    'Bakker H.D.',
    'Wanders R.J.',
    'Vansenne F.',
    'Scholte H.R.',
    'Dorland L.',
    'Nicolaes G.A.',
    'Spaapen L.M.',
    'Smeets H.J.',
    'Hendrickx A.T.',
    'van den Bosch B.J.'

In [13]:
df = pd.DataFrame(DATA)
df.head()

Unnamed: 0,accession,entry,gene,organism,sequence,uniprotId,Position,Description,Evidence
0,"([P05141, B2RCV1, O43350],)",ADP/ATP translocase 2,SLC25A5,Homo sapiens,MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQIT...,P05141,1,N-acetylmethionine,"[{'key': '25', 'citation_type': 'journal artic..."
1,"([P05141, B2RCV1, O43350],)",ADP/ATP translocase 2,SLC25A5,Homo sapiens,MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQIT...,P05141,2,"N-acetylthreonine; in ADP/ATP translocase 2, N...",
2,"([P05141, B2RCV1, O43350],)",ADP/ATP translocase 2,SLC25A5,Homo sapiens,MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQIT...,P05141,7,Phosphoserine,"[{'key': '5', 'citation_type': 'submission', '..."
3,"([P05141, B2RCV1, O43350],)",ADP/ATP translocase 2,SLC25A5,Homo sapiens,MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQIT...,P05141,23,N6-malonyllysine,"[{'key': '12', 'citation_type': 'journal artic..."
4,"([P05141, B2RCV1, O43350],)",ADP/ATP translocase 2,SLC25A5,Homo sapiens,MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQIT...,P05141,43,N6-succinyllysine,"[{'key': '4', 'citation_type': 'journal articl..."


In [14]:
df.columns

Index(['accession', 'entry', 'gene', 'organism', 'sequence', 'uniprotId',
       'Position', 'Description', 'Evidence'],
      dtype='object')

In [15]:
sdf = df[['uniprotId', 'entry', 'gene', 'organism', 'Position', 'Description','Evidence', 'sequence']]

In [18]:
#sdf.to_csv('AAC isoforms ptmtypes data for load.csv', index=False)

### ROUGH Code

In [None]:
df['ptm-position'] = df['ptm'].apply(lambda x: [item['Position'] for item in x] if x is not None else None)
df['ptm-description'] = df['ptm'].apply(lambda x: [item['Description'] for item in x] if x is not None else None)

In [None]:
from bs4 import BeautifulSoup

# Open the XML file
with open('P12235.xml', 'r') as f:
    data = f.read()

# Parse the XML file
soup = BeautifulSoup(data, 'xml')

# Find all modified residues
modified_residues = soup.find_all('feature', {'type': 'modified residue'})

# Get a dictionary of all evidences with their associated references
evidences = {evidence.get('key'): evidence for evidence in soup.find_all('evidence')}

# Initialize an empty list to store the data
data = []

# Iterate over all modified residues
for residue in modified_residues:
    position = residue.location.begin.get('position')
    description = residue.get('description')

    # Get the evidence IDs for this residue
    evidence_ids = residue.get('evidence').split()

    # Get the corresponding references for each evidence
    references = []
    for evidence_id in evidence_ids:
        evidence = evidences.get(evidence_id)
        if evidence:
            reference_key = evidence.get('key')
            references.append(reference_key)
    
    data.append({
        'Position': position, 
        'Description': description, 
        'Evidence': references
    })

# Now, 'data' is a list of dictionaries, each containing the position, description, and evidence for a modified residue


In [None]:
import xml.etree.ElementTree as ET
input = gzip.open('all ptm types_AAC.xml.gz', 'r')
tree = ET.parse(input)
root = tree.getroot()

ns = {'uniprot': 'http://uniprot.org/uniprot'}  # Namespace

# Iterate through each entry
for entry in root.findall('uniprot:entry', ns):
    ptm_evidence_map = {}  # Mapping of PTMs to their references

    # Get all references in the entry
    references = {ref.get('key'): ref for ref in entry.findall('uniprot:reference', ns)}

    # Get all evidences in the entry
    evidences = {evidence.get('key'): evidence for evidence in entry.findall('uniprot:evidence', ns)} 

    # Get all PTMs in the entry
    ptms = [feature for feature in entry.findall('uniprot:feature', ns) if feature.get('type') == 'modified residue']

    # For each PTM, find its evidence and then its reference
    for ptm in ptms:
        evidence_key = ptm.get('evidence')
        if evidence_key:
            evidence = evidences.get(evidence_key)
            if evidence:
                reference_key = evidence.get('source')
                if reference_key:
                    reference = references.get(reference_key)
                    if reference:
                        # Add to the mapping
                        ptm_description = ptm.get('description')
                        ptm_evidence_map[ptm_description] = reference
                        

    # Print the mapping
    for ptm, reference in ptm_evidence_map.items():
        print(f'PTM: {ptm}')
        print(f'Reference: {ET.tostring(reference, encoding="unicode")}')


In [None]:
def get_references(self):
    self.references = {}
    for ref in self.entry.findall(self.ns+'reference'):
        key = ref.get('key')
        pubmed_id = ref.find(".//{%s}dbReference[@type='PubMed']" % self.ns)
        if pubmed_id is not None:
            self.references[key] = pubmed_id.get('id')

def get_evidences(self):
    self.ptms = []
    for ptm in self.entry.findall(".//{%s}feature[@type='modified residue']" % self.ns):
        description = ptm.find("{%s}description" % self.ns).text
        position = ptm.find(".//{%s}position" % self.ns).get('position')
        evidence = ptm.get('evidence').split(' ')
        pubmed_ids = [self.references.get(e) for e in evidence]
        self.ptms.append({'description': description, 'position': position, 'pubmed_ids': pubmed_ids})

def parse_entry(self, entry):
    self.entry = entry
    self.get_references()
    self.get_evidences()
    return self.ptms


In [None]:

references = []
ns = {'uniprot': 'http://uniprot.org/uniprot'}
for i, entry in enumerate(root.findall('{http://uniprot.org/uniprot}entry')):
        print(i,"-----------------------------------")
        for ref in entry.findall('uniprot:reference', ns):
                        reference = {}
                        key = ref.get('key')
                        reference['key'] = key
                        citation_type = ref.find('uniprot:citation', ns).get('type')
                        reference["citation_type"] = citation_type

                        #print(i, key, citation_type, ref)
                        if citation_type == 'journal article':
                                try:
                                        reference['journal'] = ref.find('uniprot:citation', ns).get('name')
                                except:
                                        reference['journal'] = None
                                try:
                                        reference['date'] = ref.find('uniprot:citation', ns).get('date')
                                except:
                                        reference['date'] = None
                                try:
                                        reference['title'] = ref.find('uniprot:citation/uniprot:title', ns).text
                                except:
                                        reference['title'] = None
                                try:
                                        reference['authors'] = [author.get('name') for author in ref.findall('uniprot:citation/uniprot:authorList/uniprot:person', ns)]
                                except:
                                        reference['authors'] = None
                                try:
                                        reference['pubmedId'] = ref.find('uniprot:citation/uniprot:dbReference[@type="PubMed"]', ns).get('id')
                                except:
                                        reference['pubmedId'] = None
                                try:
                                        reference['doi'] = ref.find('uniprot:citation/uniprot:dbReference[@type="DOI"]', ns).get('id')
                                except:
                                        reference['doi'] = None
                        if citation_type == 'submission':  
                                try: 
                                        reference['db'] = ref.find('uniprot:citation', ns).get('db')
                                except:
                                        reference['db'] = None
                                try:
                                        reference['date'] = ref.find('uniprot:citation', ns).get('date')
                                except:
                                        reference['date'] = None
                                try:
                                        reference['scope'] = [scope.text for scope in ref.findall('uniprot:scope', ns)]
                                except:
                                        reference['scope'] = None
                                try:
                                        reference['source'] = [source.text for source in ref.findall('uniprot:source', ns)]
                                except:
                                        reference['source'] = None

                        references.append(reference)
                        print(reference)
                        print("---------------------------------------------------")
       
                     

#print(references )           