In [1]:
import requests
from xml.etree import ElementTree as ET
import json
import pandas as pd
import ast
import numpy as np

In [2]:
r = requests.get("https://scsanctions.un.org/resources/xml/en/consolidated.xml")

In [3]:
# Get all the tags in the XML
root = ET.fromstring(r.text)
tags = [children.tag for children in root.find("INDIVIDUALS/INDIVIDUAL")]
mapValues: dict = {}
for item in tags:
    mapValues[item] = None

In [4]:
individuals: list = root.findall("INDIVIDUALS/INDIVIDUAL")

def parse_xml(element, item):
    for child in element:
        if len(child) == 0:
            if child.tag in item:
                if not isinstance(item[child.tag], list):
                    item[child.tag] = [item[child.tag]]
                item[child.tag].append(child.text)
            else:
                item[child.tag] = child.text
        else:
            if child.tag not in item:
                item[child.tag] = []
            sub_item = {}
            parse_xml(child, sub_item)
            item[child.tag].append(sub_item)

parsed_data = {}
parse_xml(individuals, parsed_data)

In [10]:
parsed_data

{'INDIVIDUAL': [{'DATAID': '110447',
   'VERSIONNUM': '1',
   'FIRST_NAME': 'MUHAMMAD',
   'SECOND_NAME': 'TAHER',
   'THIRD_NAME': 'ANWARI',
   'UN_LIST_TYPE': 'Taliban',
   'REFERENCE_NUMBER': 'TAi.005',
   'LISTED_ON': '2001-02-23',
   'NAME_ORIGINAL_SCRIPT': 'Ù\x85Ø\xadÙ\x85Ø¯ Ø·Ø§Ù\x87Ø± Ø£Ù\x86Ù\x88Ø±Ù\x8a',
   'COMMENTS1': 'Belongs to Andar tribe. Review pursuant to Security Council resolution\n1822 (2008) was concluded on 23 Jul. 2010. INTERPOL-UN Security Council Special Notice web link:https://www.interpol.int/en/How-we-work/Notices/View-UN-Notices-Individuals',
   'TITLE': [{'VALUE': 'Mullah'}],
   'DESIGNATION': [{'VALUE': ['a) Director of Administrative Affairs under the Taliban\nregime',
      'Minister of Finance under the Taliban regime']}],
   'NATIONALITY': [{'VALUE': 'Afghanistan'}],
   'LIST_TYPE': [{'VALUE': 'UN List'}],
   'LAST_DAY_UPDATED': [{'VALUE': ['2003-09-03',
      '2007-07-09',
      '2007-09-21',
      '2011-11-29']}],
   'INDIVIDUAL_ALIAS': [{'QUALITY'

In [5]:
df = pd.DataFrame(parsed_data['INDIVIDUAL'])

In [6]:
def first_name(df):
    name = df['FIRST_NAME'].fillna('') + ' ' + df['SECOND_NAME'].fillna('') + ' ' + df['THIRD_NAME'].fillna('') + ' ' + df['FOURTH_NAME'].fillna('')
    return name.apply(lambda x: x.strip())

df['FULL_NAME'] = first_name(df)

In [7]:
def parse_dates(row):
    year  = []
    death = []
    if len(row) == 0:
        pass
    for item in row:
        if 'YEAR' in item:
            year.append(item['YEAR'])
        if 'FROM_YEAR' in item:
            year.append(item['FROM_YEAR'])
        if 'TO_YEAR' in item:
            death.append(item['TO_YEAR'])
    return year, death

            
            

df['INDIVIDUAL_DATE_OF_BIRTH'] = [ [] if x is np.NaN else x for x in df['INDIVIDUAL_DATE_OF_BIRTH'] ]
df['birth_date'], df['death_date'] = zip(*df['INDIVIDUAL_DATE_OF_BIRTH'].copy().apply(lambda x: parse_dates(x)))
#df['birth_date'] = ['' if x == [] else x[0] for x in df['birth_date'].copy()]
#df['death_date'] = ['' if x == [] else x[0] for x in df['death_date'].copy()]
#dict_keys(['TYPE_OF_DATE', 'FROM_YEAR', 'TO_YEAR'])
#df['INDIVIDUAL_DATE_OF_BIRTH'].apply(lambda x: [print(item.keys()) for item in x])

In [8]:
def parse_place_birth(row):
    city = []
    state = []
    country = []
    for item in row:
        if 'CITY' in item:
            city.append(item['CITY'])
        if 'STATE_PROVINCE' in item:
            state.append(item['STATE_PROVINCE'])
        if 'COUNTRY' in item:
            country.append(item['COUNTRY'])

    return city, state, country

df['city_birth'], df['state_birth'], df['country_birth'] = zip(*df['INDIVIDUAL_PLACE_OF_BIRTH'].apply(lambda x: parse_place_birth(x)))


url = "https://api.pocketbase.io/v1/databases/<your-database-id>/collections/<your-collection>"
headers = {"Content-Type": "application/json", "Authorization": "<your-api-key>"}
data = {"name": "Sameer", "age": 23}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 201:
    print("Data stored successfully!")
else:
    print("Failed to store data")