# XML exercise

Using data from [**mondial database**](https://drive.google.com/file/d/14lFT4nWHgwN36ij4XZh6OUuup-K9qLgR/view?usp=sharing) find the answers to following questions:

1. 10 countries with the lowest infant mortality rates
2. 10 cities with the largest population
3. name and country of a) longest river, b) largest lake and c) airport at highest elevation

In [122]:
import xml.etree.ElementTree as ET
import pandas as pd

In [123]:
tree = ET.parse('mondial.xml')

In [124]:
root = tree.getroot()
root

<Element 'mondial' at 0x00000162DE090D18>

In [125]:
first_child=root[0]
print(first_child.attrib)

{'car_code': 'AL', 'area': '28750', 'capital': 'cty-Albania-Tirane', 'memberships': 'org-BSEC org-CEI org-CD org-SELEC org-CE org-EAPC org-EBRD org-EITI org-FAO org-IPU org-IAEA org-IBRD org-ICC org-ICAO org-ICCt org-Interpol org-IDA org-IFRCS org-IFC org-IFAD org-ILO org-IMO org-IMF org-IOC org-IOM org-ISO org-OIF org-ITU org-ITUC org-IDB org-MIGA org-NATO org-OSCE org-OPCW org-OAS org-OIC org-PCA org-UN org-UNCTAD org-UNESCO org-UNIDO org-UPU org-WCO org-WFTU org-WHO org-WIPO org-WMO org-UNWTO org-WTO'}


In [126]:
list_countries=[]
for country in root:
    country_dict={}
    for country_property in country:
        country_dict[country_property.tag]=country_property.text
    list_countries.append(country_dict)

In [127]:
list_countries

[{'name': 'Albania',
  'population': '2821977',
  'population_growth': '0.3',
  'infant_mortality': '13.19',
  'gdp_total': '12800',
  'gdp_agri': '19.5',
  'gdp_ind': '12',
  'gdp_serv': '68.5',
  'inflation': '1.7',
  'unemployment': '16.9',
  'indep_date': '1912-11-28',
  'government': 'parliamentary democracy',
  'encompassed': None,
  'ethnicgroup': 'Greek',
  'religion': 'Christian Orthodox',
  'language': 'Greek',
  'border': None,
  'city': '\n         '},
 {'name': 'Greece',
  'localname': 'Ελληνική Δημοκρατία',
  'population': '10816286',
  'population_growth': '0.01',
  'infant_mortality': '4.78',
  'gdp_total': '243300',
  'gdp_agri': '3.5',
  'gdp_ind': '16',
  'gdp_serv': '80.5',
  'inflation': '-0.8',
  'unemployment': '27.9',
  'indep_date': '1829-01-01',
  'government': 'parliamentary republic',
  'encompassed': None,
  'ethnicgroup': 'Greek',
  'religion': 'Muslim',
  'language': 'Greek',
  'border': None,
  'province': '\n         '},
 {'name': 'North Macedonia',
  '

In [128]:
countries_dt=pd.DataFrame(list_countries)    

In [129]:
countries_dt.head()

Unnamed: 0,name,population,population_growth,infant_mortality,gdp_total,gdp_agri,gdp_ind,gdp_serv,inflation,unemployment,...,estuary,through,latitude,longitude,elevation,height,islands,mountains,gmtOffset,located_on
0,Albania,2821977,0.3,13.19,12800,19.5,12.0,68.5,1.7,16.9,...,,,,,,,,,,
1,Greece,10816286,0.01,4.78,243300,3.5,16.0,80.5,-0.8,27.9,...,,,,,,,,,,
2,North Macedonia,2059794,0.21,7.9,10650,10.2,27.5,62.3,2.8,28.6,...,,,,,,,,,,
3,Serbia,7120666,-0.46,6.16,43680,7.9,31.8,60.3,2.2,20.1,...,,,,,,,,,,
4,Montenegro,620029,-0.49,,4518,0.8,11.3,87.9,4.0,19.1,...,,,,,,,,,,


In [132]:
countries_dt['infant_mortality']=countries_dt['infant_mortality'].astype('float64')

In [133]:
countries_dt[['name','infant_mortality']].sort_values(['infant_mortality']).head(10)

Unnamed: 0,name,infant_mortality
38,Monaco,1.81
98,Japan,2.13
117,Bermuda,2.48
36,Norway,2.48
106,Singapore,2.53
37,Sweden,2.6
10,Czech Republic,2.63
8,Spain,2.7
78,Hong Kong,2.73
79,Macao,3.13


In [134]:
countries_dt['population']=countries_dt['population'].astype('float64')

In [135]:
countries_dt[['name','population']].sort_values(['population'], ascending=False).head(10)

Unnamed: 0,name,population
55,China,1360720000.0
67,India,1210855000.0
120,United States,318857100.0
88,Indonesia,252124500.0
57,Pakistan,207777000.0
176,Brazil,202768600.0
202,Nigeria,193392500.0
65,Bangladesh,149772400.0
23,Russia,143666900.0
98,Japan,127298000.0


In [136]:
countries_dt.columns

Index(['name', 'population', 'population_growth', 'infant_mortality',
       'gdp_total', 'gdp_agri', 'gdp_ind', 'gdp_serv', 'inflation',
       'unemployment', 'indep_date', 'government', 'encompassed',
       'ethnicgroup', 'religion', 'language', 'border', 'city', 'localname',
       'province', 'dependent', 'area', 'abbrev', 'established', 'members',
       'located', 'depth', 'to', 'length', 'source', 'estuary', 'through',
       'latitude', 'longitude', 'elevation', 'height', 'islands', 'mountains',
       'gmtOffset', 'located_on'],
      dtype='object')

In [168]:
list_cities=[]
for country in root:
    cities_dict={}
    for city in country.findall('city'):
        for city_prop in city:
            cities_dict[city_prop.tag]=city_prop.text
    if cities_dict:
        list_cities.append(cities_dict)

In [169]:
list_cities

[{'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevation': '10',
  'population': '24970',
  'located_at': None,
  'localname': 'سوسة ',
  'located_on': None},
 {'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevation': '10',
  'population': '24970',
  'located_at': None,
  'localname': 'سوسة ',
  'located_on': None},
 {'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevation': '10',
  'population': '24970',
  'located_at': None,
  'localname': 'سوسة ',
  'located_on': None},
 {'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevation': '10',
  'population': '24970',
  'located_at': None,
  'localname': 'سوسة ',
  'located_on': None},
 {'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevation': '10',
  'population': '24970',
  'located_at': None,
  'localname': 'سوسة ',
  'located_on': None},
 {'name': 'Victoria',
  'latitude': '-4.62',
  'longitude': '55.45',
  'elevatio

In [170]:
cities_dt=pd.DataFrame(list_cities)

In [171]:
cities_dt['population']=cities_dt['population'].astype('float64')

In [172]:
cities_dt[['name','population']].sort_values(['population'], ascending=False).head(10)

Unnamed: 0,name,population
0,Victoria,24970.0
2260,Victoria,24970.0
2262,Victoria,24970.0
2263,Victoria,24970.0
2264,Victoria,24970.0
2265,Victoria,24970.0
2266,Victoria,24970.0
2267,Victoria,24970.0
2268,Victoria,24970.0
2269,Victoria,24970.0


In [167]:
cities_dt[['name','population']][cities_dt['name']=="Busan"]

Unnamed: 0,name,population
