# Notebook to build database on countries from a free API
This Notebook shows how to use a free API (no authorization or API key needed) to download some basic information about various countries around the world and put them in a DataFrame.

### Import libraries

In [77]:
import urllib.request, urllib.parse
from urllib.error import HTTPError,URLError
from urllib.parse import quote  
import pandas as pd
import json

### Define the base URL

In [78]:
# Base endpoint for look-ups by country name; get_country_data appends the
# percent-encoded country name to this string to form the request URL.
# NOTE(review): confirm this host is still being served before relying on it.
serviceurl = 'https://restcountries.eu/rest/v2/name/'

### Define function to pull the country data from the API

In [79]:
def get_country_data(country):
    """Fetch the raw JSON text describing one country from the name endpoint.

    Parameters
    ----------
    country : str
        Country name. It is percent-encoded with ``quote`` so that blanks and
        non-ASCII characters (e.g. 'Åland Islands') form a valid URL.

    Returns
    -------
    str or None
        The decoded response body, with every literal ``[]`` rewritten to
        ``[0,0]``, or ``None`` when the request fails (a message is printed
        instead of raising).
    """
    country_name = country
    url = serviceurl + quote(country_name)
    try:
        # The context manager closes the HTTP response as soon as the body
        # has been read, so connections are not leaked across many calls.
        with urllib.request.urlopen(url) as uh:
            data = uh.read().decode()
    except HTTPError:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print("Sorry! Could not retrive anything")
        return None
    except URLError as e:
        print('Failed to reach a server.')
        print('Reason: ', e.reason)
        return None

    # Empty JSON arrays are replaced textually with a two-element placeholder
    # so that positional look-ups such as latlng[0] / latlng[1] in the caller
    # do not fail on countries that report no coordinates.
    data = data.replace('[]', "[0,0]")
    print("Retrieved data on {}. Total {} characters read.".format(country_name, len(data)))
    return data

### Define function to print the request URL (for debugging only)

In [80]:
def get_country_data_links(country):
    """Print the request URL that corresponds to `country` (debugging aid).

    The name is percent-encoded with ``quote`` and appended to the
    module-level ``serviceurl``; nothing is fetched and nothing is returned.
    """
    encoded_name = quote(country)
    print(serviceurl + encoded_name)

### Define a function which can take a list of countries and return a DataFrame containing key info
* Capital
* Region
* Sub-region
* Population
* Latitude/longitude
* Area
* Gini index
* Timezones
* Currencies
* Languages

In [81]:
def _first_record(payload):
    """Return the first country record of a decoded payload, or None if there is none."""
    if isinstance(payload, list) and payload and isinstance(payload[0], dict):
        return payload[0]
    return None


def _join_strings(values):
    """Comma-join the string items of `values`; return None when there are none."""
    texts = [v for v in values if isinstance(v, str)]
    return ','.join(texts) if texts else None


def _join_names(entries):
    """Comma-join the 'name' of every dict in `entries`; return None when there are none."""
    return _join_strings(e.get('name') for e in entries if isinstance(e, dict))


def build_country_database(list_country):
    """Build a DataFrame of key facts for each country name in `list_country`.

    For every name, ``get_country_data`` is called and the first record of the
    decoded JSON is used. Names for which no data or no usable record comes
    back are skipped.

    Parameters
    ----------
    list_country : iterable of str
        Country names to look up.

    Returns
    -------
    pandas.DataFrame
        One row per retrieved country with the columns Country, Capital,
        Region, Sub-region, Population, Lattitude, Longitude, Area, Gini,
        Timezones, Currencies and Languages. Multiple timezones, currencies
        or languages are joined with commas; when none are available the
        cell is left missing (None).
    """
    columns = ['Country', 'Capital', 'Region', 'Sub-region', 'Population',
               'Lattitude', 'Longitude', 'Area', 'Gini', 'Timezones',
               'Currencies', 'Languages']

    # Collect one dict per country and build the frame once at the end; a
    # record is only added after every field has been extracted.
    records = []
    for c in list_country:
        data = get_country_data(c)
        if data is None:
            continue

        y = _first_record(json.loads(data))
        if y is None:
            print("No usable record returned for {}; skipping.".format(c))
            continue

        # Coordinates are looked up by position; guard against short lists.
        latlng = y['latlng']
        if len(latlng) >= 2:
            lat, lon = latlng[0], latlng[1]
        else:
            lat, lon = None, None

        records.append({
            'Country': y['name'],
            'Capital': y['capital'],
            'Region': y['region'],
            'Sub-region': y['subregion'],
            'Population': y['population'],
            'Lattitude': lat,
            'Longitude': lon,
            'Area': y['area'],
            'Gini': y['gini'],
            # The helpers ignore non-string / non-dict items, so the numeric
            # placeholders that get_country_data substitutes for empty lists
            # and null currency names cannot break the join.
            'Timezones': _join_strings(y['timezones']),
            'Currencies': _join_names(y['currencies']),
            'Languages': _join_names(y['languages']),
        })

    # Return as a Pandas DataFrame with a fixed column order
    return pd.DataFrame(records, columns=columns)

### Define function to pull the country NAMES data from the API

In [84]:
def list_of_countries(number_to_return=250):
    """Return the names of the first `number_to_return` countries from the API.

    Parameters
    ----------
    number_to_return : int, optional
        Maximum number of names to return, taken in the order the API lists
        them. Values of zero or below give an empty list. If the API returns
        fewer records than requested, all of them are returned instead of
        raising IndexError.

    Returns
    -------
    list of str
        The 'name' field of each selected country record.
    """
    allurl = 'https://restcountries.eu/rest/v2/all'

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(allurl) as uh:
        z = json.loads(uh.read().decode())

    # Slicing clamps to the records actually available; max() keeps a
    # negative request from being read as "all but the last n".
    lim = max(number_to_return, 0)
    return [y['name'] for y in z[:lim]]


In [87]:
def list_of_countries(number_to_return=300):
    """Return the names of the first `number_to_return` countries from the API.

    NOTE: this cell redefines ``list_of_countries`` and therefore replaces the
    definition from the earlier cell; the default limit here is 300.

    Parameters
    ----------
    number_to_return : int, optional
        Maximum number of names to return, taken in the order the API lists
        them. Values of zero or below give an empty list. If the API returns
        fewer records than requested, all of them are returned instead of
        raising IndexError.

    Returns
    -------
    list of str
        The 'name' field of each selected country record.
    """
    allurl = 'https://restcountries.eu/rest/v2/all'

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(allurl) as uh:
        z = json.loads(uh.read().decode())

    # Slicing clamps to the records actually available; max() keeps a
    # negative request from being read as "all but the last n".
    lim = max(number_to_return, 0)
    return [y['name'] for y in z[:lim]]

In [88]:
# Ask for a single name; the returned list is shown as the cell output.
list_of_countries(1)   # explicit limit of 1 instead of the default

['Afghanistan']

In [73]:
# `country_list` is a local variable inside list_of_countries, so it does not
# exist at notebook level and this cell raises NameError. Assign the function's
# return value to a name first in order to inspect it.
country_list    # not defined at notebook level

NameError: name 'country_list' is not defined

### Build the DataFrame from an explicit list of countries, or from the names returned by the API

In [None]:
# Build a DataFrame from a hand-picked list of names. The list includes the names
# recorded in this notebook's problem notes: 'Åland Islands' (special characters
# and spaces), 'United States Minor Outlying Islands' (blank data) and 'Nauru'
# (null entry in the currency list).
df1=build_country_database(['Nigeria','Switzerland','France','Russia','Kenya','Singapore','South Africa','Åland Islands','United States Minor Outlying Islands','Nauru'])

In [83]:
# Build a DataFrame for the first four country names returned by list_of_countries.
df2=build_country_database(list_of_countries(4))

Retrieved data on Afghanistan. Total 1109 characters read.
Retrieved data on Åland Islands. Total 908 characters read.
Retrieved data on Albania. Total 930 characters read.
Retrieved data on Algeria. Total 1149 characters read.


In [None]:
## Problem 1: special characters and spaces in country names, e.g. 'Åland Islands'

In [None]:
## Problem 2: blank (empty) data in the response, so an index could not be found, e.g. 'United States Minor Outlying Islands'

In [None]:
# Problem 3: a null value in the currency list, e.g. 'Nauru'

## Note on the dictionary structure of the JSON object
type(y)
y.keys()

for k,v in y.items():
    print(f"{k}: {v}")
    
for i in y['languages']:
    print(i['name'])  
    
y['currencies']    
    