In [1]:
import pandas as pd
import json

In [2]:
# Input: raw country records in JSON form, parsed with json.load further down.
raw_country_nationality_dataset_file = 'data/raw_country_nationality.json'
# Outputs: the two tables this notebook writes with DataFrame.to_csv.
# NOTE: they are written with compression='gzip' despite the '.csv' extension.
parsed_country_nationality_dataset_file = 'data/parsed_country_nationality.csv'
parsed_currency_country_dataset_file = 'data/parsed_currency_country.csv'

In [3]:
# Parse the raw country records. The context manager closes the file handle
# as soon as parsing is done, and the explicit encoding keeps non-ASCII text
# from depending on the platform's default encoding.
with open(raw_country_nationality_dataset_file, encoding='utf-8') as raw_file:
    json_data = json.load(raw_file)

## Create country-nationality dataframe

In [4]:
# One entry per record, keyed by the record's 'cca2' value; if two records
# share a key, the later one wins.
country_nationality_dict = {
    record['cca2']: {
        'Official Name': record['name']['official'],
        'Common Name': record['name']['common'],
        'Currencies': record['currency'],
        'Nationality': record['demonym'],
    }
    for record in json_data
}

# Outer keys become the row index, inner keys become the columns.
country_nationality_df = pd.DataFrame.from_dict(country_nationality_dict, orient='index')

In [5]:
# Preview the first two rows. 'Currencies' holds each record's 'currency'
# value exactly as it came out of the JSON, with no conversion applied.
country_nationality_df.head(2)

Unnamed: 0,Official Name,Common Name,Currencies,Nationality
AD,Principality of Andorra,Andorra,[EUR],Andorran
AE,United Arab Emirates,United Arab Emirates,[AED],Emirati


## Create currency-country dataframe

In [6]:
def get_countries_with_currency(currency, country_df=None):
    '''
    Return the index labels of the countries that list a given
    currency among their official currencies.

    Parameters
    ----------
    currency : the currency code to look for (e.g. 'EUR').
    country_df : optional DataFrame with a 'Currencies' column whose
        cells are collections of currency codes. Defaults to the
        notebook-level country_nationality_df, so existing
        single-argument calls keep working unchanged.

    Returns
    -------
    A list of the matching rows' index labels, in frame order
    (empty when no row contains the currency).
    '''
    if country_df is None:
        # Looked up at call time so the function follows the current frame.
        country_df = country_nationality_df
    # Walking the single column avoids building a full Series per row,
    # which is what DataFrame.iterrows() does.
    return [
        country_code
        for country_code, currencies in country_df['Currencies'].items()
        if currency in currencies
    ]

In [7]:
# Flatten every country's currency list into one list (duplicates included).
# Iterating the Series directly yields its values and works on every pandas
# version; Series.iteritems() no longer exists in pandas 2.0+.
all_currencies = []
for currencies in country_nationality_df['Currencies']:
    all_currencies.extend(currencies)

unique_currencies = set(all_currencies)

# A set has no stable iteration order, so sort the codes to give the
# resulting frame the same row order on every run.
currency_country_dict = {
    currency: {'Countries': get_countries_with_currency(currency)}
    for currency in sorted(unique_currencies)
}

# Create dataframe from the collected data
currency_country_df = pd.DataFrame.from_dict(currency_country_dict, orient='index')
currency_country_df.head(2)

Unnamed: 0,Countries
AED,[AE]
AFN,[AF]


In [8]:
# Turn each frame's index into an ordinary column and call that column 'ID'.
# NOTE: running this cell a second time would add another index column.
country_nationality_df, currency_country_df = (
    frame.reset_index().rename(columns={'index': 'ID'})
    for frame in (country_nationality_df, currency_country_df)
)

In [9]:
# Write both frames to their configured output paths.
# NOTE: the files are gzip-compressed although the paths end in '.csv';
# pandas infers compression from the extension by default, so readers need
# to pass compression='gzip' explicitly.
for frame, destination in (
    (country_nationality_df, parsed_country_nationality_dataset_file),
    (currency_country_df, parsed_currency_country_dataset_file),
):
    frame.to_csv(destination, encoding='utf-8', index=False, compression='gzip')