In [1]:
import pandas as pd
import numpy as np

# Read the raw trade records and preview the first rows.
trade_data = pd.read_csv('trade_1988_2021.csv')

# snake_case replacements for the raw headers; applied to the frame in a later cell.
new_columns = [
    'reporter_iso_3',
    'reporter_name',
    'partner_iso_3',
    'partner_name',
    'year',
    'trade_flow_name',
    'trade_value_1000_usd',
]

display(trade_data.head())

Unnamed: 0,ReporterISO3,ReporterName,PartnerISO3,PartnerName,Year,TradeFlowName,TradeValue in 1000 USD
0,AFG,Afghanistan,SWE,Sweden,2017,Export,86.752
1,AFG,Afghanistan,JOR,Jordan,2018,Export,2796.481
2,AFG,Afghanistan,JOR,Jordan,2017,Export,3100.187
3,AFG,Afghanistan,ITA,Italy,2018,Export,279.918
4,AFG,Afghanistan,ITA,Italy,2017,Export,416.642


In [2]:
# Replace the raw CSV headers with the snake_case names defined above.
# NOTE: this assignment requires exactly len(new_columns) columns, so re-running
# the cell after trade_value_usd has been added raises a length-mismatch error.
trade_data.columns = new_columns

# Convert thousands of USD to USD. Multiplying the whole column at once gives
# the same values as a per-element apply without a Python call per row.
trade_data["trade_value_usd"] = trade_data["trade_value_1000_usd"] * 1000

display(trade_data.head(30))

Unnamed: 0,reporter_iso_3,reporter_name,partner_iso_3,partner_name,year,trade_flow_name,trade_value_1000_usd,trade_value_usd
0,AFG,Afghanistan,SWE,Sweden,2017,Export,86.752,86752.0
1,AFG,Afghanistan,JOR,Jordan,2018,Export,2796.481,2796481.0
2,AFG,Afghanistan,JOR,Jordan,2017,Export,3100.187,3100187.0
3,AFG,Afghanistan,ITA,Italy,2018,Export,279.918,279918.0
4,AFG,Afghanistan,ITA,Italy,2017,Export,416.642,416642.0
5,AFG,Afghanistan,IRQ,Iraq,2018,Export,13190.08,13190080.0
6,AFG,Afghanistan,IRQ,Iraq,2017,Export,14563.223,14563223.0
7,AFG,Afghanistan,IRN,"Iran, Islamic Rep.",2018,Export,19508.634,19508634.0
8,AFG,Afghanistan,IRN,"Iran, Islamic Rep.",2017,Export,18112.759,18112759.0
9,AFG,Afghanistan,IND,India,2018,Export,359437.858,359437858.0


In [3]:
import pycountry_convert as pc
from pycountry_convert import country_alpha3_to_country_alpha2,country_alpha2_to_continent_code

In [4]:
def country_to_continent(country_code):
    """Return the continent name for an alpha-3 country code.

    The code is converted in three steps with pycountry_convert:
    alpha-3 -> alpha-2 -> continent code -> continent name.

    Exceptions raised by the converters are not handled here; the caller in
    this notebook catches KeyError for codes that cannot be converted.
    """
    alpha2_code = pc.country_alpha3_to_country_alpha2(country_code)
    continent_code = pc.country_alpha2_to_continent_code(alpha2_code)
    return pc.convert_continent_code_to_continent_name(continent_code)


# Quick check with a single code
country_code = 'AFG'
print(country_to_continent(country_code))

Asia


In [5]:
# Add blank placeholder columns for the continents; they are filled in later.
trade_data["reporter_continent"], trade_data["partner_continent"] = "", ""

In [6]:
def _continent_or_blank(iso3_code):
    """Return the continent for ``iso3_code``, or "" if the lookup raises KeyError."""
    try:
        return country_to_continent(iso3_code)
    except KeyError:
        # Codes the converter does not know keep the blank placeholder.
        return ""


# Resolve every distinct code once instead of once per row; missing codes are
# dropped here and end up blank via fillna below.
_all_iso3_codes = pd.concat(
    [trade_data["reporter_iso_3"], trade_data["partner_iso_3"]]
).dropna().unique()
continent_by_iso3 = {code: _continent_or_blank(code) for code in _all_iso3_codes}

# Fill each column independently: previously a failed reporter lookup skipped
# the partner lookup for that row, leaving valid partners blank.
trade_data["reporter_continent"] = (
    trade_data["reporter_iso_3"].map(continent_by_iso3).fillna("")
)
trade_data["partner_continent"] = (
    trade_data["partner_iso_3"].map(continent_by_iso3).fillna("")
)

In [7]:
# Preview the first rows, which now include the two continent columns.
trade_data.head()

Unnamed: 0,reporter_iso_3,reporter_name,partner_iso_3,partner_name,year,trade_flow_name,trade_value_1000_usd,trade_value_usd,reporter_continent,partner_continent
0,AFG,Afghanistan,SWE,Sweden,2017,Export,86.752,86752.0,Asia,Europe
1,AFG,Afghanistan,JOR,Jordan,2018,Export,2796.481,2796481.0,Asia,Asia
2,AFG,Afghanistan,JOR,Jordan,2017,Export,3100.187,3100187.0,Asia,Asia
3,AFG,Afghanistan,ITA,Italy,2018,Export,279.918,279918.0,Asia,Europe
4,AFG,Afghanistan,ITA,Italy,2017,Export,416.642,416642.0,Asia,Europe


In [8]:
import geopy.geocoders
from geopy.geocoders import Nominatim
# Geocoder client shared by get_longitude / get_latitude below.
# NOTE(review): "my_app" is a generic user_agent; Nominatim's usage policy asks
# for an application-specific value — confirm before running many lookups.
geolocator=Nominatim(user_agent="my_app")

In [9]:
# Distinct country names on each side of the trade relation.
list_countries_reporter = list(trade_data["reporter_name"].unique())
list_countries_partner = list(trade_data["partner_name"].unique())

# Union of both lists, keeping first-seen order and dropping repeats.
list_countries = list(
    dict.fromkeys([*list_countries_reporter, *list_countries_partner])
)

In [10]:
# Geocoding results keyed by country name, so each name is sent to the
# geocoder at most once even though longitude and latitude are read separately.
_geocode_cache = {}


def _geocode_country(country):
    """Return ``geolocator.geocode(country)``, reusing a cached result if present."""
    if country not in _geocode_cache:
        _geocode_cache[country] = geolocator.geocode(country)
    return _geocode_cache[country]


def get_longitude(country):
    """Return the longitude of ``country``, or None if the geocoder finds no match."""
    location = _geocode_country(country)
    # geocode() yields None for names it cannot resolve.
    return None if location is None else location.longitude


def get_latitude(country):
    """Return the latitude of ``country``, or None if the geocoder finds no match."""
    location = _geocode_country(country)
    # geocode() yields None for names it cannot resolve.
    return None if location is None else location.latitude

# Look up coordinates for every country name: all longitudes first, then all latitudes.
longitude_list = list(map(get_longitude, list_countries))
latitude_list = list(map(get_latitude, list_countries))

# Column name -> column values, in the order the columns should appear.
my_dict = dict(
    country=list_countries,
    longitude=longitude_list,
    latitude=latitude_list,
)

# One row per country name with its coordinates.
coordinates_df = pd.DataFrame(data=my_dict)

In [11]:
# NOTE(review): rebuilds coordinates_df from the same my_dict as the previous
# cell's last statement; this cell looks redundant.
coordinates_df = pd.DataFrame(my_dict)

In [12]:
# Add blank placeholder columns for the coordinates; they are filled in later.
(
    trade_data["reporter_lat"],
    trade_data["reporter_long"],
    trade_data["partner_lat"],
    trade_data["partner_long"],
) = ("",) * 4

In [13]:
# Coordinate look-up tables indexed by country name. drop_duplicates keeps the
# first entry per name (the same row the previous `.iloc[0]` selected) and
# gives Series.map the unique index it needs.
coordinates_by_country = (
    coordinates_df.drop_duplicates(subset="country").set_index("country")
)
latitude_by_country = coordinates_by_country["latitude"]
longitude_by_country = coordinates_by_country["longitude"]

# One vectorized look-up per column replaces four scans of coordinates_df per
# row. Names without an entry become NaN instead of raising on an empty match.
trade_data["reporter_lat"] = trade_data["reporter_name"].map(latitude_by_country)
trade_data["reporter_long"] = trade_data["reporter_name"].map(longitude_by_country)
trade_data["partner_lat"] = trade_data["partner_name"].map(latitude_by_country)
trade_data["partner_long"] = trade_data["partner_name"].map(longitude_by_country)

In [14]:
# Write the enriched frame to disk; index=False leaves the row index out of the file.
trade_data.to_csv('cleaned_trade_data.csv', index=False)