In [1]:
# loading all relevant libraries

import pandas as pd
import seaborn as sns
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

import pycountry # used for mapping
import geopy # used for mapping
from geopy.geocoders import Nominatim # used for mapping

from geopy.exc import GeocoderTimedOut 
import folium

ModuleNotFoundError: No module named 'pycountry'

In [None]:
# Load the raw export figures; the first row of the CSV supplies the column names.
df = pd.read_csv('World Butter Exporters 5 years.csv', header=0)

In [None]:
# Forward-fill missing State values so every row carries the state named on the nearest row above.
# Series.ffill() is used because fillna(method='ffill') is deprecated in pandas 2.1+.
df['State'] = df['State'].ffill()

In [None]:
# Rows with no Commodity value cannot be used, so discard them.
df = df.dropna(subset=['Commodity'])

In [None]:
# Some cells hold the placeholder string '..' - swap it for 0 so the figures can be grouped and summed.
df = df.replace(to_replace='..', value=0)

In [None]:
# The figures were read in as objects rather than floats, so they cannot be grouped yet.
# Step 1: collect the labels of every column except the two text columns.
# Step 2: parse those columns as numbers; anything that cannot be parsed becomes NaN.
text_columns = ['State', 'Commodity']
cols = df.columns.drop(text_columns)
df[cols] = df[cols].apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
# Drop the 'Total merchandise trade (0 - 9)' commodity rows - they are a sum of the other
# figures, so keeping them would count the same exports twice.
is_grand_total = df['Commodity'].eq('Total merchandise trade (0 - 9)')
df = df[~is_grand_total]

In [None]:
# so, who do we export to the most? We can find this out by grouping by state
# numeric_only=True keeps the text 'Commodity' column out of the aggregation: pandas 2.x would
# otherwise concatenate its strings, and the row-wise total computed afterwards would fail.
countries = df.groupby('State', as_index=False).sum(numeric_only=True)

In [None]:
# now, create a new column that sums all the months for each state
# 'Total' itself is left out of the sum so that re-running this cell does not inflate the
# figures, and only numeric columns are added so a stray text column cannot raise a TypeError.
monthly_values = (
    countries
    .drop(columns=['State', 'Total'], errors='ignore')
    .select_dtypes(include='number')
)
countries['Total'] = monthly_values.sum(axis=1)

In [None]:
# Keep just the state name and its overall total.
countries_total = countries.loc[:, ['State', 'Total']]

In [None]:
# excluding non descriptive countries - we can't map these so let's remove them from the df
# .copy() makes the result an independent frame, so the columns added to it in later cells
# do not trigger pandas' SettingWithCopyWarning.
unmappable_states = ['EU country not specified', 'Other countries']
countries_total = countries_total[~countries_total['State'].isin(unmappable_states)].copy()

In [None]:
# getting the country codes so we can find the longs & lats (not needed for mapping here but useful for Tableau)
# 1st - get the names of all the states
country_names = countries_total.State

# Build a country name -> 3 letter code lookup from pycountry. It has its own name so that it
# does not overwrite the `countries` DataFrame created by the groupby in an earlier cell.
alpha3_by_name = {country.name: country.alpha_3 for country in pycountry.countries}

# List of codes in the same order as the states; names pycountry does not list get 'Unknown code'.
country_codes = [alpha3_by_name.get(name, 'Unknown code') for name in country_names]

In [None]:
# Attach the looked-up codes to the frame as a new 'country_codes' column.
countries_total = countries_total.assign(country_codes=country_codes)

In [None]:
# Manually entered codes for the state names whose code didn't appear in the automatic lookup.
_MANUAL_CODES = {
    'Czech Republic': "CZE",
    'Great Britain': "GBR",
    'Northern Ireland': "GBR",
    'Russia': "RUS",
    'South Korea': "KOR",
    'Taiwan': "TWN",
    'USA': 'USA',
}


def fix_codes(x, y):
    """Return the manually entered code for state name ``x``, or ``y`` if there is none.

    x: a state name from the State column.
    y: the code currently stored for that state; handed back unchanged when ``x`` has no
       manual entry.
    """
    return _MANUAL_CODES.get(x, y)

# Apply fix_codes element-wise to each state name and its current code.
# otypes=[object] stops np.vectorize from calling fix_codes an extra time on the first element
# to guess the output type, and lets it accept an empty frame instead of raising ValueError.
func = np.vectorize(fix_codes, otypes=[object])
updated_codes = func(countries_total["State"], countries_total["country_codes"])

In [None]:
# Overwrite the 'country_codes' column with the corrected codes.
countries_total = countries_total.assign(country_codes=updated_codes)

In [None]:
# finding the longitude & latitudes using the country codes
# Two separate, initially empty lists that collect one coordinate per country code.
longitude, latitude = [], []




# creating a function to find the longs & lats
def findGeocode(country, max_retries=3):
    """Geocode ``country`` with Nominatim, retrying a limited number of times on timeout.

    country: the query text handed to ``Nominatim.geocode``.
    max_retries: how many further attempts to make after a ``GeocoderTimedOut``.

    Returns whatever ``geocode`` returns (the calling loop treats ``None`` as "not found"),
    or ``None`` once every attempt has timed out.
    """
    # One geocoder serves every attempt for this query.
    geolocator = Nominatim(user_agent="blank_app_name")

    # Retry in a bounded loop: retrying by unbounded recursion ends in a RecursionError
    # when the service keeps timing out, whereas this gives up cleanly.
    for _ in range(max_retries + 1):
        try:
            return geolocator.geocode(country)
        except GeocoderTimedOut:
            continue

    return None

# now, using the function to find the longs & lats
for code in countries_total["country_codes"]:
    # Geocode once and reuse the result: every findGeocode call is a network request,
    # so calling it for the check and again for the value doubles the traffic.
    loc = findGeocode(code)

    if loc is not None:
        latitude.append(loc.latitude)
        longitude.append(loc.longitude)
    else:
        # No match: record NaN so both lists stay aligned with the rows of the frame.
        latitude.append(np.nan)
        longitude.append(np.nan)

In [None]:
# Add the collected coordinates to the frame as 'Longitude' and 'Latitude' columns.
countries_total = countries_total.assign(Longitude=longitude, Latitude=latitude)

In [None]:
# now to create a map with folium
# the country outlines come from a JSON file hosted in the folium GitHub repository

url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
country_shapes = url + '/world-countries.json'

In [None]:
# now we can create the map - we're going to create a map that highlights each country depending on how much we export to them

# Initialize the base map - these are starting points for the map:
m2 = folium.Map(location=[20, 0], zoom_start=2)

# defining the settings for the choropleth.
# folium.Choropleth(...).add_to(map) is used because the Map.choropleth() method was
# deprecated and later removed from folium; the class takes the same arguments.
folium.Choropleth(
    geo_data=country_shapes,  # using the country shapes we got from the json file
    name='Total Irish Exports between 2015 & 2020',  # name of our map layer
    data=countries_total,  # what data we want to map
    columns=['country_codes', 'Total'],  # key column first, then the value column to colour by
    key_on='feature.id',  # joining our country codes to the IDs in the json file
    fill_color='YlGnBu',  # the colours we want to use in the map
    fill_opacity=0.5,  # similar to transparency - colour setting
    line_opacity=0.5,
    legend_name='Value of Exports',  # the name under our legend
    smooth_factor=0,  # lines around the countries as we zoom in
    highlight=True,  # does the map highlight the country when we hover over it with a mouse
).add_to(m2)
folium.LayerControl().add_to(m2)


In [None]:
# Display the map as the cell output - countries not in our dataset have no colour
m2

In [None]:
# let's take a look at the overall numbers - we can see that exports to the US are nearly double that of the other countries
countries_total.sort_values(by='Total', ascending=False)

In [None]:
# finally, sometimes it's useful to view the % of the total value, rather than absolute values
# Scale to a percentage first and round last: rounding the fraction and then multiplying by
# 100 leaves floating-point noise such as 7.000000000000001 in the column.
total_exports = countries_total['Total'].sum()
countries_total['perc'] = (countries_total['Total'] / total_exports * 100).round()

In [None]:
# and now let's map the percentage rather than the total value
# each country is shaded by its share of the total exports

# Initialize the base map - these are starting points for the map:
m3 = folium.Map(location=[20, 0], zoom_start=2)

# defining the settings for the choropleth.
# folium.Choropleth(...).add_to(map) is used because the Map.choropleth() method was
# deprecated and later removed from folium; the class takes the same arguments.
folium.Choropleth(
    geo_data=country_shapes,  # using the country shapes we got from the json file
    name='Total Irish Exports between 2015 & 2020',  # name of our map layer
    data=countries_total,  # what data we want to map
    columns=['country_codes', 'perc'],  # key column first, then the value column to colour by
    key_on='feature.id',  # joining our country codes to the IDs in the json file
    fill_color='YlGnBu',  # the colours we want to use in the map
    fill_opacity=0.5,  # similar to transparency - colour setting
    line_opacity=0.5,
    legend_name='Percentage Value of Exports',  # the name under our legend
    smooth_factor=0,  # lines around the countries as we zoom in
    highlight=True,  # does the map highlight the country when we hover over it with a mouse
).add_to(m3)
folium.LayerControl().add_to(m3)


In [None]:
# Display the percentage map as the cell output
m3

In [None]:
# finally, export the DF to a csv file
# A forward slash is a valid path separator on Windows, macOS and Linux alike; a backslash is
# only a separator on Windows and would become part of the file name everywhere else.
countries_total.to_csv("Irish Globalisation Data/Trade_Statistics_Export_Commodities_DF.csv", index=False)