## Adding Country Codes to our European Butter Prices 10 years Dataset ##



In [1]:
# loading all relevant libraries

import pandas as pd
import seaborn as sns
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

import pycountry # used for mapping
import geopy # used for mapping
from geopy.geocoders import Nominatim # used for mapping

from geopy.exc import GeocoderTimedOut 
import folium

In [2]:
#pip install geopy

In [3]:
#pip install folium

In [39]:
# creating dataframe for exports
df = pd.read_csv('European Butter Prices 10 years.csv', header = 0)

In [33]:
df.head

<bound method NDFrame.head of        Year  Week Member State Product  Begin Date    End Date  \
0      2000    52      Belgium  BUTTER  25/12/2000  31/12/2000   
1      2000    52      Denmark  BUTTER  25/12/2000  31/12/2000   
2      2000    52      Germany  BUTTER  25/12/2000  31/12/2000   
3      2000    52      Ireland  BUTTER  25/12/2000  31/12/2000   
4      2000    52       Greece  BUTTER  25/12/2000  31/12/2000   
...     ...   ...          ...     ...         ...         ...   
14219  2022     1        Italy  BUTTER  03/01/2022  09/01/2022   
14220  2022     1  Netherlands  BUTTER  03/01/2022  09/01/2022   
14221  2022     1       Poland  BUTTER  03/01/2022  09/01/2022   
14222  2022     1     Portugal  BUTTER  03/01/2022  09/01/2022   
14223  2022     1     Slovakia  BUTTER  03/01/2022  09/01/2022   

       Price (�/100kg)  
0               326.10  
1               376.70  
2               352.79  
3               292.04  
4               460.16  
...                ...  
14

In [40]:
df.dtypes

Year                 int64
Week                 int64
Member State        object
Product             object
Begin Date          object
End Date            object
Price (�/100kg)    float64
dtype: object

In [41]:
df.rename(columns = {'Member State':'State'}, inplace = True)

In [42]:
# so, who do we export to the most? We can find this out by grouping by state
countries = df.groupby('State', as_index=False).sum()

In [43]:
df.head

<bound method NDFrame.head of        Year  Week        State Product  Begin Date    End Date  \
0      2000    52      Belgium  BUTTER  25/12/2000  31/12/2000   
1      2000    52      Denmark  BUTTER  25/12/2000  31/12/2000   
2      2000    52      Germany  BUTTER  25/12/2000  31/12/2000   
3      2000    52      Ireland  BUTTER  25/12/2000  31/12/2000   
4      2000    52       Greece  BUTTER  25/12/2000  31/12/2000   
...     ...   ...          ...     ...         ...         ...   
14219  2022     1        Italy  BUTTER  03/01/2022  09/01/2022   
14220  2022     1  Netherlands  BUTTER  03/01/2022  09/01/2022   
14221  2022     1       Poland  BUTTER  03/01/2022  09/01/2022   
14222  2022     1     Portugal  BUTTER  03/01/2022  09/01/2022   
14223  2022     1     Slovakia  BUTTER  03/01/2022  09/01/2022   

       Price (�/100kg)  
0               326.10  
1               376.70  
2               352.79  
3               292.04  
4               460.16  
...                ...  
14

In [44]:
# getting the country codes 
country_names = df.State

# open a dictionary list & find the 3 letter country code for all countries in our df
countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3

# create a df with the country codes 
country_codes = [countries.get(country, 'Unknown code') for country in country_names]

In [45]:
# now, create a new column that contains the country codes
df['country_codes'] = country_codes

In [46]:
# function to manually enter codes that didn't appear

# giving our function the state column and the country code column & creating a new df with the final country codes
def fix_codes(x, y):
    if x == 'Czech Republic':
        return "CZE"
    elif x == 'Great Britain':
        return "GBR"
    elif x == 'Northern Ireland':
        return "GBR"
    elif x =='Russia':
        return "RUS"
    elif x == 'South Korea':
        return "KOR"
    elif x == 'Taiwan':
        return "TWN"
    elif x =='USA':
        return 'USA'
    else:
        return y

func = np.vectorize(fix_codes)
updated_codes = func(df["State"], df["country_codes"])

In [47]:
# setting the country codes in our df to include the updated ones
df['country_codes'] = updated_codes

In [48]:
df.tail()

Unnamed: 0,Year,Week,State,Product,Begin Date,End Date,Price (�/100kg),country_codes
14219,2022,1,Italy,BUTTER,03/01/2022,09/01/2022,514.0,ITA
14220,2022,1,Netherlands,BUTTER,03/01/2022,09/01/2022,608.0,NLD
14221,2022,1,Poland,BUTTER,03/01/2022,09/01/2022,574.43,POL
14222,2022,1,Portugal,BUTTER,03/01/2022,09/01/2022,558.87,PRT
14223,2022,1,Slovakia,BUTTER,03/01/2022,09/01/2022,514.43,SVK


In [50]:
# finally, export the DF to a csv file
df.to_csv(r"European Butter Prices 10 YearsCodes.csv", index=False)