In [62]:
import pandas as pd

# Show every column when a wide frame is displayed.
pd.set_option('display.max_columns', None)

# Main production table (read as ISO-8859-1) and its area-code lookup table.
all_df = pd.read_csv(
    './data/Production_Crops_Livestock_E_All_Data/Production_Crops_Livestock_E_All_Data_NOFLAG.csv',
    encoding="ISO-8859-1",
)
area_code_df = pd.read_csv(
    './data/Production_Crops_Livestock_E_All_Data/Production_Crops_Livestock_E_AreaCodes.csv'
)

# Overwrite 'Area' with the name looked up from the area-code table, keyed by
# 'Area Code'. Codes without an entry in the lookup are left as-is by replace().
code_to_name_dict = area_code_df.set_index('Area Code')['Area'].to_dict()
all_df.loc[:, 'Area'] = all_df['Area Code'].replace(code_to_name_dict)

# Auxiliary tables joined onto the production data in later cells.
lat_lon = pd.read_csv('./data/world_country_and_usa_states_latitude_and_longitude_values.csv')
population = pd.read_csv('./data/population_by_country.csv')

def make_year_cols(start, end):
    """Return the year column labels 'Y<start>' ... 'Y<end - 1>'.

    Args:
        start: First year to include.
        end: Year at which to stop; like ``range``, it is NOT included.

    Returns:
        A list of strings such as ``['Y2000', 'Y2001']``; empty when
        ``start >= end``.
    """
    labels = []
    for year in range(start, end):
        labels.append('Y' + format(year))
    return labels

In [63]:
# Attach coordinates by matching the area name to the country name in the
# lat/lon table. It is a left join, so unmatched areas get NaN coordinates.
coords = lat_lon[['latitude', 'longitude', 'country']]
all_df = all_df.merge(coords, how='left', left_on='Area', right_on='country')

# Reshape from one column per year (Y2000 ... Y2020) to one row per year.
id_columns = ['Area', 'Area Code', "longitude", "latitude", 'Item', 'Element', 'Unit']
year_columns = make_year_cols(2000, 2021)
all_df = all_df.melt(
    id_vars=id_columns,
    value_vars=year_columns,
    var_name='Year',
    value_name='Quantity',
)

# Turn the 'Y2000'-style labels into integer years.
all_df['Year'] = all_df['Year'].str.strip('Y').astype(int)

In [64]:
# Reshape population from one column per year ('2000' ... '2020') to one row
# per (country, year), then store the year as an integer so it can be matched
# against an integer 'Year' column.
year_labels = list(map(str, range(2000, 2021)))
population = population.melt(
    id_vars=['Country Name'],
    value_vars=year_labels,
    var_name='Year',
    value_name='Population',
).astype({'Year': int})

In [65]:
# Add the population for each (area, year) pair. It is a left join, so rows
# whose area name has no match in the population table get NaN population.
all_df = all_df.merge(
    population,
    how='left',
    left_on=['Area', 'Year'],
    right_on=['Country Name', 'Year'],
)

# Check country names against the map file

In [24]:
# Read the raw text of ne_110m.json so country names can be looked up with
# plain substring searches.
# JSON is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default (e.g. cp1252 on Windows),
# which could mis-decode or fail on non-ASCII country names.
with open('./data/ne_110m.json', 'r', encoding='utf-8') as f:
    text = f.read()

In [None]:
# Print every area name that does not appear verbatim as a "NAME" property
# in the map file text. The match is an exact substring test, so it relies on
# the JSON having no whitespace around the colon.
name_pattern = '"NAME":"{}"'
for country_name in all_df['Area'].unique():
    if name_pattern.format(country_name) not in text:
        print(country_name)


# Wrangle and export the data

In [68]:
# Row filters: items whose name ends in 'Total', the 'Production' element,
# and area codes up to 300.
# NOTE(review): the 300 cut-off presumably drops aggregate/regional areas;
# confirm against the area-code table.
is_total_item = all_df['Item'].str.endswith('Total')
is_low_area_code = all_df['Area Code'] <= 300
is_production = all_df['Element'].isin(['Production'])

# Rows missing coordinates or population are dropped.
required_columns = ['latitude', 'longitude', 'Population']
df = all_df[is_total_item & is_low_area_code & is_production].dropna(
    axis=0,
    how='any',
    subset=required_columns,
)

# The frame's index is written as the first (unnamed) CSV column.
df.to_csv('data/processed/production_major_categories_per_country_2000-2020.csv')