In [81]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import datetime
from scipy.spatial.distance import pdist, squareform
import math 

# Folder layout (absolute Windows paths; each value keeps its trailing backslash)
path_root = "G:\\CIAT\\Code\\CWR\\ideal_diet\\src\\"
path_inputs = "{}inputs\\".format(path_root)
path_outputs = "{}outputs\\".format(path_root)


def _read_input(file_name):
    """Read one CSV file from the inputs folder using ISO-8859-1 encoding."""
    return pd.read_csv(path_inputs + file_name, encoding="ISO-8859-1")


# Input tables
groups = _read_input("groups.csv")
fao = _read_input("faostat-food_supply-2013.csv")
dash = _read_input("dash_reference.csv")

# Merging data: keep only the FAOSTAT rows whose "Item Code" also appears in groups
data = fao.merge(groups, left_on="Item Code", right_on="Item Code", how="inner")


In [96]:
def area_analysis(country, year, element, title, path):
    """Compare one area's per-food-group averages with the reference table.

    Filters the module-level ``data`` frame to the requested area and element
    code (dropping the "Grand Total" food group), averages the selected
    columns per food group, joins the result with the matching rows of the
    module-level ``dash`` frame and writes the joined table, plus a distance
    column, to a CSV file inside ``path``.

    Parameters
    ----------
    country : str
        Name of the country; matched against ``data.Area`` and used in the
        output file name.
    year : int
        Year to analyze; the value column read from ``data`` is "Y<year>".
    element : int
        Element code from FAOSTAT; matched against ``data["Element Code"]``.
        The reference column read from ``dash`` is "M<element>".
    title : str
        Name of the analysis. Currently unused; kept for interface
        compatibility.
    path : str
        Folder to save outputs. The file is written as
        "<country>-food_group-distances.csv" inside it.

    Returns
    -------
    None
        The result is written to disk; progress is printed to stdout.
    """
    print("Start " + country)
    year_name = "Y" + str(year)
    element_name = "M" + str(element)
    columns = ["Area","Item_x","Food group","Description","Element Code","Element","Unit",year_name]

    # Filtering data
    # The filtered frame gets its own name so the ``element`` parameter
    # (the element code) is not rebound to a DataFrame.
    selected = (
        (data.Area == country)
        & (data["Element Code"] == element)
        & (data["Food group"] != "Grand Total")
    )
    element_rows = data.loc[selected, columns]

    # Aggregating
    # agg(['mean']) yields two-level column labels, hence the (year_name, 'mean')
    # lookup further down.
    # NOTE(review): newer pandas versions may reject the mean over text columns
    # and/or the merge of two-level with single-level columns - confirm against
    # the pandas version in use.
    element_agg = element_rows.groupby('Food group').agg(['mean']).sort_values(['Food group'],ascending=[1]).reset_index()
    dash_tmp = dash[dash["food_group"].isin(element_agg['Food group'].values)].sort_values(['food_group'],ascending=[1]).reset_index()

    # Boxplot food group
    #fig1, ax1 = plt.subplots()
    #element_rows.boxplot(column=[year_name],by='Food group',rot=45, fontsize=10, ax=ax1)
    #fig1.savefig(path + "\\" + country + "-food_group-boxplot.png", dpi=300, bbox_inches="tight")

    # Compare ideal diet with country
    #fig2, ax2 = plt.subplots()
    #element_agg.plot.bar(y=year_name,x="Food group", ax=ax2,rot=45, fontsize=10)
    #dash_tmp[element_name].plot(ax=ax2, color = 'orange',rot=45, fontsize=10)
    #fig2.savefig(path + "\\" + country + "-food_group-compare_diet.png", dpi=300, bbox_inches="tight")

    data_tmp = pd.merge(element_agg, dash_tmp, left_on = "Food group",right_on="food_group",how='inner')

    # Square root of the WHOLE sum of squares. Previously the root was applied
    # to the reference term only (method call binds tighter than "+"), which
    # produced mean**2 + |reference|.
    # NOTE(review): if the metric is meant to be the gap between the area and
    # the reference, this should be the absolute difference instead - confirm.
    observed = data_tmp[(year_name, 'mean')]
    reference = data_tmp[element_name]
    data_tmp["distance_" + element_name] = ((observed ** 2) + (reference ** 2)) ** 0.5
    data_tmp.to_csv(path + "\\" + country + "-food_group-distances.csv", index = False)

    print("End " + country)





In [98]:
# Preparing folder for outputs: one folder per run date (YYYY-MM-DD).
# makedirs(exist_ok=True) also creates a missing outputs folder and does
# nothing when the dated folder already exists.
folder = path_outputs + datetime.datetime.now().strftime("%Y-%m-%d")
os.makedirs(folder, exist_ok=True)

# (FAOSTAT element code, element description) pairs to analyze
elements_analysis = [(645,"Food supply quantity (g/capita/day)"),(664,"Food supply (kcal/capita/day)")]

# List of countries
countries = data.Area.drop_duplicates().values
# Only the first two areas are processed
countries = countries[0:2]


for key, value in elements_analysis:
    # One sub-folder per element code
    folder_element = folder + "\\" + str(key)
    # Re-running on the same day must not fail because the folder already exists
    os.makedirs(folder_element, exist_ok=True)

    for c in countries:
        area_analysis(c, 2013, key, value, folder_element)




Start Afghanistan
0        0.016936
1     7892.785514
2       31.468825
3      629.308080
4    13300.524887
5       46.706086
6      198.039033
7        4.267337
8      172.314318
9     6644.575154
dtype: float64
End Afghanistan
Start Albania
0       1697.891536
1     173620.790000
2         86.143920
3       6384.604810
4       5868.441614
5        147.259148
6        708.994109
7          0.000000
8          2.312066
9       4280.903358
10     87827.242875
dtype: float64
End Albania
Start Afghanistan
0          0.000000
1       3061.250000
2        448.224490
3        305.010000
4     123390.790123
5        244.367347
6        501.123967
7          1.000000
8          1.440000
9       1892.250000
10       449.000000
dtype: float64
End Afghanistan
Start Albania
0       841.000000
1     71431.250000
2      1290.551111
3      1730.506944
4     31313.127551
5       531.677686
6      2086.174515
7         6.250000
8         7.111111
9      9702.250000
10     4852.250000
dtype: float64
End

