In [41]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import math 
import seaborn as sns

# Global variables
print("Configuration")
path_root = "G:\\CIAT\\Code\\CWR\\ideal_diet\\src\\"
path_inputs = path_root +"inputs\\"
path_outputs = path_root + "outputs\\"

print("\tSetting parameters")
fao_element = 664

print("\tLoading parameters")
groups = pd.read_excel(path_inputs + "configurations.xlsx", sheet_name='groups')
diets = pd.read_excel(path_inputs + "configurations.xlsx", sheet_name='diets')
years = pd.read_excel(path_inputs + "configurations.xlsx", sheet_name='years')
indicators_country = pd.read_excel(path_inputs + "configurations.xlsx", sheet_name='indicators_country')

print("\tOther things")
years_fao_colnames = ["Y"+str(y.year) for y in years.itertuples(index=True, name='Pandas')]

print("\tSuccessful")

Configuration
	Setting parameters
	Loading parameters
	Other things
	Successful


In [42]:
print("Merging fao data with groups")

print("\tLoading fao data")
data_fao = pd.read_csv(path_inputs + "faostat-food_supply.csv", encoding = "ISO-8859-1")

print("\tMerging fao data with groups")
data_fao_groups = pd.merge(data_fao, groups, left_on = "Item Code",right_on="Item Code",how='inner')

print("\tFiltering columns")
data_fao_colnames = ["ID","Area Code","Area","Item Code","Item_x","Element Code","Element","Unit","Description","Food group"]
data_fao_colnames.extend(years_fao_colnames)
data_fao_groups = data_fao_groups[data_fao_colnames]
data_fao_groups_colnames = ["id","area_code","area","item_code","item","element_code","element","unit","description","food_group"]
data_fao_groups_colnames.extend(years_fao_colnames)
data_fao_groups.columns = data_fao_groups_colnames

print("\tFiltering element " + str(fao_element))
data_fao_groups = data_fao_groups[data_fao_groups.element_code == fao_element]

print("\tSummarizing food groups")
food_group = data_fao_groups.food_group.unique()
data_fao_groups_colnames_fg = ["area","food_group"]
data_fao_groups = data_fao_groups.groupby(data_fao_groups_colnames_fg)[years_fao_colnames].sum().reset_index()

print("\tSaving")
data_fao_groups.to_excel(path_outputs +'data_fao_groups.xlsx', engine='openpyxl', index = False)

print("\tSuccessful")

Merging fao data with groups
	Loading fao data
	Merging fao data with groups
	Filtering columns
	Filtering element 664
	Summarizing food groups
	Saving
	Successful


In [43]:
print("Merging fao data with diets")

print("\tMerging fao data with diet")
data_fao_diet = pd.merge(data_fao_groups, diets, left_on = "food_group",right_on="food_group",how='inner')

print("\tSaving")
data_fao_diet.to_excel(path_outputs +'data_fao_diet.xlsx', engine='openpyxl', index = False)

print("\tSuccessful")

Merging fao data with diets
	Merging fao data with diet
	Saving
	Successful


In [44]:
print("Calculating categories")

print("\tCreating categories fields")
data_fao_cat = data_fao_diet.copy()

for y in years_fao_colnames:
    data_fao_cat["category" + y] = 0

print("\tGetting food groups")
food_group = data_fao_cat.food_group.unique()


quintals = pd.DataFrame()
diet = "d" + str(fao_element)
for fg in food_group:
    print("\t\tCalculating food group: " + fg)
    fg_fao = data_fao_cat[data_fao_cat.food_group == fg]
    
    for y in years_fao_colnames:
        print("\t\t\tCalculating year: " + y)
        
        # Food groups above
        fg_fao_a = fg_fao[fg_fao.type == "A"] 
        if(fg_fao_a.shape[0]>0):
            # Splitting in two datasets: above or below of diet
            fg_fao_a_above = fg_fao_a[fg_fao_a[y] >= fg_fao_a[diet]]
            fg_fao_a_below = fg_fao_a[fg_fao_a[y] < fg_fao_a[diet]]
            
            # Calculating quintals for below data
            q = fg_fao_a_below[y].quantile([0.25, 0.5,0.75, 1])
            q["year"] = y.replace("Y","")            
            q["food_group"] = fg
            q["type"] = "A"
            quintals = quintals.append(q, ignore_index=True)
            
            # Setting category A
            data_fao_cat["category" + y].iloc[fg_fao_a_above.index] = 5
            fg_fao_a_below["category" + y] = fg_fao_a_below[y].apply(lambda x: 1 if x<q[0] else (2 if x<q[1] else (3 if x<q[2] else 4)))
            data_fao_cat["category" + y].iloc[fg_fao_a_below.index] = fg_fao_a_below["category"+ y]
        
        # Food groups below
        fg_fao_b = fg_fao[fg_fao.type == "B"] 
        if(fg_fao_b.shape[0]>0):
            # Splitting in two datasets: above or below of diet
            fg_fao_b_below = fg_fao_b[fg_fao_b[y] <= fg_fao_b[diet]]
            fg_fao_b_above = fg_fao_b[fg_fao_b[y] > fg_fao_b[diet]]
            
            # Calculating quintals for below data
            q = fg_fao_b_above[y].quantile([0.25, 0.5,0.75, 1])
            q["year"] = y.replace("Y","")            
            q["food_group"] = fg
            q["type"] = "B"
            quintals = quintals.append(q, ignore_index=True)
            
            # Setting category B
            data_fao_cat["category"  + y].iloc[fg_fao_b_below.index] = 5
            fg_fao_b_above["category" + y] = fg_fao_b_above[y].apply(lambda x: 1 if x>q[3] else (2 if x>q[2] else (3 if x>q[1] else 4)))
            data_fao_cat["category" + y].iloc[fg_fao_b_above.index] = fg_fao_b_above["category"+ y]

print("\tSaving")
quintals.to_excel(path_outputs +'quintals.xlsx', engine='openpyxl', index = False)  
data_fao_cat.to_excel(path_outputs +'data_fao_cat.xlsx', engine='openpyxl', index = False)

print("\tSuccessful")

Calculating categories
	Creating categories fields
	Getting food groups
		Calculating food group: Alcohol
			Calculating year: Y2013


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


		Calculating food group: Dairy
			Calculating year: Y2013


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


		Calculating food group: Fats & Oils
			Calculating year: Y2013
		Calculating food group: Fruits
			Calculating year: Y2013
		Calculating food group: Grains (Starchy staples)
			Calculating year: Y2013
		Calculating food group: Legumes, Nuts & Seeds
			Calculating year: Y2013
		Calculating food group: Meats
			Calculating year: Y2013
		Calculating food group: Miscellaneous
			Calculating year: Y2013
		Calculating food group: Spices
			Calculating year: Y2013
		Calculating food group: Sugars
			Calculating year: Y2013
		Calculating food group: Vegetables
			Calculating year: Y2013
	Saving
	Successful


In [45]:
print("Creating matrix for countries analysis")

print("\tPivoting table by country")
data_countries = pd.DataFrame(columns=['area'])
for y in years_fao_colnames:
    print("\t\tYear: " + y)
    countries_pivot = pd.pivot_table(data_fao_cat, values='category' + y, index=['area'], columns=['type'], aggfunc=np.sum)
    if(data_countries.shape[0] == 0):
        data_countries.area = countries_pivot.index.values
    data_countries["A" + y] = countries_pivot.A.values
    data_countries["B" + y] = countries_pivot.B.values
    data_countries["Total" + y] = data_countries["A" + y] + data_countries["B" + y]

print("\tSaving")
data_countries.to_excel(path_outputs +'data_countries.xlsx', engine='openpyxl', index = False)


print("\tSuccessful")


Creating matrix for countries analysis
	Pivoting table by country
		Year: Y2013
	Saving
	Successful


In [46]:
print("Countries analysis")

print("\tMerging countries with indicators")
countries_indicators = pd.merge(data_countries, indicators_country, left_on = "area",right_on="area",how='inner')

print("\tSaving")
countries_indicators.to_excel(path_outputs +'countries_indicators.xlsx', engine='openpyxl', index = False)

# Correlation
print("\tCalculating correlation")
countries_correlation = countries_indicators.corr(method ='pearson')
#countries_correlation.to_excel(path_outputs +'countries_correlation.xlsx', engine='openpyxl', index = False)
def magnify():
    return [dict(selector="th",props=[("font-size", "7pt")]),
            dict(selector="td",props=[('padding', "0em 0em")]),
            dict(selector="th:hover",props=[("font-size", "12pt")]),
            dict(selector="tr:hover td:hover",props=[('max-width', '200px'),('font-size', '12pt')])]

cmap = sns.diverging_palette(5, 250, as_cmap=True)
countries_table_correlation = countries_correlation.style.background_gradient(cmap, axis=1)\
    .set_properties(**{'max-width': '80px', 'font-size': '10pt'})\
    .set_caption("Hover to magify")\
    .set_precision(2)\
    .set_table_styles(magnify())

print("\tSaving")
countries_table_correlation.to_excel(path_outputs +'countries_table_correlation.xlsx', engine='openpyxl')

print("\tSuccessful")

Countries analysis
	Merging countries with indicators
	Saving
	Calculating correlation
	Saving
	Successful
