# Global Food Prices

Get dataset

In [102]:
import os
from download import download
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline

# Main dataset: the food-price CSV served from vam.wfp.org, cached locally
# under the same file name as on the server.
url = "http://vam.wfp.org/sites/data/WFPVAM_FoodPrices_05-12-2017.csv"
file_name = "WFPVAM_FoodPrices_05-12-2017.csv"

# Lookup table: ISO-3166 countries with regional codes (GitHub raw CSV),
# cached locally as "regions.csv".
url_2 = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
regional_file_name = "regions.csv"

def get_dataset(url, file_name, encoding='latin-1'):
    """Return the CSV stored at *file_name*, downloading it first if absent.

    Args:
        url: Remote location handed to ``download`` when *file_name* does not
            exist on disk yet.
        file_name: Local path used both as the download target and as the
            file that is parsed.
        encoding: Text encoding passed to ``pandas.read_csv``. Defaults to
            ``'latin-1'``, the value that was previously hard-coded, so
            existing two-argument calls behave exactly as before. Pass
            ``'utf-8'`` for sources that are UTF-8 encoded.

    Returns:
        A ``pandas.DataFrame`` parsed from *file_name*.
    """
    # The local file doubles as a cache: the download only happens when the
    # file is not already present.
    if not os.path.isfile(file_name):
        download(url, file_name)

    return pd.read_csv(file_name, encoding=encoding)

# Price observations (fetched on first run, read from the local copy afterwards).
df = get_dataset(url=url, file_name=file_name)

# Country -> region lookup table, cached the same way.
# NOTE(review): get_dataset decodes with latin-1; this file is served from
# GitHub and is presumably UTF-8, so accented country names may come out
# garbled -- confirm before relying on name-based joins.
region_df = get_dataset(url=url_2, file_name=regional_file_name)

In [103]:
# Pre-process: attach each country's sub-region to the price rows and derive
# a month-level timestamp column.

# Give the region table the same country-name column as the price table so
# the two can be joined on it.
region_df.rename(columns={'name': 'adm0_name'}, inplace=True)
new_regions = region_df[['adm0_name', 'sub-region']]

# Left join: every price row is kept; rows whose country name has no match
# in the region table get a missing 'sub-region'.
df_regions = df.merge(new_regions, how='left', on='adm0_name')

# Continue on an independent copy of the merged frame.
df = df_regions.copy()

# Encode year and month as the integer YYYYMM01 (first day of the month) and
# parse that as a date.
yyyymmdd = df['mp_year'] * 10000 + df['mp_month'] * 100 + 1
df['datetime'] = pd.to_datetime(yyyymmdd, format='%Y%m%d')


In [104]:
import numpy as np
# Average all price observations for the same country, commodity and month.
avg_price = df.groupby(['adm0_name', 'cm_name', 'datetime'])['mp_price']
df_means = avg_price.mean().reset_index()

# Normalise each (country, commodity) price series: subtract the series mean
# and divide by the series range (max - min).
#
# `transform` is used instead of `apply` because it always returns values
# aligned to df_means' own index. With `apply` and a row-preserving function
# the result index depends on the pandas version (pandas 2.x prepends the
# group keys), which breaks a later index-aligned column assignment.
#
# A series whose prices are all equal has range 0 and therefore yields NaN.
price_by_series = df_means.groupby(['adm0_name', 'cm_name'])['mp_price']
series_mean = price_by_series.transform('mean')
series_range = price_by_series.transform('max') - price_by_series.transform('min')
normalized = (df_means['mp_price'] - series_mean) / series_range

In [120]:
# Store the normalised prices next to the raw monthly means (values are
# matched to rows by index).
df_means = df_means.assign(mp_price_norm=normalized)

# Leftover debugging snippet, kept for reference.
# NOTE(review): df_means appears to carry only adm0_name, cm_name, datetime
# and the price columns, so the 'sub-region' filter below would need that
# column to be added first -- confirm before re-enabling.
# only_apples = df_means.loc[df_means['sub-region'] == 'Central Asia'].loc[df_means['cm_name'] == 'Apples']
# print("MAX", only_apples['mp_price'].max(), "MIN", only_apples['mp_price'].min(), "MEAN", only_apples['mp_price'].mean())

In [106]:
# plt.plot(df_means['datetime'], df_means['mp_price'], '-')

# Make a graph per product. Each line represents a country (or region for later).


from bokeh.io import output_file, show, save
# output_notebook()

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure

In [119]:
from bokeh.palettes import Dark2_5 as palette
import itertools
from bokeh.layouts import column

# Draw one figure per commodity, with one line per country showing the
# normalised monthly price, and write all figures stacked vertically to a
# single HTML page.

# Group on a scalar key (not a one-element list) so every group label is a
# plain string; list keys yield 1-tuples on recent pandas versions, which is
# not a valid figure title.
b = df_means.groupby('cm_name')

figures = []

# Only the first 21 commodities are plotted: the previous hand-rolled counter
# stopped once its index exceeded 20, i.e. after 21 figures.
MAX_FIGURES = 21

for group, row in itertools.islice(b, MAX_FIGURES):
    p = figure(plot_width=800, plot_height=300, x_axis_type='datetime')
    p.title.text = group

    for color_idx, (country, row2) in enumerate(row.groupby('adm0_name')):
        # Take the columns directly instead of walking the rows one by one,
        # and label the line with the group key rather than a variable
        # leaked from an inner loop.
        p.line(
            row2['datetime'].tolist(),
            row2['mp_price_norm'].tolist(),
            line_width=4,
            legend=country,
            # Cycle through the palette when there are more countries than
            # colours.
            color=palette[color_idx % len(palette)],
        )

    # The legend only exists once at least one labelled glyph has been added,
    # so it has to be configured after the lines are drawn; setting these
    # properties on a figure without glyphs has no effect.
    p.legend.click_policy = "hide"
    p.legend.location = "top_left"

    figures.append(p)

output_file("groups" + ".html")
show(column(figures))
