In [22]:
import pandas as pd
import os
import numpy as np
from bokeh.layouts import gridplot

# URL of an ISO-3166 country/region table and the name of the regional CSV
# read later in the notebook (presumably a local copy of that table -- TODO confirm).
url_2 = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
regional_file_name = "regions.csv"

file_name = 'WFPVAM_FoodPrices_version4_Retail.csv'

# Load the retail food prices and parse the 'YYYY-MM' strings into datetimes.
df = pd.read_csv(file_name)
df['date'] = pd.to_datetime(df['date'], format='%Y-%m')

# Boolean row mask selecting the records whose country is Rwanda.
country = df['adm0_name'].eq("Rwanda")

# Quick overview: every country in the file, then the products sold in Rwanda.
print(df['adm0_name'].unique())
print(df.loc[country, 'cm_name'].unique())



array(['Armenia', 'Democratic Republic of the Congo', 'Gambia',
       'Kyrgyzstan', "Lao People's Democratic Republic", 'Peru',
       'State of Palestine'], dtype=object)

In [None]:
# only_bread.plot(x='date', y='Diff')

def datetime(x):
    """Return ``x`` converted to a new NumPy array with a ``datetime64`` dtype.

    Note: this helper has the same name as the standard-library ``datetime``
    module, so importing that module later would rebind the name.
    """
    converted = np.array(x, dtype=np.datetime64)
    return converted


from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool


# COLORS
from bokeh.palettes import Dark2_5 as palette
import itertools


# Endless iterator over the imported palette. The same iterator object is
# consumed by every plotting loop further down, so each figure continues the
# colour sequence where the previous one stopped rather than restarting at
# the first colour.
colors = itertools.cycle(palette)

# Question 1

Are there any food prices that show a negative/positive correlation, and is this correlation present throughout the years, or perhaps only in certain periods? Can you perhaps detect possible ingredients of a certain other food product?



In [None]:
# One figure per market of the selected country, one line per product.
# NOTE(review): assumes `df` already has a column named by `y_axis`
# ('Gradient'); it is not created in the cells shown here -- TODO confirm.

country = 'Uganda'
y_axis = 'Gradient'

# allow for interactive hover tool
# Import the ColumnDataSource
from bokeh.models import ColumnDataSource

df_country = df.loc[df.adm0_name == country]
plots = []
# Group by a scalar key rather than a one-element list: with a list key,
# recent pandas versions yield 1-tuples such as ('Kampala',) as group keys,
# which would end up verbatim in the plot titles and legends.
country_groups = df_country.groupby('mkt_name')

for country_group, country_row in country_groups:
    plot = figure(x_axis_type="datetime", title="Products in {}".format(country_group), tools='hover,pan,wheel_zoom,box_zoom,reset')
    product_groups = country_row.groupby('cm_name')
    for (group, row), color in zip(product_groups, colors):
        # create CDS to enable dynamic hovering
        row_cds = ColumnDataSource(row)
        # `legend_label` always treats its value as literal text; the older
        # `legend=` keyword was ambiguous (a string could be read as a column
        # name) and is deprecated in Bokeh.
        # OK to use Gradient for derivative?
        plot.line('date', y_axis, color=color, legend_label=str(group), source=row_cds)
    plot.legend.click_policy = "hide"
    plot.legend.location = "top_right"
    hover = plot.select(dict(type=HoverTool))
    # set preferred info for the hover tool to show
    hover.tooltips = [
        # TODO: "@date" shows the raw timestamp; a format spec plus a
        # datetime formatter on the hover tool is needed for readable dates.
        ("date", "@date"),
        ("product", "@cm_name"),
        # Follow y_axis so the tooltip always reports the plotted column.
        (y_axis, "@{" + y_axis + "}")
        ]

    plots.append(plot)

show(gridplot([plots]))

# Question 2
Do countries in similar regions also show similar price differences? And if differences occur, can you find a potential explanation?

In [None]:
# Attach a 'sub-region' column to the price data by joining the regional
# lookup table on the country name.


region_df = pd.read_csv(regional_file_name).rename(columns={'name': 'adm0_name'})
new_regions = region_df.loc[:, ['adm0_name', 'sub-region']]

# This cell overwrites `df`, so running it a second time would merge another
# 'sub-region' column into a frame that already has one; pandas would then
# rename them 'sub-region_x' / 'sub-region_y' and the later groupby on
# 'sub-region' would fail. Dropping any earlier copy first keeps the cell
# idempotent (errors='ignore' makes the first run a no-op).
df_regions = pd.merge(
    df.drop(columns=['sub-region'], errors='ignore'),
    new_regions,
    on='adm0_name',
    how='left',  # keep every price row, even when the country has no match
)
df = df_regions.copy()

In [None]:
# One figure per sub-region, one line per product, showing the mean of
# `y_axis` over all rows of that sub-region/product/date.
y_axis = 'Gradient'


plots = []
# Mean of y_axis for every (sub-region, product, date) combination.
product_groups = df.groupby(['sub-region', 'cm_name', 'date'])[y_axis].mean().reset_index()
group = product_groups.groupby('sub-region')

for gr1, row1 in group:
    plot = figure(x_axis_type="datetime", title="Avg Products in {}".format(gr1), tools='hover,pan,wheel_zoom,box_zoom,reset')
    # Scalar key (not a one-element list) so gr2 is the plain product name;
    # recent pandas versions return 1-tuples for list keys. A separate name
    # is used so `product_groups` keeps referring to the aggregated frame.
    region_products = row1.groupby('cm_name')
    for (gr2, row2), color in zip(region_products, colors):
        row2_cds = ColumnDataSource(row2)
        # `legend_label` always treats its value as literal text; the older
        # `legend=` keyword is deprecated in Bokeh.
        plot.line('date', y_axis, color=color, legend_label=str(gr2), source=row2_cds)
    plot.legend.click_policy = "hide"
    plot.legend.location = "top_right"
    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = [
        ("Product", "@cm_name"),
        # Follow y_axis so the tooltip always reports the plotted column.
        ("Value", "@{" + y_axis + "}"),
        ]

    plots.append(plot)
show(gridplot([plots]))

# Question 3
Can we see any correlations with other types of data, such as currency fluctuations, weather patterns, and/or refugee movements? Can we perhaps visualize refugee movements from the food price data? Do certain weather conditions influence market prices?

In [1]:
import pandas as pd
import os
import numpy as np

# get datasets
# https://data.world/unhcr/asylum-applications/workspace/file?filename=asylum_seekers_monthly_2016_12_08_152150.csv
asylum_file = 'asylum_seekers_monthly_2016_12_08_152150.csv'
food_prices_file = 'WFPVAM_FoodPrices_version5_Retail.csv'

df = pd.read_csv(asylum_file, low_memory=False)
df_food = pd.read_csv(food_prices_file)


####### pre-processing #######################################################################

# month name -> month number, needed to build a datetime column
MONTH_DICT = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
              'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11,
              'December': 12}

df.columns = ['destination', 'origin', 'year', 'month', 'frequency']

# discard the first two rows (presumably non-data header lines of the
# export -- TODO confirm)
leading_rows = df.index[:2]
df = df.drop(index=leading_rows)

# combine year + month (as a number) into a single datetime column 'date',
# using the first day of the month, then drop the two source columns
df = df.assign(month=df['month'].map(MONTH_DICT))
first_of_month = pd.to_datetime(df[['year', 'month']].assign(day=1))
df = df.assign(date=first_of_month)
df = df.drop(columns=['month', 'year'])

# drop rows with undefined frequency ('*')
undefined_frequency = df.index[df['frequency'] == '*']
df = df.drop(index=undefined_frequency)
# also drop rows with unknown/various origins
unknown_origin = df.index[df['origin'] == 'Various/unknown']
df = df.drop(index=unknown_origin)

# NB: we could also drop the destination column, since it only includes countries
# outside of our original dataset. But maybe it will allow for some dynamic hovering later on...
# so we'll keep it for now.

# Check whether the two datasets use different names for the same country.
# The result is a sorted list of names that occur in only one of the two
# datasets; inspect it manually to decide which of them should match.
countries_food = df_food['Iadm0_name'].unique()
countries_refugee = df['origin'].unique()

# Sets give constant-time membership tests instead of scanning an array for
# every candidate name.
food_names = set(countries_food)
refugee_names = set(countries_refugee)

# names present in the food data but not in the refugee data
possible_mismatches_1 = [country for country in countries_food
                         if country not in refugee_names]

# names present in the refugee data but not in the food data
possible_mismatches_2 = [country for country in countries_refugee
                         if country not in food_names]

# list.sort() sorts in place and returns None, so assigning its result left
# `mismatches` as None. sorted() returns the sorted list itself.
definite_mismatches = sorted(possible_mismatches_1 + possible_mismatches_2)
mismatches = list(definite_mismatches)
# print(mismatches)

# {'country name in refugee file': 'corresponding country name in food dataset'}
MISMATCH_DICT = {
    'Iran (Islamic Rep. of)': 'Iran  (Islamic Republic of)',
    'Dem. Rep. of the Congo': 'Democratic Republic of the Congo',
    'Central African Rep.': 'Central African Republic',
}

# rename the mismatched origins to the spelling used in the food dataset
df['origin'] = df['origin'].replace(MISMATCH_DICT)

# renumber the rows from 0 after the earlier row drops
df = df.reset_index(drop=True)
# quick look; not rendered because it is not the last expression of the cell
df.head()
###################################### pre-processing done ###################################


# set food_prices dataset to datetime to allow merging with refugee dataset
#df_food['date'] = pd.to_datetime(df_food.date, format='%Y-%m')
# merge datasets

#new_df = pd.merge(df_food, df,  how='left', left_on=['Iadm0_name','date'], right_on = ['origin','date'])
# drop the now redundant origin column (equals Iadm0_name)
#new_df[:50]

# drop destination column
df = df.drop(columns='destination')


df[:200]



Unnamed: 0,origin,frequency,date
0,Afghanistan,1,2007-11-01
1,Afghanistan,1,2012-09-01
2,Afghanistan,18,2016-06-01
3,Afghanistan,19,2016-07-01
4,Algeria,1,2006-11-01
5,Algeria,1,2011-07-01
6,Egypt,1,2005-04-01
7,Egypt,1,2006-11-01
8,Egypt,1,2010-02-01
9,Barbados,1,2012-11-01
