In [4]:
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.io import show, output_file
import pandas as pd
import math
from decimal import Decimal

# Load the food-price table; the path is relative, so it resolves against the
# kernel's current working directory. Later cells rebind `df` to filtered views.
df = pd.read_csv('../data/global_food_prices.csv')

In [5]:
# Histogram of the average per-KG price of one product across countries for one year.
# Requires `df` (loaded from CSV in the import cell) and `price_to_kg` (defined in a
# later cell of this file) to exist in the kernel before this cell runs.
year = 2015
product = 'Bread'
df = df[['country', 'product', 'year', 'unit', 'avg_price', 'currency', 'price_in_dollars']]
bin_count = 10
his_range = 3.4  # span of the x-axis covered by the bins (converted dollar prices)
bin_width = his_range / bin_count


output_file('../graphs/histomgram_global_yearly_product_prices.html')

# NOTE: `df` is rebound to the filtered rows, so re-run the CSV-loading cell before
# changing `year` or `product`; otherwise the filter runs on already-filtered data.
df = df[(df['product'] == product) & (df['year'] == year)]
countries = sorted(set(df['country']))

avg_prices = []                            # one entry per country, same order as `countries`
bins = [0] * bin_count                     # number of countries per bin
bin_info = [[] for _ in range(bin_count)]  # hover labels per bin
for country in countries:
    subset = df[df['country'] == country]

    # Use the first row's value: list(set(...))[0] picks an arbitrary element.
    unit = subset['unit'].tolist()[0]
    currency = subset['currency'].tolist()[0]
    avg_price = subset['avg_price'].tolist()[0]

    # Average of the distinct known dollar prices, converted to a per-KG price.
    prices = [price_to_kg(float(price), unit)
              for price in set(subset['price_in_dollars']) if price != 'UNKNOWN']
    # Countries without any usable price are recorded as 0 and land in the first bin.
    price = round(sum(prices) / len(prices), 2) if prices else 0
    avg_prices.append(price)

    # Local-currency price shown in the hover label; left as 'UNKNOWN' when missing.
    if avg_price != 'UNKNOWN':
        avg_price = round(price_to_kg(float(avg_price), unit), 2)

    # Prices at or above `his_range` are counted in the last bin; without the clamp
    # the computed index would run past the end of `bins` and raise IndexError.
    b = min(int(price // bin_width), bin_count - 1)
    bins[b] += 1
    bin_info[b].append("{} ({} {})".format(country, avg_price, currency))

# Bin edges are derived from a single width so that they stay contiguous when
# `his_range` or `bin_count` is changed (previously the left edge assumed 0.34).
edge_width = Decimal(his_range) / Decimal(bin_count)
his_range_axis = [round((i + 1) * edge_width, 2) for i in range(bin_count)]

his_df = pd.DataFrame()
his_df['counts'] = bins
his_df['left'] = [round(i * edge_width, 2) for i in range(bin_count)]
his_df['right'] = his_range_axis
his_df['info'] = bin_info
src = ColumnDataSource(his_df)

p = figure(plot_height=600, plot_width=600,
           title='Average {} (per KG) price distribution in {}.'.format(product, year),
           x_axis_label='Price (US Dollars)',
           y_axis_label='Number of countries',
           toolbar_location=None,
           tools="")
p.xgrid.grid_line_color = None
p.quad(source=src, bottom=0, top='counts',
       left='left', right='right',
       line_color='black',
       fill_alpha=0.75,
       hover_fill_alpha=1.0,
       hover_fill_color='navy')
h = HoverTool(tooltips=[('Countries + local price', '@info')])
p.add_tools(h)
show(p)





In [1]:
def price_to_kg(price, unit):
    """Scale `price` by the per-kilogram multiplier registered for `unit`.

    Args:
        price: numeric price quoted for one `unit`.
        unit: unit label; must be one of the keys in the multiplier table below.

    Returns:
        `price` multiplied by the factor for `unit`.

    Raises:
        KeyError: if `unit` is not in the table.
    """
    multipliers = dict([
        ('KG', 1),
        ('Unit', 1),          # passed through unchanged
        ('500 G', 2),
        ('400 G', 2.5),
        ('150 G', 6.6666667),
        ('Pound', 2.20462262),
    ])
    factor = multipliers[unit]
    return price * factor

In [2]:
def univariate_non_graphical(data):
    """Print the mean, median, sample variance and standard deviation of `data`.

    The header line reads the notebook globals `year` and `product`, so both
    must be defined before this function is called.

    Args:
        data: non-empty iterable of numbers. It is not modified.

    Raises:
        ValueError: if `data` is empty.
    """
    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(data)
    if not ordered:
        raise ValueError("univariate_non_graphical() needs at least one value")

    count = len(ordered)
    mean = sum(ordered) / count

    # Middle element for odd counts, midpoint of the two middle elements for even.
    middle = count // 2
    if count % 2:
        median = ordered[middle]
    else:
        median = (ordered[middle - 1] + ordered[middle]) / 2

    # Sample variance (n - 1 denominator) is undefined for a single observation;
    # report NaN instead of dividing by zero.
    if count > 1:
        variance = sum((x - mean) * (x - mean) for x in ordered) / (count - 1)
    else:
        variance = float('nan')
    standard_deviation = math.sqrt(variance)

    print("Univariate non-graphical data for {} average {} prices (in US Dollars)".format(year, product))
    print("mean     : {}".format(round(mean, 2)))
    print("median   : {}".format(round(median, 2)))
    print("variance : {}".format(round(variance, 2)))
    print("sd       : {}".format(round(standard_deviation, 2)))
    
# Relies on notebook globals: `avg_prices` is built in the histogram cell, and the
# function itself reads `year` and `product`. Run that cell first, otherwise this
# call raises NameError.
univariate_non_graphical(avg_prices)

NameError: name 'avg_prices' is not defined

In [3]:
def interpolate(values):
    """Fill `None` entries of `values` in place and return the same list.

    Gaps between two known values are filled by linear interpolation, so a
    single missing entry becomes the midpoint of its neighbours. Leading and
    trailing gaps, which have a known value on one side only, are filled with
    that nearest known value. A list without any known value is returned
    unchanged.

    Args:
        values: mutable sequence of numbers and `None` placeholders.

    Returns:
        The same `values` object, with the gaps filled.
    """
    known = [idx for idx, value in enumerate(values) if value is not None]
    if not known:
        # Nothing to anchor on (empty or all-None input).
        return values

    first, last = known[0], known[-1]

    # Edge gaps: copy the nearest known value outwards.
    for idx in range(first):
        values[idx] = values[first]
    for idx in range(last + 1, len(values)):
        values[idx] = values[last]

    # Interior gaps: walk consecutive pairs of known positions.
    for left, right in zip(known, known[1:]):
        span = right - left
        if span == 1:
            continue
        low, high = values[left], values[right]
        for idx in range(left + 1, right):
            values[idx] = low + (high - low) * (idx - left) / span

    return values