In [4]:
import pandas as pd
import numpy as np
import os
from bokeh.plotting import figure
from bokeh.io import output_file, show, save
from functies_voor_selectief_plotten import *
from sklearn.metrics import mean_squared_error
from bokeh.models import HoverTool, BoxSelectTool
from products_per_country import *
from bokeh.plotting import figure, output_file, show, ColumnDataSource

In [3]:
# Always make sure the working directory is the `helpfiles` folder.
# NOTE(review): the path is relative to the current working directory, so this only
# resolves correctly when the kernel sits in a sibling folder of `helpfiles` (or in
# `helpfiles` itself). Presumably the local helper modules live there — confirm.
os.chdir('../helpfiles')
# Print the resulting working directory so it can be checked by eye.
print(os.getcwd())

C:\Users\boble\Documents\GitHub\DataProcessing\helpfiles


In [None]:
# NOTE(review): this import sits below the os.chdir cell; presumably select_df.py is only
# importable once `helpfiles` is the working directory — confirm before moving it.
from select_df import select_df

# Load the datasets by name. The meaning of the first argument (1) is not visible in
# this notebook; see select_df for details.
df_foodprices = select_df(1, 'foodprices')
df_migration = select_df(1, 'migration_movements')
# NOTE(review): df_weather is not referenced by any of the cells shown in this notebook section.
df_weather = select_df(1, 'temperature_and_precipitation')

In [15]:
# Relate the normalised food price of one product to the normalised migration movements
# of one country. Rule of thumb used here: about 95% of the scatter points lie within
# (RMSE * 2) of the regression line, so twice the RMSE is what gets reported.

def plot_scatter_regression(country, product):
    """Fit a straight line through (food price, migration) pairs and print 2 * RMSE.

    The food prices of `product` and the migration movements of `country` are both
    normalised and then paired on their time stamps (compared after rounding to four
    decimals). A least-squares line is fitted through the pairs and
    `country product <2 * RMSE>` is printed. Drawing and saving the figure is
    currently disabled (see the commented-out lines at the end).

    Reads the notebook globals `df_foodprices` and `df_migration`; `normalize`,
    `select_plot_foodprices_average` and `select_plot_migration_movements` are not
    defined in this notebook (presumably they come from the star-imported helpers).

    Parameters
    ----------
    country : str
        Country name as used in both datasets.
    product : str
        Product name as used in the food price dataset.

    Raises
    ------
    ValueError
        If the two series have no time stamp in common.
    """
    # select the input datasets
    dataset_x = normalize(select_plot_foodprices_average(df_foodprices, country, product, 1990, 2020), 'average_price')
    dataset_y = normalize(select_plot_migration_movements(df_migration, country, 1990, 2020), 'Value')

    # the lists that are generated: one entry per matching (price, migration) time stamp
    covered_months = []
    output_x = []
    output_y = []

    # hover tool
    hover = HoverTool(
        tooltips=[
            ("Normalised foodprice", "$x{1.11}"),
            ("Refugees to Europe", "$y{int}"),
        ]
    )

    # figure layout (only used once the plotting at the end is re-enabled)
    f = figure(plot_width=600, plot_height=600, tools=[hover])
    f.title.text="Correlation between {} and refugees in {}".format(product, country)
    f.xaxis.axis_label = "Normalized foodprices"
    f.yaxis.axis_label = "Migration movements"

    # compare the time stamps of both series and keep the pairs that coincide
    for month_x in dataset_x.year:
        for month_y in dataset_y.Timestamp:
            if round(month_x, 4) == round(month_y, 4):
                price = dataset_x.loc[dataset_x['year'] == month_x, 'Normalized_data']
                refugees = dataset_y.loc[dataset_y['Timestamp'] == month_y, 'Normalized_data']
                # Both lookups take the first matching row; .iloc[0] also avoids the
                # deprecated float(<single-element Series>) conversion.
                output_x.append(float(price.iloc[0]))
                output_y.append(float(refugees.iloc[0]))
                covered_months.append(month_x)

    if not output_x:
        # Fail with a readable message instead of an opaque numpy stacking error.
        raise ValueError("No overlapping months for {} in {}".format(product, country))

    # regression line: design matrix [x, 1] so that lstsq returns slope and intercept
    regression_x = np.column_stack((output_x, np.ones(len(output_x))))
    x = pd.Series(output_x)
    # rcond=None selects the current NumPy default explicitly (needs NumPy >= 1.14)
    # and silences the FutureWarning raised by older releases.
    a, b = np.linalg.lstsq(regression_x, output_y, rcond=None)[0]

    # spread around the line; note that the reported number is twice the RMSE
    y = a * x + b
    two_rmse = round(np.sqrt(mean_squared_error(output_y, y)) * 2, 2)

    print(country, product, two_rmse)

    # plotting is currently disabled; un-comment to draw the graph
#     f.scatter(output_x, output_y, color='red', legend="2 * RMSE = {}".format(two_rmse))
#     f.line(x, a * x + b, color='blue')

    # saving is currently disabled; un-comment to write and show the HTML file
#     output_file("{}_{}.html".format(country, product))
#     save(f)
#     show(f)

plot_scatter_regression('Sudan', 'Wheat')

Sudan Wheat 0.06




In [8]:
# Switch the working directory to the regression-plot folder (presumably so that HTML
# files written with a bare file name end up there — confirm).
# NOTE(review): the path is relative, so running this cell a second time resolves against
# the new working directory and points at a different, probably non-existent, folder.
os.chdir('../plotted comparisons/normalized_regression_plots')

In [9]:
# Print the current working directory so the directory change can be checked by eye.
print(os.getcwd())

C:\Users\boble\Documents\GitHub\DataProcessing\plotted comparisons\normalized_regression_plots


In [10]:
# Helper lists: countries per region

Africa = [
    'Central African Republic',
    'Djibouti',
    'Gambia',
    'Guinea',
    'Kenya',
    'Lesotho',
    'Liberia',
    'Madagascar',
    'Malawi',
    'Mali',
    'Mauritania',
    'Mozambique',
    'Niger',
    'Nigeria',
    'Rwanda',
    'Senegal',
    'South Sudan',
    'Sudan',
    'Uganda',
    'Zambia',
    'Zimbabwe',
]
# Kenya and Lesotho caused problems

Middle_East = [
    'Afghanistan',
    'Iraq',
    'Iran',
    'Lebanon',
    'State of Palestine',
    'Pakistan',
]

# see the bottom cell for all products

In [None]:
# Run the scatter/regression analysis for every product of every country in Africa.

for country in Africa:
    for product in products_country(df_foodprices, country):
        try:
            plot_scatter_regression(country, product)
        except Exception as error:
            # Best effort: one failing combination must not stop the batch, but report
            # why it failed. Catching Exception (instead of a bare except) keeps
            # Ctrl-C / kernel interrupts working during this long loop.
            print('Error: {}_{} ({}: {})'.format(country, product, type(error).__name__, error))



Central African Republic Maize 0.21
Central African Republic Rice 0.18
Central African Republic Wheat flour 0.07
Central African Republic Oil (palm) 0.26
Central African Republic Sorghum 0.06
Central African Republic Millet 0.17
Central African Republic Groundnuts (unshelled) 0.21
Central African Republic Beans (niebe) 0.16
Central African Republic Meat (beef) 0.28
Central African Republic Sesame 0.07
Central African Republic Oil (groundnut) 0.19
Central African Republic Cassava (cossette) 0.17
Djibouti Bread 0.37
Djibouti Wheat flour 0.35
Djibouti Rice (imported) 0.32
Djibouti Sorghum 0.18
Djibouti Beans (white) 0.37
Djibouti Millet 0.37
Djibouti Oil (vegetable) 0.37
Djibouti Sugar 0.37
Djibouti Pasta 0.36
Djibouti Sorghum (white) 0.4
Djibouti Sorghum (red) 0.41
Djibouti Fuel (kerosene) 0.37
Gambia Maize (local) 0.19
Gambia Sorghum 0.33
Gambia Millet 0.4
Gambia Groundnuts (unshelled) 0.39
Gambia Groundnuts (shelled) 0.37
Gambia Rice (long grain, imported) 0.27
Gambia Rice (medium grai

In [12]:
# Plot the food prices of all products of one country in a single scatter plot.

def plot_scatter_regression_all(country):
    """Scatter normalised food prices of every product against migration movements.

    For each product returned by `products_country` the normalised price series is
    paired with the (non-normalised) migration `Value` series on matching time stamps
    (compared after rounding to four decimals). All pairs are drawn as red markers in
    one bokeh figure, which is then shown.

    Reads the notebook globals `df_foodprices` and `df_migration`; `normalize`,
    `products_country`, `select_plot_foodprices_average` and
    `select_plot_migration_movements` are not defined in this notebook (presumably
    they come from the star-imported helpers).

    Parameters
    ----------
    country : str
        Country name as used in both datasets.
    """
    # hover tool
    hover = HoverTool(
        tooltips=[
            ("Normalised foodprice", "$x{1.11}"),
            ("Refugees to Europe", "$y{int}"),
        ]
    )

    # figure layout
    f = figure(plot_width=600, plot_height=600, tools=[hover])
    f.title.text="Correlation between foodprices and refugees in {}".format(country)
    f.xaxis.axis_label = "Normalized foodprices"
    f.yaxis.axis_label = "Migration movements"

    # The migration series does not depend on the product, so select it once
    # instead of once per product.
    dataset_y = select_plot_migration_movements(df_migration, country, 1990, 2020)

    # the lists that are generated: one entry per matching (price, migration) time stamp
    output_x = []
    output_y = []

    for product in products_country(df_foodprices, country):
        dataset_x = normalize(select_plot_foodprices_average(df_foodprices, country, product, 1990, 2020), 'average_price')

        # compare the time stamps of both series and keep the pairs that coincide
        for month_x in dataset_x.year:
            for month_y in dataset_y.Timestamp:
                if round(month_x, 4) == round(month_y, 4):
                    price = dataset_x.loc[dataset_x['year'] == month_x, 'Normalized_data']
                    refugees = dataset_y.loc[dataset_y['Timestamp'] == month_y, 'Value']
                    # Both lookups take the first matching row; .iloc[0] also avoids the
                    # deprecated float(<single-element Series>) conversion.
                    output_x.append(float(price.iloc[0]))
                    output_y.append(float(refugees.iloc[0]))

    # plot the graph
    f.scatter(output_x, output_y, color='red')

    show(f)

plot_scatter_regression_all('Zimbabwe')

In [217]:
# Plot the normalised food prices of one product against the normalised migration
# movements of one country, together with the fitted regression line.

def plot_scatter_regression_normalized(country, product):
    """Show a scatter plot with regression line of normalised prices vs. migration.

    Both series are normalised and paired on matching time stamps (compared after
    rounding to four decimals). The pairs are drawn as red circles, the
    least-squares line through them in blue, and the figure is shown.

    Reads the notebook globals `df_foodprices` and `df_migration`; `normalize`,
    `select_plot_foodprices_average` and `select_plot_migration_movements` are not
    defined in this notebook (presumably they come from the star-imported helpers).

    Parameters
    ----------
    country : str
        Country name as used in both datasets.
    product : str
        Product name as used in the food price dataset.

    Raises
    ------
    ValueError
        If the two series have no time stamp in common.
    """
    # select the input datasets
    dataset_x = normalize(select_plot_foodprices_average(df_foodprices, country, product, 1990, 2020), 'average_price')
    dataset_y = normalize(select_plot_migration_movements(df_migration, country, 1990, 2020), 'Value')

    # the lists that are generated: one entry per matching (price, migration) time stamp
    output_x = []
    output_y = []

    # figure layout
    f = figure(plot_width=600, plot_height=600)
    f.title.text="Correlation between {} and refugees in {}".format(product, country)
    f.xaxis.axis_label = "Normalized foodprices"
    f.yaxis.axis_label = "Normalized migration movements"

    # compare the time stamps of both series and keep the pairs that coincide
    for month_x in dataset_x.year:
        for month_y in dataset_y.Timestamp:
            if round(month_x, 4) == round(month_y, 4):
                price = dataset_x.loc[dataset_x['year'] == month_x, 'Normalized_data']
                refugees = dataset_y.loc[dataset_y['Timestamp'] == month_y, 'Normalized_data']
                # Both lookups take the first matching row; .iloc[0] also avoids the
                # deprecated float(<single-element Series>) conversion.
                output_x.append(float(price.iloc[0]))
                output_y.append(float(refugees.iloc[0]))

    if not output_x:
        # Fail with a readable message instead of an opaque numpy stacking error.
        raise ValueError("No overlapping months for {} in {}".format(product, country))

    # regression line: design matrix [x, 1] so that lstsq returns slope and intercept
    regression_x = np.column_stack((output_x, np.ones(len(output_x))))
    x = pd.Series(output_x)
    # rcond=None selects the current NumPy default explicitly (needs NumPy >= 1.14)
    # and silences the FutureWarning raised by older releases.
    a, b = np.linalg.lstsq(regression_x, output_y, rcond=None)[0]

    # plot the graph
    f.circle(output_x, output_y, color='red')
    f.line(x, a * x + b, color='blue')

    show(f)

plot_scatter_regression_normalized('Gambia', 'Rice (paddy, long grain, local)')



In [184]:
# Timeline of the normalised price of one product in one country, with a linear trend line.

def plot_timeline(country, product, year1, year2):
    """Show the normalised price of `product` in `country` over time with a trend line.

    The price series is selected for the period `year1`..`year2`, normalised and
    drawn as red circles against its `year` column; a least-squares line through the
    points is drawn in blue.

    Reads the notebook global `df_foodprices`; `normalize` and
    `select_plot_foodprices_average` are not defined in this notebook (presumably
    they come from the star-imported helpers).

    Parameters
    ----------
    country : str
        Country name as used in the food price dataset.
    product : str
        Product name as used in the food price dataset.
    year1, year2 :
        Period bounds, passed unchanged to `select_plot_foodprices_average`.

    Raises
    ------
    ValueError
        If the selection contains no rows.
    """
    # Select once and reuse: the time axis and the normalised prices both derive from
    # the same selection.
    prices = select_plot_foodprices_average(df_foodprices, country, product, year1, year2)
    x = prices['year']
    normalized_price = normalize(prices, 'average_price')['Normalized_data']

    if len(x) == 0:
        # Fail with a readable message instead of an opaque numpy stacking error.
        raise ValueError("No price data for {} in {} between {} and {}".format(product, country, year1, year2))

    # regression line: design matrix [year, 1] so that lstsq returns slope and intercept
    regression_x = np.column_stack((x, np.ones(len(x))))
    # rcond=None selects the current NumPy default explicitly (needs NumPy >= 1.14)
    # and silences the FutureWarning raised by older releases.
    a, b = np.linalg.lstsq(regression_x, normalized_price, rcond=None)[0]

    # set the layout
    f = figure(plot_width=600, plot_height=600)

    f.title.text="Price of {} in {} over time".format(product, country)
    f.xaxis.axis_label = "Years"
    # The y values are normalised prices, not migration movements.
    f.yaxis.axis_label = "Normalised price"

    # plot the graph
    f.circle(x, normalized_price, color='red')
    f.line(x, a * x + b, color='blue')

    show(f)

plot_timeline('Gambia', 'Rice (paddy, long grain, local)', 2005, 2018)

  
