In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.offline as pyo
from scipy import signal
import scipy.cluster.hierarchy as spc
from pandas import read_excel
from ipywidgets import widgets
from ipywidgets import interactive, HBox, VBox
import plotly.io as pio
from sklearn.cluster import KMeans
from sklearn import linear_model
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score
from plotly.graph_objs import *
from plotly.subplots import make_subplots
import plotly.express as px
import json

In [None]:
# The 28 countries covered by the notebook, keyed by two-letter code and
# mapped to the matching three-letter code. Both lists below keep this
# insertion order, so position k in one list names the same country as
# position k in the other.
_ISO2_TO_ISO3 = {
    "FR": "FRA", "BE": "BEL", "BG": "BGR", "CY": "CYP",
    "CZ": "CZE", "DE": "DEU", "DK": "DNK", "EE": "EST",
    "ES": "ESP", "FI": "FIN", "AT": "AUT", "GB": "GBR",
    "GR": "GRC", "HR": "HRV", "HU": "HUN", "IE": "IRL",
    "IT": "ITA", "LT": "LTU", "LU": "LUX", "LV": "LVA",
    "MT": "MLT", "NL": "NLD", "PL": "POL", "PT": "PRT",
    "RO": "ROU", "SE": "SWE", "SI": "SVN", "SK": "SVK",
}

# Two-letter codes.
country_list = list(_ISO2_TO_ISO3)

# Three-letter codes (used with Plotly's 'ISO-3' location mode further down).
country_list_iso = list(_ISO2_TO_ISO3.values())


# Unemployment: read the long table, then spread it so that each row is one
# TIME value and each column one LOCATION code. pivot_table averages the
# 'Value' entries if a (TIME, LOCATION) pair occurs more than once.
_unemp_long = pd.read_csv('data_nico/Unemp_total.csv', index_col=None)
df_unemp = _unemp_long.loc[:, ['LOCATION', 'TIME', 'Value']].pivot_table(
    index='TIME',
    columns='LOCATION',
    values='Value',
)

# GDP: kept in long format here; it is pivoted in a later cell.
df_gdp = pd.read_csv('data_vinc/PIB28.csv')

In [None]:
def change_config(fig):
    """
    Apply the notebook's shared look to a Plotly figure and return it.

    The figure is modified in place through ``update_layout``,
    ``update_xaxes`` and ``update_yaxes``: transparent backgrounds, a light
    monospace font for title and labels, a fixed height, outside ticks and
    semi-transparent grid lines. The width is left untouched.

    Parameters
    ----------
    fig : object exposing ``update_layout``, ``update_xaxes`` and
        ``update_yaxes`` (e.g. a ``plotly.graph_objects.Figure``).

    Returns
    -------
    The same ``fig`` object, to allow ``change_config(fig).show()``.
    """
    # title and labels
    size_title = 18
    size_axes_labels = 12
    family = 'Andale Mono, monospace'  # font
    color = '#F2F2F2'

    # plot config
    height = 650

    # configuration of the axes (ticks for each label on the axes)
    ticks = 'outside'
    tickwidth = 2
    tickcolor = '#F2F2F2'
    ticklen = 10

    # grid color
    grid_color_x = 'rgba(153, 163, 164,0.5)'
    grid_color_y = 'rgba(153, 163, 164,0.5)'

    # font for title and labels
    font_title = dict(size=size_title, family=family, color=color)
    font_axes_labels = dict(size=size_axes_labels, family=family, color=color)

    # Remove the background and configure the size. `title_font` replaces the
    # deprecated `titlefont` layout property, and passing keyword arguments
    # avoids relying on `Layout` being pulled in by a star import.
    fig.update_layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font=font_axes_labels,
        title_font=font_title,
        height=height,
    )

    # Settings shared by both axes; only the grid colour is set per axis.
    axis_style = dict(
        ticks=ticks,
        tickwidth=tickwidth,
        tickcolor=tickcolor,
        ticklen=ticklen,
        title_font=font_axes_labels,
    )
    fig.update_xaxes(gridcolor=grid_color_x, **axis_style)
    fig.update_yaxes(gridcolor=grid_color_y, **axis_style)

    return fig

In [None]:
# Name of the measure column that gets spread across countries.
column = "Value"

# Long -> wide: one row per TIME value, one column per LOCATION code.
# NOTE: this rebinds `df_gdp` to the pivoted table, so the cell cannot be
# re-run without first re-reading the CSV (the 'LOCATION'/'TIME' columns are
# gone after the first run).
df_gdp = df_gdp.loc[:, ["LOCATION", "TIME", column]].pivot_table(
    index='TIME',
    columns='LOCATION',
    values=column,
)
df_gdp

In [None]:
def plotMapSlider(data,title,zMax,zMin,colorbarTitle,vWidth,vHeight):
    """
    Show a choropleth map of Europe with one slider step per year.

    One choropleth trace is built for each of the first ten rows of `data`;
    the slider toggles which trace is visible. Only the first trace is
    visible initially, so the map matches the slider's starting position.

    Parameters
    ----------
    data : table supporting ``data[country_list_iso].iloc[ix]``
        Must have one column per code in the module-level `country_list_iso`
        and at least ten rows. Slider labels are generated as 2004, 2005, ...
        from the row position (assumes row 0 is 2004 - TODO confirm against
        the data's index).
    zMax, zMin : number
        Upper and lower bound of the colour scale.
    title, colorbarTitle, vWidth, vHeight
        Accepted for call compatibility but not used: the chart title, the
        colour-bar title and the height are fixed inside the function.

    Returns
    -------
    None. The figure is displayed with ``pio.show``.
    """
    n_years = 10
    first_year = 2004

    data_slider = []
    for ix in range(n_years):
        data_one_year = dict(
            type='choropleth',
            locations=country_list_iso,  # Spatial coordinates
            z=list(data[country_list_iso].iloc[ix].values),
            locationmode='ISO-3',  # set of locations match entries in `locations`
            autocolorscale=False,
            colorscale="Magma",
            reversescale=True,
            zmax=zMax,
            zmin=zMin,
            colorbar_title="GDP (€)",
            # Without this every trace starts visible and the last year is
            # drawn on top while the slider still points at the first year.
            visible=(ix == 0),
        )
        data_slider.append(data_one_year)

    # One slider step per trace: show that trace, hide all the others.
    steps = []
    for i in range(len(data_slider)):
        mask = [False] * len(data_slider)
        mask[i] = True
        steps.append(dict(method='restyle', args=['visible', mask], label=str(first_year + i)))

    # create 'sliders' object from the 'steps'
    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

    layout = dict(
        geo=dict(scope='europe', bgcolor='rgba(0,0,0,0)'),
        sliders=sliders,
        title={
            'text': "Evolution of the GDP over the years",
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        autosize=True,
        dragmode=False,
        height=1000,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    fig = dict(data=data_slider, layout=layout)

    # To export the map as a standalone HTML page, uncomment:
    # pio.write_html(fig, '../DataStoryMeatConsumption/graphs/economy/map_gdp.html')
    pio.show(fig)

In [None]:
# Draw the GDP-per-country map with a year slider; the colour scale is
# clamped to [0, 50000]. Only `data`, `zMax` and `zMin` are read by
# plotMapSlider as defined above; the remaining arguments are placeholders.
plotMapSlider(
    data=df_gdp,
    title='GDP over the years',
    zMax=50000,
    zMin=0,
    colorbarTitle='oij',
    vWidth=1300,
    vHeight=900,
)

# GDP / meat consumption

In [None]:
# Meat consumption per inhabitant, read in long format.
_meat_long = pd.read_csv('data_vinc/meat_consumption_per_habita_2004_2013.csv', index_col=0)

# Not every country is available in this dataset; these are the three-letter
# codes for the ones that are. The list is assigned positionally to the
# pivoted columns below, so its order has to follow pivot_table's column
# order (the sorted `Area` labels) - presumably alphabetical by English
# country name; verify against the CSV.
# NOTE(review): 'POR' is not the ISO 3166-1 alpha-3 code for Portugal (that
# is 'PRT', as used in country_list_iso). If the GDP table is keyed by 'PRT',
# Portugal drops out of the later intersection. Confirm before changing it,
# as later cells may depend on the current country count.
country_list2 = [
    'AUT', 'BEL', 'BGR', 'HRV', 'DNK', 'EST',
    'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'IRL', 'ITA',
    'LVA', 'LTU', 'LUX', 'MLT', 'NLD', 'POL', 'POR',
    'ROU', 'SVK', 'SVN', 'ESP', 'SWE', 'GBR',
]

# Name of the measure column that gets spread across countries.
column = "consumption per habita"

# Long -> wide: one row per `years` value, one column per `Area`; the
# columns are then relabelled with the codes above.
df_meat = _meat_long.loc[:, ["Area", "years", column]].pivot_table(
    index='years',
    columns='Area',
    values=column,
)
df_meat.columns = country_list2

In [None]:
# Display the pivoted meat-consumption table (rows: `years`, columns: country codes).
df_meat

In [None]:
def intersection(lst1, lst2): 
    """
    Return the elements of `lst1` that also occur in `lst2`.

    The result is a new list that keeps the order of `lst1`, including any
    repeated elements; membership is tested with ``in`` against `lst2`.
    """
    common = []
    for candidate in lst1:
        if candidate in lst2:
            common.append(candidate)
    return common

# `countries_both` holds the country codes that appear as columns in both the
# meat-consumption table and the GDP table, in the meat table's column order.
countries_both = intersection(df_meat.columns, df_gdp.columns)
print("data meat and GDP available for:", countries_both, sep="\n")

In [None]:
# Keep only the shared countries and order the columns by code, so that both
# tables have the same column layout.
df_meat = df_meat.loc[:, countries_both].sort_index(axis=1)
df_gdp = df_gdp.loc[:, countries_both].sort_index(axis=1)

In [None]:
# K-means labels per year (one array per year, ordered like `countries_both`).
# Kept at module level because later cells read it after
# plot_kmeans_slider() has run.
k_means_groups = []


def plot_kmeans_slider():
    """
    Build the GDP vs. meat-consumption scatter with a year slider.

    For each of the first ten rows of the module-level `df_gdp` / `df_meat`
    tables, the countries in `countries_both` are clustered into three groups
    with K-means on their (GDP, meat consumption) pair. Each country gets its
    own marker trace, coloured by cluster, followed by one trace holding the
    three centroids. The slider shows one year's traces at a time.

    Side effect: the module-level list `k_means_groups` is emptied and then
    filled with one label array per year.

    Returns
    -------
    The styled figure (after `change_config`).
    """
    colors_kmeans = ['#FF4464', '#2B2B2B', '#F2F2F2']
    color_centroid = '#0d98ba'
    n_years = 10
    first_year = 2004

    # Start from scratch so that re-running the cell does not pile up labels.
    k_means_groups.clear()

    # Create figure
    fig = go.Figure()

    for i in range(n_years):
        # Only the first year's traces are shown until the slider is moved.
        visible = (i == 0)

        gdp_year = df_gdp[countries_both].iloc[i].to_numpy()
        meat_year = df_meat[countries_both].iloc[i].to_numpy()

        # K-means on one (GDP, meat) point per country; the fixed seed keeps
        # the label numbering reproducible between runs.
        data_kmean = np.column_stack((gdp_year, meat_year))
        k_means = KMeans(n_clusters=3, random_state=39847).fit(data_kmean)
        groups = k_means.predict(data_kmean)
        k_means_groups.append(groups)

        # one scatter per country
        for j, c in enumerate(countries_both):
            fig.add_trace(go.Scatter(
                x=[gdp_year[j]],
                y=[meat_year[j]],
                mode='markers',
                marker=dict(
                    size=16,
                    color=colors_kmeans[groups[j]],
                    line=dict(width=2, color='purple')),
                text=c, name=c, visible=visible))

        fig.add_trace(go.Scatter(
            x=k_means.cluster_centers_[:, 0],
            y=k_means.cluster_centers_[:, 1],
            mode='markers', name='K means centroïds',
            marker_color=color_centroid,
            opacity=1, visible=visible,
            marker=dict(size=10, symbol='square', line=dict(width=1, color='blue'))))

    fig.update_xaxes(range=[8000, 100000])
    fig.update_yaxes(range=[30, 120])

    # Create and add slider. Each year contributes one trace per country plus
    # one centroid trace; derive the count instead of hard-coding it.
    traces_per_year = len(countries_both) + 1
    total_traces = len(fig.data)
    steps = []
    for year_idx, start in enumerate(range(0, total_traces, traces_per_year)):
        mask = [start <= k < start + traces_per_year for k in range(total_traces)]
        steps.append(dict(
            method="restyle",
            args=["visible", mask],
            label=str(first_year + year_idx)
        ))

    sliders = [dict(
        active=0,  # matches the initially visible year (index 0)
        currentvalue={"prefix": "year: "},
        pad={"t": 50},
        steps=steps,
        name="date"
    )]

    fig.update_layout(
        autosize=True,
        sliders=sliders,
        title={
            'text': "Meat consumption depending on the salary",
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        yaxis_title="Yearly meat consumption per capita",
    )

    fig = change_config(fig)
    return fig


plot_kmeans_slider().show()

In [None]:
#pio.write_html(fig, file='../DataStoryMeatConsumption/graphs/economy/meat_salary.html', auto_open=True)

### Mean meat consumption of the K-means groups

In [None]:
# Mean meat consumption of K-means clusters 0 and 1 in three reference years.
# Cluster 0 is reported as "poor" and cluster 1 as "rich" in the prints below.
# NOTE(review): K-means label numbers are arbitrary; confirm that label 0 / 1
# denote the same kind of group in each of the three years.
means_g1, means_g2 = [], []
g1s, g2s = [], []

# Row positions 0, 5 and -1 correspond to the data of years 2004, 2009, 2013.
for y in (0, 5, -1):
    labels = k_means_groups[y]
    group1 = [countries_both[i] for i, label in enumerate(labels) if label == 0]
    group2 = [countries_both[i] for i, label in enumerate(labels) if label == 1]

    g1s.append(group1)
    g2s.append(group2)

    meat_that_year = df_meat.iloc[y]
    means_g1.append(meat_that_year[group1].mean())
    means_g2.append(meat_that_year[group2].mean())

print("means 2004, 2009, 2013 group poor:", means_g1)
print("means 2004, 2009, 2013 group rich:", means_g2)

In [None]:

# Mean meat consumption (left axis) against mean unemployment (right axis)
# for the countries that were in cluster 0 in the first year (g1s[0]).
fkmg = make_subplots(rows=1, cols=1, specs=[[{"secondary_y": True}]])

years = list(range(2004, 2014))

meat_mean_g1 = df_meat[g1s[0]].mean(axis=1)

# The unemployment average uses a fixed list of countries, not g1s[0].
# Rows 6..15 are presumably the years 2004..2013 of df_unemp - TODO confirm
# against the TIME values of the unemployment table.
unemp_mean_g1 = df_unemp[['EST', 'HUN', 'LVA', 'LTU', 'POL', 'SVK', 'SVN']].mean(axis=1)[6:16]

fkmg.add_trace(
    go.Scatter(
        x=years,
        y=meat_mean_g1,
        mode='lines',
        name='mean meat consumption',
        line=dict(color='black', width=5, dash='dashdot'),
    )
)
fkmg.add_trace(
    go.Scatter(
        x=years,
        y=unemp_mean_g1,
        mode='lines',
        name='unemployment',
        line=dict(color='red', width=5, dash='dot'),
    ),
    secondary_y=True,
)

fkmg.update_yaxes(title_text="<b>meat cons", secondary_y=False)
fkmg.update_yaxes(title_text="<b>unemp", secondary_y=True)
fkmg.update_layout(title="group poor")

fkmg.show()
    

In [None]:
# Mean meat consumption (left axis) against mean unemployment (right axis)
# for the countries that were in cluster 1 in the first year (g2s[0]).
# The figure is created once by make_subplots; no separate go.Figure() is
# needed beforehand.
fkmg2 = make_subplots(rows=1, cols=1,
                    specs=[[{"secondary_y": True}]])

years = list(range(2004, 2014))

fkmg2.add_trace(go.Scatter(y = df_meat[g2s[0]].mean(axis=1), x=years, mode='lines', name='mean meat cons', line=dict(color='black', width=5, dash='dashdot')))

# Rows 6..15 are presumably the years 2004..2013 of df_unemp - TODO confirm
# against the TIME values of the unemployment table.
fkmg2.add_trace(go.Scatter(y = df_unemp[g2s[0]].mean(axis=1)[6:16], x=years, mode='lines', name='unemp', line=dict(color='red', width=5, dash='dot')), secondary_y=True)

fkmg2.update_yaxes(title_text="<b>meat cons", secondary_y=False)
fkmg2.update_yaxes(title_text="<b>unemp", secondary_y=True)
fkmg2.update_layout(title="group rich")

fkmg2.show()
    

### Polynomial fit of meat consumption against GDP

In [None]:
# Same countries as `countries_both`, minus Luxembourg: its very large GDP
# makes it an outlier for the fits below.
countries_both_nLUX = [
    'AUT', 'BEL', 'BGR', 'HRV', 'DNK', 'EST',
    'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'IRL',
    'ITA', 'LVA', 'LTU', 'MLT', 'NLD', 'POL',
    'ROU', 'SVK', 'SVN', 'ESP', 'SWE', 'GBR',
]

In [None]:
# model1..model4: least-squares fits on polynomial features of degree 1..4.
model1, model2, model3, model4 = (
    make_pipeline(PolynomialFeatures(degree), linear_model.LinearRegression())
    for degree in range(1, 5)
)

# Pool the ten yearly rows into one flat vector per variable (row after row,
# countries in `countries_both_nLUX` order), so the fit ignores the year.
y = np.concatenate([df_meat[countries_both_nLUX].iloc[i].to_numpy() for i in range(10)])
x = np.concatenate([df_gdp[countries_both_nLUX].iloc[i].to_numpy() for i in range(10)])

# Column vectors, as expected by the scikit-learn estimators.
y_ = y.reshape(-1, 1)
x_ = x.reshape(-1, 1)

# Fit each model on the pooled data: GDP as input, meat consumption as target.
model1.fit(x_, y_)
model2.fit(x_, y_)
model3.fit(x_, y_)
model4.fit(x_, y_)

In [None]:
# Evaluate every fitted model on a dense, evenly spaced GDP grid (1000 points
# from 0 to 100000) so that the fitted curves can be drawn smoothly.
t = np.linspace(0, 100000, 1000).reshape(-1, 1)
yt1, yt2, yt3, yt4 = (
    fitted.predict(t) for fitted in (model1, model2, model3, model4)
)

In [None]:
# R² of the degree-1 and degree-3 models, computed on the same pooled data
# the models were fitted on.
y_true = y_[:, 0]
y_m1 = model1.predict(x_)[:, 0]
y_m3 = model3.predict(x_)[:, 0]

m1_r2 = r2_score(y_true, y_m1)
m3_r2 = r2_score(y_true, y_m3)

print('score model 1: ', m1_r2)
print('score model 3: ', m3_r2)

In [None]:
# Intercept and slope of the degree-1 model. Column 0 of coef_ is skipped:
# presumably it belongs to the constant feature generated by
# PolynomialFeatures (default settings), so the slope sits in column 1.
linreg1 = model1.named_steps['linearregression']
intercept1 = linreg1.intercept_[0]
slope1 = linreg1.coef_[0, 1]
print("model 1 weights:", intercept1, ', ', slope1)

In [None]:
# Intercept followed by the coefficients in columns 1..3 of the degree-3
# model (column 0 of coef_ is skipped, as for the degree-1 model).
linreg3 = model3.named_steps['linearregression']
intercept3 = linreg3.intercept_[0]
coef3_1 = linreg3.coef_[0, 1]
coef3_2 = linreg3.coef_[0, 2]
coef3_3 = linreg3.coef_[0, 3]
print("model 3 weights:", intercept3, ', ', coef3_1, ', ', coef3_2, ', ', coef3_3)

In [None]:
# Plot the pooled (GDP, meat consumption) points together with the fitted
# curves of degree 1 and degree 3. Degrees 2 and 4 are also fitted (yt2, yt4)
# but deliberately left off the chart because, in the authors' opinion, they
# do not represent the data well.

# Hover labels: x and y hold the ten years one after the other, each in
# `countries_both_nLUX` order, so the code list is simply repeated ten times.
countries_both_nLUX_all = countries_both_nLUX * 10

fig3 = go.Figure()
fig3.add_trace(go.Scatter(
    x=x, y=y, mode='markers',
    marker=dict(
        opacity=0.5,
        size=16,
        color='#FF4464',
        line=dict(width=1, color='DarkSlateGrey')),
    name='Countries', text=countries_both_nLUX_all))

fig3.add_trace(go.Scatter(x = t[:,0],y = yt1[:,0], name='Interpolation with d=1', marker_color = 'red', opacity=1,line=dict(color='LightGrey', width=3, dash='dashdot'), visible=True))
# The hex colour is written without the stray trailing space it used to have.
fig3.add_trace(go.Scatter(x = t[:,0],y = yt3[:,0], name='Interpolation with d=3', marker_color = 'red', opacity=1,line=dict(color='#2980b9', width=3, dash='longdashdot')))

fig3.update_xaxes(range=[8000, 60000])
fig3.update_yaxes(range=[30, 120])
fig3.update_layout(title={
        'text': "Meat consumption depending on the salary from 2004 to 2013",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

change_config(fig3).show()

In [None]:
#pio.write_html(fig3, file='../DataStoryMeatConsumption/graphs/economy/interpolation.html', auto_open=True)

### Map of the K-means groups over the years

In [None]:
def plotMapSlider(title,zMax,zMin,colorbarTitle,vWidth,vHeight,
                  save_path='../DataStoryMeatConsumption/graphs/economy/map_groups.html'):
    """
    Show a map of Europe coloured by K-means group, with one slider step per year.

    One choropleth trace is built per entry of the module-level
    `k_means_groups` (label arrays ordered like `countries_both`); only the
    first is visible initially and the slider switches between them. Slider
    labels are generated as 2004, 2005, ... from the entry position.

    Note: this definition replaces the earlier ``plotMapSlider(data, ...)``
    of this notebook, which has a different signature.

    Parameters
    ----------
    zMax, zMin : number
        Upper and lower bound of the colour scale.
    vWidth, vHeight : number
        Width and height of the figure.
    title, colorbarTitle
        Accepted for call compatibility but not used: the chart title is
        fixed inside the function and no colour-bar title is set.
    save_path : str or None, optional
        Where the figure is also written as HTML after being shown. Pass
        ``None`` (or an empty string) to skip writing the file. The default
        keeps the previous behaviour.

    Returns
    -------
    None. The figure is displayed with ``pio.show``.
    """
    first_year = 2004

    data_slider = []
    for ix, groups in enumerate(k_means_groups):
        data_one_year = dict(
            type='choropleth',
            locations=countries_both,  # Spatial coordinates
            z=list(groups),
            locationmode='ISO-3',  # set of locations match entries in `locations`
            autocolorscale=False,
            colorscale="YlGnBu",
            zmax=zMax,
            zmin=zMin,
            visible=(ix == 0),
        )
        data_slider.append(data_one_year)

    # One slider step per trace: show that trace, hide all the others.
    steps = []
    for i in range(len(data_slider)):
        mask = [False] * len(data_slider)
        mask[i] = True
        steps.append(dict(method='restyle', args=['visible', mask], label=str(first_year + i)))

    # create 'sliders' object from the 'steps'
    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

    layout = dict(
        geo=dict(scope='europe', bgcolor='rgba(0,0,0,0)'),
        sliders=sliders,
        autosize=False,
        width=vWidth,
        height=vHeight,
        dragmode=False,
        title={
            'text': "Evolution of the 3 clusters over the years",
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    fig = dict(data=data_slider, layout=layout)
    pio.show(fig)

    # Export only when a destination is given, so the map can be viewed
    # without touching the file system.
    if save_path:
        pio.write_html(fig, save_path)

In [None]:
# Draw the map of the K-means group assigned to each country, with a year
# slider; the colour scale spans the labels 0..2 on a 700 x 700 canvas.
# `title` and `colorbarTitle` are not read by plotMapSlider as defined above.
plotMapSlider(
    title="Eurobarometer",
    zMax=2,
    zMin=0,
    colorbarTitle=" population thinks about it's one of the most important threat",
    vWidth=700,
    vHeight=700,
)