In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.offline as pyo
from scipy import signal
import scipy.cluster.hierarchy as spc
from pandas import read_excel
from ipywidgets import widgets
from ipywidgets import interactive, HBox, VBox
import plotly.io as pio

In [None]:
# ISO-3 codes of the 28 countries used for the choropleth maps.
country_list_iso = ["FRA","BEL","BGR","CYP","CZE","DEU","DNK","EST","ESP","FIN","AUT","GBR","GRC","HRV","HUN","IRL","ITA","LTU","LUX","LVA","MLT","NLD","POL","PRT","ROU","SWE","SVN","SVK"]

# Directories holding the quarterly and the yearly (household) CSV exports.
PATH_Q = "data_nico_quarterly/"
PATH_H = "data_nico_household/"
# Keep only real ".csv" files (a substring test would also accept e.g. "x.csv.bak")
# and sort them: os.listdir returns entries in arbitrary order, which would make
# the merge/column order differ between runs.
names_q = sorted(name for name in os.listdir(PATH_Q) if name.endswith(".csv"))
names_h = sorted(name for name in os.listdir(PATH_H) if name.endswith(".csv"))

In [None]:
def proc_cols(df, value_name):
    """Rename the ``Value`` column and discard the metadata columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns ``Value``, ``INDICATOR``,
        ``SUBJECT``, ``MEASURE``, ``FREQUENCY`` and ``Flag Codes``.
    value_name : str
        New name for the ``Value`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    metadata_columns = ['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes']
    renamed = df.rename(columns={"Value": value_name})
    return renamed.drop(columns=metadata_columns)

def generate_df(csv_names,path):
    """Read several CSV files and outer-merge them on ``LOCATION`` and ``TIME``.

    Each file is loaded with ``pd.read_csv`` and cleaned with ``proc_cols``;
    its ``Value`` column is renamed to the file name without extension.

    Parameters
    ----------
    csv_names : iterable of str
        File names (not full paths) to load.
    path : str
        Directory containing the files; a trailing separator is optional.

    Returns
    -------
    pandas.DataFrame or None
        The merged frame, or ``None`` when ``csv_names`` is empty.
    """
    df = None
    for name in csv_names:
        # The value column takes the file's base name (e.g. "HUR.csv" -> "HUR").
        value_name = os.path.splitext(name)[0]
        new_df = proc_cols(pd.read_csv(os.path.join(path, name)), value_name)
        if df is None:
            df = new_df
        else:
            # Outer merge keeps (LOCATION, TIME) pairs present in only one file.
            df = pd.merge(df, new_df, how='outer', on=['LOCATION','TIME'])
    return df

def df_date_format(df):
    """Rewrite quarter suffixes as first-day-of-quarter date suffixes.

    Every occurrence of ``-Q1`` … ``-Q4`` in the frame's string values is
    replaced (as a regex substitution) by ``-01-01``, ``-04-01``, ``-07-01``
    and ``-10-01`` respectively. Returns a new frame.
    """
    quarter_start_month = {1: "01", 2: "04", 3: "07", 4: "10"}
    substitutions = {
        "-Q{}".format(quarter): "-{}-01".format(month)
        for quarter, month in quarter_start_month.items()
    }
    return df.replace(to_replace=substitutions, regex=True)

# Keeps rows from `start_year` on (2004 by default) and turns the year into a date string.
def df_date_format_yearly(df, start_year=2004):
    """Filter yearly data by start year and format ``TIME`` as ``YYYY-01-01``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``TIME`` column that can be compared with ``start_year``.
    start_year : int, optional
        First year to keep (inclusive). Defaults to 2004.

    Returns
    -------
    pandas.DataFrame
        A filtered copy whose ``TIME`` column holds strings such as
        ``"2004-01-01"``; the input frame is left untouched.
    """
    # Copy the filtered slice so the assignment below does not write into a view.
    df = df[df["TIME"] >= start_year].copy()
    df["TIME"] = df["TIME"].astype(str) + "-01-01"
    return df

### quarterly unemployment and GDP growth

In [None]:
# Data sources:
#   GDP:          https://data.oecd.org/gdp/gross-domestic-product-gdp.htm
#   Unemployment: https://data.oecd.org/unemp/harmonised-unemployment-rate-hur.htm#indicator-chart
# Merge every quarterly CSV into one frame keyed by LOCATION and TIME.
df = generate_df(names_q,PATH_Q)

In [None]:
# Preview the merged quarterly frame.
df.head()

In [None]:
# One long-format frame per indicator, with quarter labels rewritten as date strings.
# The 'HUR' and 'GDP_growth' column names come from the CSV file names
# (assumes the quarterly folder contains HUR.csv and GDP_growth.csv — TODO confirm).
df_hur = df_date_format(df[['LOCATION','TIME','HUR']])
df_gdp = df_date_format(df[['LOCATION','TIME','GDP_growth']])

In [None]:
# Number of distinct LOCATION codes present in the quarterly data.
len(df_gdp['LOCATION'].unique())

In [None]:
# Reshape from long to wide: one row per TIME, one column per LOCATION.
# NOTE: this rebinds df_hur / df_gdp, so the cell cannot be re-run without
# re-running the cell that builds the long-format frames first.
df_hur = df_hur.pivot_table(values='HUR', index='TIME', columns='LOCATION')
df_gdp = df_gdp.pivot_table(values='GDP_growth', index='TIME', columns='LOCATION')

In [None]:
# Preview the wide unemployment table.
df_hur.head()

In [None]:
def plot_lines(df,value_smooth):
    """Build a plotly line chart with one trace per column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame; the index is used as the x axis and each column is a line.
    value_smooth : int
        ``0`` plots the raw values. Any other value is passed to
        ``signal.savgol_filter`` as the window length (polynomial order 3).

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; it is not shown here.
    """
    fig = go.Figure()
    smoothing_enabled = value_smooth != 0
    for column_name in df.columns:
        y_values = df[column_name]
        if smoothing_enabled:
            y_values = signal.savgol_filter(y_values, value_smooth, 3)
        fig.add_trace(go.Scatter(x=df.index, y=y_values, mode='lines', name=column_name))
    return fig

In [None]:
# Length of one row of the wide table, i.e. the number of LOCATION columns.
len(df_hur.iloc[1])

In [None]:
# Plot the raw (unsmoothed, value_smooth=0) series for every country.
plot_lines(df_hur,0).show()
plot_lines(df_gdp,0).show()

In [None]:
# Spearman (rank) correlation between every pair of country columns.
df_corr_hur = df_hur.corr(method ='spearman')
df_corr_gdp = df_gdp.corr(method ='spearman')

In [None]:
# Sanity check: the correlation matrix should be square (countries x countries).
df_corr_hur.shape

In [None]:
def getIdx(corr,variableTweak):
    """Cluster the rows of ``corr`` hierarchically and return flat cluster labels.

    Pairwise distances between rows are computed with ``pdist`` (default
    metric), linked with complete linkage, and the tree is cut at
    ``variableTweak`` times the largest pairwise distance.

    Parameters
    ----------
    corr : array-like, 2-D
        Matrix whose rows are clustered (here, a correlation matrix).
    variableTweak : float
        Fraction of the maximum pairwise distance used as the cut threshold.

    Returns
    -------
    numpy.ndarray
        One cluster label per row of ``corr``.
    """
    pairwise = spc.distance.pdist(corr)
    tree = spc.linkage(pairwise, method='complete')
    cutoff = variableTweak * pairwise.max()
    return spc.fcluster(tree, cutoff, criterion='distance')

def getGroup(number,idx):
    """Return the positions in ``idx`` whose label equals ``number``.

    Parameters
    ----------
    number : int
        Cluster label to look for.
    idx : sequence
        Cluster labels, one per item.

    Returns
    -------
    list of int
        Zero-based positions, in increasing order.
    """
    return [position for position, label in enumerate(idx) if label == number]


def plotDifferentGroup(idx,dfPlot,indicePlot,val):
    """Show one line chart per cluster, containing that cluster's columns.

    Parameters
    ----------
    idx : sequence of int
        Cluster label for each column of ``dfPlot``, in column order;
        labels ``1 .. max(idx)`` are plotted.
    dfPlot : pandas.DataFrame
        Wide frame whose columns are grouped.
    indicePlot : int
        Smoothing argument forwarded to ``plot_lines``.
    val : str
        Indicator name inserted into each figure title.
    """
    highest_label = max(idx)
    for group_id in range(1, highest_label + 1):
        member_positions = getGroup(group_id, idx)
        member_columns = dfPlot.columns[member_positions]
        fig = plot_lines(dfPlot[member_columns], indicePlot)
        fig.update_layout(title_text = 'Visualization of {} for group '.format(val)+str(group_id),)
        fig.show()

# Cut thresholds (as a fraction of the largest pairwise distance) chosen so
# that 4 groups are plotted. Swap the two lines to cluster on GDP growth instead.
idx = getIdx(df_corr_hur,0.65)
# idx = getIdx(df_corr_gdp,0.8)

In [None]:
# One chart per cluster; use the frame that matches the one `idx` was computed from.
plotDifferentGroup(idx,df_hur,0,"Unemployment")
# plotDifferentGroup(idx,df_gdp,0,"Gdp growth")

In [None]:
def plotMapStatic(idx, val, country_list=None):
    """Show a static choropleth of Europe coloured by cluster label.

    Parameters
    ----------
    idx : sequence of int
        Cluster label per country, in the same order as ``country_list``.
    val : str
        Indicator name inserted into the figure title.
    country_list : sequence of str, optional
        ISO-3 codes matching ``idx`` position by position. Defaults to the
        module-level ``country_list_iso`` for backward compatibility; pass
        the columns of the clustered frame whenever the labels follow a
        different order (``country_list_iso`` is not alphabetical).

    Raises
    ------
    ValueError
        If ``idx`` and ``country_list`` have different lengths, since the
        map would then colour countries with labels that are not theirs.
    """
    if country_list is None:
        country_list = country_list_iso
    locations = list(country_list)
    if len(locations) != len(idx):
        raise ValueError(
            "got {} cluster labels for {} countries".format(len(idx), len(locations))
        )

    fig = go.Figure(data=go.Choropleth(
        locations=locations, # ISO-3 country codes
        z = idx, # cluster label used for the colour
        locationmode = 'ISO-3', # how entries in `locations` are interpreted
        colorscale="Blugrn",
        colorbar_title = "Cluster", # only visible if showscale is turned on
        showscale = False,
    ))

    fig.update_layout(
        title_text = 'Grouping countries according to similarity in {}'.format(val),
        geo_scope='europe', # limit map scope to Europe
        autosize=False,
        width=800,
        height=800,
        dragmode = False,
    )
    fig.show()

# `idx` was computed from `df_corr_hur`, so its labels follow that frame's
# column order; pass those columns so each label lands on its own country.
idx2=idx.copy()
plotMapStatic(idx2,'unemployment', country_list=df_corr_hur.columns)
# plotMapStatic(idx2,'GDP growth', country_list=df_corr_gdp.columns)

### yearly household information

In [None]:
#https://data.oecd.org/hha/household-disposable-income.htm#indicator-chart
# Merge every yearly household CSV into one frame keyed by LOCATION and TIME.
# NOTE: this rebinds `df`, which previously held the quarterly data.
df = generate_df(names_h,PATH_H)

In [None]:
# Keep rows from 2004 on and turn TIME into "YYYY-01-01" strings.
# NOTE(review): not idempotent — after this cell TIME holds strings, so running
# it a second time compares strings with the integer 2004 and is expected to
# fail; re-run the load cell first.
df = df_date_format_yearly(df)

In [None]:
# Display the yearly household frame.
df

# Tinmar's section starts here

In [None]:
#split europe:

In [None]:
# Load PIB28.csv and count the distinct LOCATION codes it contains.
df2 = pd.read_csv('PIB28.csv')
df2['LOCATION'].unique().size

In [None]:
# Name of the column to cluster on.
column = "Value"
# Wide table built from PIB28.csv: one row per TIME, one column per LOCATION.
# NOTE(review): the name `df_debt` is reused later for household data.
df_debt = df2[["LOCATION","TIME",column]].pivot_table(values=column, index='TIME', columns ='LOCATION')

In [None]:
# Display the raw PIB28 table.
df2
#df_debt['LOCATIONS'].unique()#[df_debt['LOCATION']=='ESP']

In [None]:
from sklearn.cluster import KMeans

In [None]:
def plotMapStatic(idx, country_list, title):
    """Show a static choropleth of Europe coloured by group label.

    This definition replaces the earlier two-argument ``plotMapStatic``.

    Parameters
    ----------
    idx : sequence of int
        Group label per country, in the same order as ``country_list``.
    country_list : sequence of str
        ISO-3 country codes.
    title : str
        Figure title.
    """
    choropleth = go.Choropleth(
        locations=country_list,  # ISO-3 country codes
        z=idx,  # group label used for the colour
        locationmode='ISO-3',  # how entries in `locations` are interpreted
        colorscale="Blugrn",
        colorbar_title="Millions USD",  # not displayed: showscale is False
        showscale=False,
    )
    layout_options = dict(
        title_text=title,
        geo_scope='europe',  # restrict the map to Europe
        autosize=False,
        width=800,
        height=800,
        dragmode=False,
    )

    fig = go.Figure(data=choropleth)
    fig.update_layout(**layout_options)
    fig.show()

def plot_k_mean_temporal_data(df, title, n_clusters=3):
    """Cluster the columns of ``df`` with K-means and map the result.

    Each column (country) is treated as one sample whose features are the
    values over the rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with one column per country code.
    title : str
        Title of the choropleth.
    n_clusters : int, optional
        Number of K-means clusters (default 3).

    Returns
    -------
    numpy.ndarray
        Cluster label per column of ``df``, in column order.
    """
    country_list = df.columns.values
    samples = df.transpose().values
    model = KMeans(n_clusters=n_clusters, random_state=0)  # fixed seed for reproducibility
    model.fit(samples)
    groups = model.predict(samples)
    plotMapStatic(groups, country_list, title)
    return groups

In [None]:
# Country codes that will be clustered (the columns of the wide table).
df_debt.columns.values#.size

In [None]:
# Cluster the PIB28 series into 3 groups and map them; the title names the
# source file and column so the figure is self-explanatory.
plot_k_mean_temporal_data(df_debt, 'K-means grouping of countries (PIB28.csv, column "Value")',3)

In [None]:
# Name of the household column to analyse.
column = "House_net_worth"
# Wide table from the yearly household frame: one row per TIME, one column per LOCATION.
# NOTE: this rebinds `df_debt`, which previously held the PIB28 pivot.
df_debt = df[["LOCATION","TIME",column]].pivot_table(values=column, index='TIME', columns='LOCATION')

In [None]:
# Plot the raw (unsmoothed) household series for every country.
plot_lines(df_debt,0).show()

In [None]:
# Spearman correlation between country columns (the next cell recomputes this).
df_corr_debt = df_debt.corr(method ='spearman')

In [None]:
# Cluster countries on the correlation matrix (cut at 0.8 of the largest
# pairwise distance) and plot each group.
# NOTE(review): df_debt is built from the "House_net_worth" column while the
# title says 'household debt' — confirm which label is intended.
df_corr_debt = df_debt.corr(method ='spearman')
idx = getIdx(df_corr_debt,0.8)
plotDifferentGroup(idx,df_debt,0,'household debt')

In [None]:
# `plotMapStatic` was redefined earlier in the notebook as
# (idx, country_list, title), so the old two-argument call raises TypeError.
# Pass the country codes in the order of the cluster labels: `idx` comes from
# `df_debt.corr()`, whose rows/columns follow `df_debt.columns`.
idx2=idx.copy()
plotMapStatic(idx2, df_debt.columns.values, 'Grouping countries according to similarity in household debt')