In [None]:
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo
from scipy import signal
import scipy.cluster.hierarchy as spc
from pandas import read_excel
from ipywidgets import widgets
from ipywidgets import interactive, HBox, VBox
import plotly.io as pio
import seaborn as sns
import numpy as np
from sklearn.cluster import KMeans

### Loading pytrends data

In [None]:
# Google Trends client, configured with hl='en-US' and tz=360.
pytrends = TrendReq(hl='en-US', tz=360)

# Topics to query. Each entry is a single-keyword list because it is passed
# as-is to build_payload later on. The "/m/..." values look like Google topic
# identifiers and "l214" like a plain search term — TODO confirm.
kw_list = [
    ["/m/032nch"],   # animal cruelty (topic)
    ["l214"],
    ["/m/047v0jr"],  # slaughter
    ["/m/07628"],    # speciesism
    ["/m/07_lq"],    # vegetarian cuisine
    ["/m/07_jd"],    # vegetarianism
    ["/m/07_hy"],    # veganism
    ["/m/020953"],   # meat substitute
]

# Country codes, see https://abbreviations.yourdictionary.com/articles/abbreviations-european-union.html
# Two-letter codes used as the `geo` argument of the Google Trends requests.
country_list = [
    "FR", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES", "FI",
    "AT", "GB", "GR", "HR", "HU", "IE", "IT", "LT", "LU", "LV",
    "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
]
# Three-letter codes for the same countries, in the same order; used as
# choropleth locations with locationmode 'ISO-3'.
country_list_iso = [
    "FRA", "BEL", "BGR", "CYP", "CZE", "DEU", "DNK", "EST", "ESP", "FIN",
    "AUT", "GBR", "GRC", "HRV", "HUN", "IRL", "ITA", "LTU", "LUX", "LVA",
    "MLT", "NLD", "POL", "PRT", "ROU", "SWE", "SVN", "SVK",
]



In [None]:
# Download the complete interest-over-time history of every keyword for every
# country and cache one DataFrame per keyword (one column per country) as a
# pickle named after the keyword's position in kw_list.
for x, keyword in enumerate(kw_list):
    country_frames = []  # one single-column frame per country that returned data
    listColumn = []      # country codes, in the same order as country_frames

    for country in country_list:
        # Exactly one request per (keyword, country) pair.
        pytrends.build_payload(keyword, cat=0, timeframe='all', geo=country, gprop='')
        ledata = pytrends.interest_over_time()
        if ledata.empty:
            # No data for this keyword in this country: leave the country out.
            # NOTE(review): when a country is skipped, the columns no longer
            # line up one-to-one with country_list_iso, which the map helpers
            # use as locations — confirm this is acceptable.
            continue
        # Keep only the keyword column; this discards the extra trailing column
        # returned next to it.
        country_frames.append(ledata[keyword])
        listColumn.append(country)

    if not country_frames:
        # Nothing came back for any country; there is nothing to save.
        print("No Google Trends data for keyword " + str(keyword) + "; nothing saved")
        continue

    # Single concat; the inner join keeps only the dates present for every country.
    newDataframe = pd.concat(country_frames, axis=1, join='inner')
    newDataframe.columns = listColumn
    # Skip the first 19 rows, which the original author flagged as "strange" data.
    newDataframe = newDataframe[19:]
    newDataframe.to_pickle("./data_vinc/GoogleTrend"+str(x)+".pkl")



In [None]:
# Reload the cached Google Trends frames. The number in each file name is the
# position of the keyword in kw_list at the time the pickle was written.
(
    df_animal_cruelty,
    df_l214,
    df_abattage,
    df_specism,
    df_vegan_cook,
    df_vegetarism,
    df_veganism,
    df_meat_substitute,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "./data_vinc/GoogleTrend0.pkl",
        "./data_vinc/GoogleTrend1.pkl",
        "./data_vinc/GoogleTrend2.pkl",
        "./data_vinc/GoogleTrend3.pkl",
        "./data_vinc/GoogleTrend4.pkl",
        "./data_vinc/GoogleTrend5.pkl",
        "./data_vinc/GoogleTrend6.pkl",
        "./data_vinc/GoogleTrend7.pkl",
    )
)

### Plot raw data

In [None]:
def plot_lines(df,value_smooth):
    """Return a plotly figure with one line trace per column of `df`.

    Parameters
    ----------
    df : DataFrame whose index is used as the x axis and whose columns are
        each drawn as a separate trace named after the column.
    value_smooth : window length passed to `signal.savgol_filter` (with a
        polynomial order of 3). A value of 0 disables smoothing and plots
        the raw column values.

    Returns
    -------
    The populated `go.Figure`; it is not shown here.
    """
    figure = go.Figure()
    smoothing_disabled = value_smooth == 0
    for column in df.columns:
        series = df[column]
        if smoothing_disabled:
            y_values = series
        else:
            # Savitzky-Golay smoothing, cubic polynomial.
            y_values = signal.savgol_filter(series, value_smooth, 3)
        figure.add_trace(go.Scatter(x=df.index, y=y_values, mode='lines', name=column))
    return figure

In [None]:
# Notable events as [date, description] pairs.
# NOTE(review): the 2016-03-15 entry appears twice and the list is not in
# chronological order — confirm whether that is intentional.
event_dates = [
    [event_day, description]
    for event_day, description in (
        ('2014-03-15', 'tortured kitty in marseille'),
        ('2016-03-15', 'l214 slaughter video of lambs'),
        ('2016-03-15', 'l214 slaughter video of lambs'),
        ('2018-11-03', 'l214 slaughter video, slaughtering building closed'),
        ('2015-12-01', 'dog abused in florida, covered by es media'),
    )
]

In [None]:
# One smoothed line figure per topic (smoothing window of 7 for all of them).
# The L214 figure is restricted to the 'FR' column.
(
    fig_animal_cruelty,
    fig_l214,
    fig_abattage,
    fig_specism,
    fig_vegan_cook,
    fig_vegetarism,
    fig_veganism,
    fig_meat_substitute,
) = (
    plot_lines(topic_frame, 7)
    for topic_frame in (
        df_animal_cruelty,
        df_l214[['FR']],
        df_abattage,
        df_specism,
        df_vegan_cook,
        df_vegetarism,
        df_veganism,
        df_meat_substitute,
    )
)

In [None]:
# Write the smoothed veganism figure to a standalone HTML file and open it in
# the browser (auto_open=True). `pio` comes from the notebook's import cell, so
# it is not re-imported here.
pio.write_html(fig_veganism, file='couilles.html', auto_open=True)

In [None]:
# Give every topic figure a left-anchored title, then display it.
for _topic_figure, _topic_title in (
    (fig_animal_cruelty, 'Animal Cruelty interest over years'),
    (fig_l214, 'L214 interest over years'),
    (fig_abattage, 'Slaughter interest over years'),
    (fig_specism, 'Specism interest over years'),
    (fig_vegan_cook, 'Vegan cook interest over years'),
    (fig_vegetarism, 'Vegetarism interest over years'),
    (fig_veganism, 'Veganism interest over years'),
    (fig_meat_substitute, 'Meat substitute interest over years'),
):
    _topic_figure.update_layout(title_text=_topic_title, title_xanchor='left')
    _topic_figure.show()

### Hierarchical clustering

In [None]:
# Raw veganism series: a smoothing value of 0 makes plot_lines plot the
# unfiltered columns. The returned figure is the cell's output.
plot_lines(df_veganism, 0)

In [None]:
#cluster countries based on correlation measure
def getIdx(corr,variableTweak):
    """Assign a flat cluster label to every row of `corr`.

    Pairwise distances between the rows of `corr` are computed with `pdist`,
    a complete-linkage hierarchy is built on them, and the tree is cut at a
    distance of `variableTweak` times the largest pairwise distance.

    Parameters
    ----------
    corr : 2-D array-like, one row per item to cluster.
    variableTweak : fraction of the maximum pairwise distance used as the
        cut-off threshold.

    Returns
    -------
    The array of cluster labels produced by `fcluster`, one per row.
    """
    distances = spc.distance.pdist(corr)
    tree = spc.linkage(distances, method='complete')
    cutoff = variableTweak * distances.max()
    return spc.fcluster(tree, cutoff, 'distance')

def getGroupe(number,idx):
    """Return the positions in `idx` whose value equals `number`.

    Parameters
    ----------
    number : cluster label to look for.
    idx : indexable sequence of cluster labels.

    Returns
    -------
    List of integer positions, in increasing order (empty if no match).
    """
    return [position for position in range(len(idx)) if idx[position] == number]

def plotDifferentGroup(idx,dfPlot,indicePlot):
    """Show one line figure per cluster.

    Parameters
    ----------
    idx : cluster label of each column of `dfPlot`, by position; labels are
        iterated from 1 up to `max(idx)`.
    dfPlot : DataFrame whose columns are the series being clustered.
    indicePlot : smoothing value forwarded to `plot_lines`.
    """
    highest_label = max(idx)
    for label in range(1, highest_label + 1):
        member_columns = dfPlot.columns[getGroupe(label, idx)]
        plot_lines(dfPlot[member_columns], indicePlot).show()
        

In [None]:
def plotMapStatic(idx, title):
    """Build a static choropleth of Europe coloured by the values in `idx`.

    Parameters
    ----------
    idx : one value per entry of `country_list_iso`, in the same order; used
        as the colour-coded data.
    title : title of the figure.

    Returns
    -------
    The `go.Figure`; it is not shown here.
    """
    choropleth = go.Choropleth(
        locations=country_list_iso,      # ISO-3 country codes
        z=idx,                           # data to be colour-coded
        locationmode='ISO-3',            # how `locations` entries are interpreted
        colorscale="Blugrn",
        colorbar_title="Millions USD",   # not rendered: showscale is False
        showscale=False,
    )
    figure = go.Figure(data=choropleth)

    figure.update_layout(
        title_text=title,
        geo_scope='europe',              # restrict the map to Europe
        autosize=False,
        width=800,
        height=800,
        dragmode=False,
    )
    return figure
    
def plotByGroup(df, threshold=0.5, method='hierarchical', n_clusters=3):
    """Cluster the columns of `df`, plot each cluster and return a map of them.

    Parameters
    ----------
    df : DataFrame with one column per country.
    threshold : cut-off fraction forwarded to `getIdx`; only used when
        `method` is 'hierarchical'.
    method : 'hierarchical' clusters on the Kendall correlation matrix of the
        columns; any other value uses k-means on the raw series.
    n_clusters : number of k-means clusters; ignored for 'hierarchical'.

    Returns
    -------
    Whatever `plotMapStatic` returns for the computed labels.
    """
    if method == 'hierarchical':
        # The correlation matrix is only needed on this path, so it is not
        # computed for k-means.
        idx = getIdx(df.corr(method='kendall'), threshold)
    else:
        # Shift the 0-based k-means labels to start at 1, the first label
        # plotDifferentGroup iterates over.
        idx = getKMeansGroups(df, n_clusters) + 1
    plotDifferentGroup(idx, df, 11)
    return plotMapStatic(idx, 'Clustering by interest on subject')
    
def getKMeansGroups(df, n_clusters=3):
    """Cluster the columns of `df` with k-means and return their labels.

    The frame is transposed so that every column becomes one sample. The
    model is fitted with `random_state=0` and then used to predict the label
    of those same samples.

    Parameters
    ----------
    df : DataFrame with one column per item to cluster.
    n_clusters : number of clusters.

    Returns
    -------
    Array of predicted cluster labels, one per column of `df`.
    """
    samples = df.transpose().values
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(samples)
    return model.predict(samples)

In [None]:
# Cluster the veganism series with k-means (default n_clusters) and keep the
# value returned by plotByGroup in `f`.
f = plotByGroup(df_veganism, method='kmeans')

In [None]:
# Write the cluster map held in `f` to a standalone HTML file and open it in
# the browser (auto_open=True). `pio` comes from the notebook's import cell, so
# it is not re-imported here.
pio.write_html(f, file='couille.html', auto_open=True)

In [None]:
# Two k-means clusters on the df_vegan_cook series; the value returned by
# plotByGroup is the cell's output.
plotByGroup(df_vegan_cook, n_clusters=2, method='kmeans')

In [None]:
# Two k-means clusters on the df_animal_cruelty series; the value returned by
# plotByGroup is the cell's output.
plotByGroup(df_animal_cruelty, n_clusters=2, method='kmeans')

In [None]:
# Two k-means clusters on df_general.
# NOTE(review): df_general is not defined in any cell visible in this notebook;
# on a fresh kernel this call would raise NameError — confirm where it comes from.
plotByGroup(df_general, n_clusters=2, method='kmeans')

# Topic interest over the years

In [None]:
def create_slider_from_df(df):
    """Build one choropleth trace description per row of `df`.

    Parameters
    ----------
    df : DataFrame; each row supplies the `z` values of one trace, matched to
        `country_list_iso` as locations.

    Returns
    -------
    List of plain dicts (one per row, in row order) describing 'choropleth'
    traces with a fixed colour range of 0 to 100.
    """
    return [
        dict(
            type='choropleth',
            locations=country_list_iso,   # ISO-3 country codes
            z=df.iloc[row],               # data to be colour-coded for this row
            locationmode='ISO-3',         # how `locations` entries are interpreted
            autocolorscale=False,
            colorscale="Blugrn",
            zmax=100,
            zmin=0,
            colorbar_title="Interest",
        )
        for row in range(df.shape[0])
    ]

def plot_interest(df, title=''):
    """Show a Europe choropleth with a slider that steps through the rows of `df`.

    Parameters
    ----------
    df : DataFrame with one row per date; the index entries must support
        `strftime` because they are used as slider labels (month/day/year).
    title : figure title. Optional, so one-argument calls keep working.

    The figure is displayed with `pio.show`; nothing is returned.
    """
    data_slider = create_slider_from_df(df)
    n_traces = len(data_slider)

    # Start with only the first trace visible so the initial map matches the
    # slider's initial position (active=0). Without this every trace is drawn
    # at once until the slider is moved.
    for position, trace in enumerate(data_slider):
        trace['visible'] = position == 0

    # One slider step per row: show that row's trace and hide all the others.
    steps = []
    for i in range(df.shape[0]):
        visibility = [False] * n_traces
        visibility[i] = True
        steps.append(dict(method='restyle',
                          args=['visible', visibility],
                          label=df.index[i].strftime("%m/%d/%Y")))

    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
    layout = dict(geo=dict(scope='europe',), sliders=sliders, title_text=title,
                  autosize=False, width=800, height=800, dragmode=False)
    fig = dict(data=data_slider, layout=layout,)
    pio.show(fig)
    
def getGroup(number,idx):
    """Return the positions in `idx` whose value equals `number`.

    Parameters
    ----------
    number : cluster label to look for.
    idx : indexable sequence of cluster labels.

    Returns
    -------
    List of integer positions, in increasing order (empty if no match).
    """
    return [position for position in range(len(idx)) if idx[position] == number]
    
def plotDifferentGroup(idx,dfPlot,indicePlot):
    """Show one titled line figure per cluster.

    Parameters
    ----------
    idx : cluster label of each column of `dfPlot`, by position; labels are
        iterated from 1 up to `max(idx)`.
    dfPlot : DataFrame whose columns are the series being clustered.
    indicePlot : smoothing value forwarded to `plot_lines`.
    """
    # `max(idx)` and `i` replace the undefined names `maxGroupe` and `i2`,
    # which raised NameError as soon as the function was called.
    for i in range(1, max(idx) + 1):
        df1 = dfPlot[dfPlot.columns[getGroupe(i, idx)]]
        group_title = 'Visualization Google Trend "Animal Cruelty" and "Veganism" for group ' + str(i)
        plot_lines(df1, indicePlot).update_layout(title_text=group_title).show()

In [None]:
# Slider map of the vegetarism interest, one step per row of df_vegetarism.
# NOTE(review): the title states 2010 - 2018, but the data was requested with
# timeframe='all' — confirm the actual date range of the frame.
plot_interest(df_vegetarism, 'Evolution of interest over topic vegetarism between 2010 - 2018')

In [None]:
# Slider map of the veganism interest. A title is passed explicitly: the call
# previously omitted this argument.
plot_interest(df_veganism, 'Evolution of interest over topic veganism')

In [None]:
# Slider map of the meat-substitute interest. A title is passed explicitly: the
# call previously omitted this argument.
plot_interest(df_meat_substitute, 'Evolution of interest over topic meat substitute')

### k-means

In [None]:
def plotMapStatic(idx, title):
    """Show a static choropleth of Europe coloured by `idx` and return it.

    This redefinition replaces the earlier `plotMapStatic` of the notebook.
    It displays the figure immediately and also returns it, so code that
    relies on the return value of the earlier definition (for example to
    export the map) keeps working after this cell has run.

    Parameters
    ----------
    idx : one value per entry of `country_list_iso`, in the same order; used
        as the colour-coded data.
    title : title of the figure.

    Returns
    -------
    The displayed `go.Figure`.
    """
    fig = go.Figure(data=go.Choropleth(
        locations=country_list_iso,      # ISO-3 country codes
        z=idx,                           # data to be colour-coded
        locationmode='ISO-3',            # how `locations` entries are interpreted
        colorscale="Blugrn",
        colorbar_title="Millions USD",   # not rendered: showscale is False
        showscale=False,
    ))

    fig.update_layout(
        title_text=title,
        geo_scope='europe',              # restrict the map to Europe
        autosize=False,
        width=800,
        height=800,
        dragmode=False,
    )
    fig.show()
    return fig
    
def plot_k_mean_temporal_data(df, title, n_clusters=3):
    """Cluster the columns of `df` with k-means and draw the result on a map.

    The frame is transposed so that every column becomes one sample. The
    model is fitted with `random_state=0` and then used to predict the label
    of those same samples. The number of labels is printed and the labels are
    handed to `plotMapStatic` together with `title`.

    Parameters
    ----------
    df : DataFrame with one column per item to cluster.
    title : title forwarded to `plotMapStatic`.
    n_clusters : number of clusters.

    Returns
    -------
    Array of predicted cluster labels, one per column of `df`.
    """
    samples = df.transpose().values
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(samples)
    groups = model.predict(samples)
    print(len(groups))
    plotMapStatic(groups, title)
    return groups

def plotDifferentGroup(idx,dfPlot,indicePlot):
    """Show one titled line figure per cluster.

    Parameters
    ----------
    idx : cluster label of each column of `dfPlot`, by position; labels are
        iterated from 1 up to `max(idx)`.
    dfPlot : DataFrame whose columns are the series being clustered.
    indicePlot : smoothing value forwarded to `plot_lines`.
    """
    # `max(idx)` and `i` replace the undefined names `maxGroupe` and `i2`,
    # which raised NameError as soon as the function was called.
    for i in range(1, max(idx) + 1):
        df1 = dfPlot[dfPlot.columns[getGroupe(i, idx)]]
        group_title = 'Visualization Google Trend "Animal Cruelty" and "Veganism" for group ' + str(i)
        plot_lines(df1, indicePlot).update_layout(title_text=group_title).show()

In [None]:
# k-means map of the vegetarism series with the default number of clusters;
# the returned label array is the cell's output.
plot_k_mean_temporal_data(df_vegetarism, 'vegetarism')

In [None]:
# k-means map of the meat-substitute series with 3 clusters. The second
# positional parameter is the map title, so the cluster count is passed by
# keyword instead of ending up as the title.
plot_k_mean_temporal_data(df_meat_substitute, 'meat substitute', n_clusters=3)

In [None]:
# k-means map of the summed vegetarism and veganism series with 5 clusters.
# The second positional parameter is the map title, so the cluster count is
# passed by keyword; passed positionally it became the title and the default
# cluster count was used.
plot_k_mean_temporal_data(df_vegetarism+df_veganism, 'vegetarism + veganism', n_clusters=5)

In [None]:
# Load the JSON file with pandas. The result is not assigned to a name, so it
# is only displayed as the cell's output.
pd.read_json('data_vinc/JO1301K1.json')

In [None]:
# Load the CSV into df_viande_swe; the plotting cell below filters it on its
# 'type' column.
# Presumably Swedish meat-consumption data, judging by the file name — TODO confirm.
df_viande_swe = pd.read_csv('data_vinc/swedend_meat_cons.csv')

In [None]:
# Plot the first row whose 'type' is 'boeuf', without the 'type' column, on a
# 20x20 figure.
plt.figure(figsize=(20, 20))
plt.plot(
    df_viande_swe.loc[df_viande_swe['type'] == 'boeuf']
    .drop(columns='type')
    .iloc[0]
)