In [1]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

import findspark
findspark.init()

import pyspark
from pyspark.sql import *
import pyspark.sql.functions as func
from pyspark.sql.types import *

import pandas as pd
import numpy as np

In [2]:
# Create spark session
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

In [3]:
category_metrics = pd.read_csv('../docs/assets/data/category_metrics.csv')
category_metrics

Unnamed: 0,Group,negativity,positivity,agreement,vulgarity
0,Sports,0.089352,0.140203,0.899411,0.084548
1,Video Games,0.079255,0.149531,0.856568,0.068149
2,Music,0.071611,0.159956,0.9153,0.104271
3,Politics,0.10301,0.112316,0.843408,0.129858


In [4]:
category_metrics.Group.values

array(['Sports', 'Video Games', 'Music', 'Politics'], dtype=object)

In [5]:
layout = go.Layout(
    title="Negativity of general topics",
    xaxis=dict(
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        title='Negativity',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    )
)

negativity_data = [go.Bar(
                x=category_metrics.Group.values,
                y=category_metrics.negativity.values,
                marker=dict(
                    color=['rgba(204,204,204,1)', 'rgba(222,45,38,0.8)',
                           'rgba(204,204,204,1)', 'rgba(204,204,204,1)']))]

cat_neg_fig = go.Figure(data=negativity_data, layout=layout)

plotly.offline.plot(cat_neg_fig, auto_open=False, filename='category_negativity')


Your filename `category_negativity` didn't end with .html. Adding .html to the end of your file.



'file:///home/valentin/Documents/EPFL/Master/Semestre1/ADA/Reddit-opinions-polarization/plots/category_negativity.html'

In [6]:
layout = go.Layout(
    title="Vulgarity of general topics",
    xaxis=dict(
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        title='Vulgarity',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    )
)

vulgarity_data = [go.Bar(
                x=category_metrics.Group.values,
                y=category_metrics.vulgarity.values,
                marker=dict(
                    color=['rgba(204,204,204,1)', 'rgba(222,45,38,0.8)',
                           'rgba(204,204,204,1)', 'rgba(204,204,204,1)']))]

cat_vulg_fig = go.Figure(data=vulgarity_data, layout=layout)

plotly.offline.plot(cat_vulg_fig, auto_open=False, filename='category_vulgarity')


Your filename `category_vulgarity` didn't end with .html. Adding .html to the end of your file.



'file:///home/valentin/Documents/EPFL/Master/Semestre1/ADA/Reddit-opinions-polarization/plots/category_vulgarity.html'

In [7]:
subreddits_metrics = pd.read_csv('../docs/assets/data/subreddit_metrics.csv')
subreddits_metrics

Unnamed: 0,subreddit,count_pos,count_neg,agreement_factor,pos,neg,neu,vulg,count,neg_pos
0,AskReddit,63378,4638,0.93181,0.119226,0.089602,0.790424,0.144155,68016,0.029624
1,funny,21933,3868,0.850083,0.123265,0.08885,0.786654,0.119805,25801,0.034415
2,politics,16991,4020,0.808672,0.114051,0.103525,0.782111,0.120485,21011,0.010526
3,pics,17451,3407,0.836657,0.132045,0.085981,0.781025,0.106753,20858,0.046064
4,nfl,14686,1466,0.909237,0.139407,0.096617,0.762533,0.09252,16152,0.04279
5,leagueoflegends,11642,2908,0.800137,0.156009,0.092606,0.75025,0.080653,14550,0.063403
6,nba,12699,1750,0.878884,0.138332,0.092943,0.767919,0.087987,14449,0.045389
7,worldnews,11101,3301,0.770796,0.108674,0.108632,0.782373,0.132544,14402,4.2e-05
8,gaming,12104,2107,0.851735,0.137304,0.087848,0.773803,0.105931,14211,0.049456
9,WTF,12080,1960,0.860399,0.113473,0.105821,0.779859,0.149445,14040,0.007652


In [8]:
subreddits_metrics_names = pd.read_csv('../docs/assets/data/subreddit_metrics_names.csv')
names = subreddits_metrics_names['subreddit'].values
names = list(names)
additional = ['hearstone, GlobalOffensive', 'DotA2', 'soccer', 'gaming', 'Games']
names = names + additional
names.remove('MMA')
names.remove('TwoXChromosomes')
names

['AskReddit',
 'funny',
 'politics',
 'pics',
 'nfl',
 'leagueoflegends',
 'nba',
 'worldnews',
 'news',
 'The_Donald',
 'trees',
 'technology',
 'anime',
 'aww',
 'TumblrInAction',
 'starcraft',
 'europe',
 'pokemon',
 'hearstone, GlobalOffensive',
 'DotA2',
 'soccer',
 'gaming',
 'Games']

In [9]:
sub_video_games_names = ['leagueoflegends', 'gaming', 'DotA2', 'Games', 'GlobalOffensive', 'hearthstone', 'starcraft']
sub_politics_names = ['politics', 'worldnews', 'news']
sub_wholesome_names = ['aww', 'trees']
sub_donald_names = ['The_Donald']

In [10]:
def sub_filtered(filter_list):
    return subreddits_metrics['subreddit'].apply(lambda x: x in filter_list)

def inverse_sub_filtered():
    return subreddits_metrics['subreddit'].apply(lambda x: x not in sub_video_games_names and x not in sub_politics_names and x not in sub_wholesome_names and x not in sub_donald_names and x not in ['asoiaf', 'fffffffuuuuuuuuuuuu'])

In [11]:
video_games_filter = sub_filtered(sub_video_games_names)
politics_filter = sub_filtered(sub_politics_names)
wholesome_filter = sub_filtered(sub_wholesome_names)
donald_filter = sub_filtered(sub_donald_names)
others_filter = inverse_sub_filtered()

In [12]:
sub_video_games = subreddits_metrics.loc[video_games_filter]
sub_politics = subreddits_metrics.loc[politics_filter]
sub_wholesome = subreddits_metrics.loc[wholesome_filter]
sub_donald = subreddits_metrics.loc[donald_filter]
sub_others = subreddits_metrics.loc[others_filter]

In [13]:
def create_scatter_trace(subreddit, sub_name, marker_symbol, marker_color, marker_size):
    trace = go.Scatter(
                x=subreddit.agreement_factor,
                y=subreddit.pos,
                name=sub_name,
                marker=dict(
                    color=marker_color,
                    symbol=marker_symbol,
                    size=marker_size
                ),
                mode='markers+text',
                text=subreddit['subreddit'].values,
                textposition='top center',
                textfont=dict(size=10),
                showlegend=False)
    return trace

In [14]:
layout = go.Layout(
    title='Communities: Agreement and Positivity',
    xaxis=dict(
        titlefont=dict(
            size=12,
            color='rgb(107, 107, 107)'
        ),
        range=[0.73, 1]
    ),
    yaxis=dict(
        titlefont=dict(
            size=12,
            color='rgb(107, 107, 107)'
        )
    ),
    legend=dict(
        x=-0.1,
        y=-0.175
    )
)

agree_pos_vg_trace = create_scatter_trace(sub_video_games, 'Video games', 2, 'rgb(146, 197, 122)', 8)
agree_pos_politics_trace = create_scatter_trace(sub_politics, 'Politics', 1, 'rgb(103, 75, 169)', 8)
agree_pos_wholesome_trace = create_scatter_trace(sub_wholesome, 'Wholesome', 17, 'rgb(248, 179, 101)', 10)
agree_pos_donald_trace = create_scatter_trace(sub_donald, 'The_Donald', 4, 'rgb(226, 101, 99)', 8)
#agree_pos_others_trace = create_scatter_trace(sub_others, 'Others', 28, 'rgb(168, 168, 168)')

sub_others_names = sub_others['subreddit'].apply(lambda x: ' ' if x not in names else x).values
agree_pos_others_trace = go.Scatter(
                            x=sub_others.agreement_factor,
                            y=sub_others.pos,
                            name='Other communities',
                            marker=dict(
                                color='rgb(168, 168, 168)',
                                symbol=28,
                                size=6
                            ),
                            mode='markers+text',
                            text=sub_others_names,
                            textposition='top center',
                            textfont=dict(
                                size=10))

agree_pos_data = [agree_pos_vg_trace, agree_pos_politics_trace, agree_pos_wholesome_trace, agree_pos_donald_trace, agree_pos_others_trace]

scatter_fig = go.Figure(data=agree_pos_data, layout=layout)

plotly.offline.plot(scatter_fig, auto_open=False, filename='category_agree_vs_pos_vulg')


Your filename `category_agree_vs_pos_vulg` didn't end with .html. Adding .html to the end of your file.



'file:///home/valentin/Documents/EPFL/Master/Semestre1/ADA/Reddit-opinions-polarization/plots/category_agree_vs_pos_vulg.html'

### OLD SCATTER PLOT

In [15]:
vulgarity_min = subreddits_metrics['vulg'].min()
vulgarity_max = subreddits_metrics['vulg'].max()
vulgarity_space = np.linspace(vulgarity_min, vulgarity_max, 4)
vulgarity_colors = ['rgb(3, 196, 42)', 'rgb(255, 158, 12)', 'rgb(178, 10, 10)']
vulgarity_min, vulgarity_max

(0.035238504512247526, 0.2886367268659102)

In [16]:
subreddits_metrics.loc[:, 'vulg_rgb'] = -1
subreddits_metrics.loc[:, 'vulg_rgb'].loc[subreddits_metrics['vulg'] <= vulgarity_space[1]] = 0
subreddits_metrics.loc[:, 'vulg_rgb'].loc[(subreddits_metrics['vulg'] > vulgarity_space[1]) & (subreddits_metrics['vulg'] <= vulgarity_space[2])] = 1
subreddits_metrics.loc[:, 'vulg_rgb'].loc[subreddits_metrics['vulg'] > vulgarity_space[2]] = 2
subreddits_metrics.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,subreddit,count_pos,count_neg,agreement_factor,pos,neg,neu,vulg,count,neg_pos,vulg_rgb
0,AskReddit,63378,4638,0.93181,0.119226,0.089602,0.790424,0.144155,68016,0.029624,1
1,funny,21933,3868,0.850083,0.123265,0.08885,0.786654,0.119805,25801,0.034415,1
2,politics,16991,4020,0.808672,0.114051,0.103525,0.782111,0.120485,21011,0.010526,1
3,pics,17451,3407,0.836657,0.132045,0.085981,0.781025,0.106753,20858,0.046064,0
4,nfl,14686,1466,0.909237,0.139407,0.096617,0.762533,0.09252,16152,0.04279,0


In [17]:
sub_low = subreddits_metrics[subreddits_metrics['vulg_rgb'] == 0]
sub_medium = subreddits_metrics[subreddits_metrics['vulg_rgb'] == 1]
sub_high = subreddits_metrics[subreddits_metrics['vulg_rgb'] == 2]

sub_low_names = [' ' if x in ['asoiaf', 'fffffffuuuuuuuuuuuu'] else x for x in sub_low.subreddit.values]
sub_medium_names = [' ' if x in ['asoiaf', 'fffffffuuuuuuuuuuuu'] else x for x in sub_medium.subreddit.values]
sub_high_names = [' ' if x in ['asoiaf', 'fffffffuuuuuuuuuuuu'] else x for x in sub_high.subreddit.values]

In [18]:
layout = go.Layout(
    title="Subreddits Agreement VS Positivity",
    xaxis=dict(
        title='Agreement',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        title='Positivity',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        )
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)

agree_pos_low_vulg_trace = go.Scatter(
                x=sub_low.agreement_factor,
                y=sub_low.pos,
                name='Low vulgarity',
                marker=dict(
                    color=vulgarity_colors[0]
                ),
                mode='markers+text',
                text=sub_low_names,
                textposition='top center')

agree_pos_medium_vulg_trace = go.Scatter(
                x=sub_medium.agreement_factor,
                y=sub_medium.pos,
                name='Moderate vulgarity',
                marker=dict(
                    color=vulgarity_colors[1]
                ),
                mode='markers+text',
                text=sub_medium_names,
                textposition='top center')

agree_pos_high_vulg_trace = go.Scatter(
                x=sub_high.agreement_factor,
                y=sub_high.pos,
                name='High vulgarity',
                marker=dict(
                    color=vulgarity_colors[2]
                ),
                mode='markers+text',
                text=sub_high_names,
                textposition='top center')


old_scatter_fig = go.Figure(data=[agree_pos_low_vulg_trace, agree_pos_medium_vulg_trace, agree_pos_high_vulg_trace], layout=layout)

plotly.offline.plot(old_scatter_fig, auto_open=False, filename='blablablabla')


Your filename `blablablabla` didn't end with .html. Adding .html to the end of your file.



'file:///home/valentin/Documents/EPFL/Master/Semestre1/ADA/Reddit-opinions-polarization/plots/blablablabla.html'

In [19]:
daily_agree = pd.read_csv('../docs/assets/data/daily_agreement.csv')
dates = pd.to_datetime(daily_agree.loc[:, 'created'])
dates_filter = dates.apply(lambda x: x.year >= 2011)
daily_agree = daily_agree.loc[dates_filter]
daily_agree.head()

Unnamed: 0,created,daily_agreement_60d_avg
729,2011-01-01,0.886314
730,2011-01-02,0.886281
731,2011-01-03,0.886209
732,2011-01-04,0.886569
733,2011-01-05,0.88685


In [20]:
daily_pos = pd.read_csv('../docs/assets/data/neg_vs_pos_daily_avg.csv')
dates = pd.to_datetime(daily_pos.loc[:, 'creation_date'])
dates_filter = dates.apply(lambda x: x.year >= 2011)
daily_pos = daily_pos.loc[dates_filter].loc[:, ['creation_date', 'nltk_positivity_60d_avg']]
daily_pos.head()

Unnamed: 0,creation_date,nltk_positivity_60d_avg
1817,2011-01-01,0.135623
1818,2011-01-02,0.135559
1819,2011-01-03,0.135456
1820,2011-01-04,0.13539
1821,2011-01-05,0.135251


In [21]:
bw_full_name = '../data/nlp_bw_metrics_daily_full_0.01.parquet/'
bw_sql_table_name = 'bw_metrics'
bw_sql_query = (f"""
SELECT
    creation_date,
    
    AVG(msg_count) OVER (
        ORDER BY creation_date
        RANGE BETWEEN 30 PRECEDING AND 30 FOLLOWING
    ) AS msg_count_60d_avg,

    AVG(sum_nb_bw_matches) OVER (
        ORDER BY creation_date
        RANGE BETWEEN 30 PRECEDING AND 30 FOLLOWING
    ) AS nb_bw_matches_60d_avg

FROM {bw_sql_table_name}
""")

In [22]:
def spark_to_pandas(spark_metrics):
    metrics_pd = spark_metrics.toPandas()
    metrics_pd = metrics_pd.set_index('creation_date')
    metrics_pd = metrics_pd.sort_index()
    return metrics_pd

In [23]:
def get_metrics(filename, sql_query, sql_table_name):
    spark_metrics = spark.read.load(filename)
    spark_metrics.registerTempTable(sql_table_name)
    avg_metrics = spark.sql(sql_query)

    pd_metrics = spark_to_pandas(spark_metrics)
    pd_metrics_avg = spark_to_pandas(avg_metrics)
    pd_metrics_n = pd_metrics.div(pd_metrics['msg_count'], axis=0)
    pd_metrics_avg_n = pd_metrics_avg.div(pd_metrics_avg['msg_count_60d_avg'], axis=0)
    
    return pd_metrics_n, pd_metrics_avg_n, pd_metrics, pd_metrics_avg

In [24]:
vulg_n, vulg_avg_n, vulg, vulg_avg = get_metrics(bw_full_name, bw_sql_query, bw_sql_table_name)

In [25]:
daily_vulg = vulg_avg_n.copy()
daily_vulg = daily_vulg.reset_index().drop('msg_count_60d_avg', axis=1)
dates = pd.to_datetime(daily_vulg.loc[:, 'creation_date'])
dates_filter = dates.apply(lambda x: x.year >= 2011)
daily_vulg = daily_vulg[dates_filter]
daily_vulg.head()

Unnamed: 0,creation_date,nb_bw_matches_60d_avg
1817,2011-01-01,0.122371
1818,2011-01-02,0.122122
1819,2011-01-03,0.122508
1820,2011-01-04,0.122262
1821,2011-01-05,0.12217


In [26]:
#daily_vulg = spark.read.load('../data/nlp_bw_metrics_daily_full_0.01.parquet/')
#daily_vulg = daily_vulg.toPandas()
#daily_vulg = daily_vulg.sort_values(by='creation_date')
#dates = pd.to_datetime(daily_vulg.loc[:, 'creation_date'])
#dates_filter = dates.apply(lambda x: x.year >= 2011)
#daily_vulg = daily_vulg[dates_filter]
#daily_vulg['vulgarity'] = daily_vulg['sum_nb_bw_matches'] / daily_vulg['msg_count']
#daily_vulg = daily_vulg.loc[:, ['creation_date', 'vulgarity']]
#daily_vulg.head()

In [27]:
daily_contrib = pd.read_csv('../docs/assets/data/monthly_contributions.csv')
daily_contrib = daily_contrib.loc[:, ['date', 'avg_nb_sub']]
dates = pd.to_datetime(daily_contrib.loc[:, 'date'])
dates_filter = dates.apply(lambda x: x.year >= 2011)
daily_contrib = daily_contrib[dates_filter]
daily_contrib.head()

Unnamed: 0,date,avg_nb_sub
24,2011-01-01,4.678156
25,2011-02-01,4.487831
26,2011-03-01,4.459591
27,2011-04-01,4.418013
28,2011-05-01,4.484318


In [28]:
def normalize(serie):
    serie_range = serie.max() - serie.min()
    return (serie - serie.min()) / serie_range

In [29]:
daily_agree['daily_agreement_60d_avg_n'] = normalize(daily_agree['daily_agreement_60d_avg'])
daily_pos['nltk_positivity_60d_avg_n'] = normalize(daily_pos['nltk_positivity_60d_avg'])
daily_vulg['vulgarity_n'] = normalize(daily_vulg['nb_bw_matches_60d_avg'])
daily_contrib['avg_nb_sub_n'] = normalize(daily_contrib['avg_nb_sub'])

In [30]:
agree_trace = go.Scatter(
    x = daily_agree['created'].values,
    y = daily_agree['daily_agreement_60d_avg'].values,
    mode='lines',
    name='Agreement',
    line = dict(
        shape='spline',
        width=4,
        color='rgb(61, 133, 198)'
    )
)

agree_trace_n = go.Scatter(
    x = daily_agree['created'].values,
    y = daily_agree['daily_agreement_60d_avg_n'].values,
    mode='lines',
    name='Agreement'
)

pos_trace = go.Scatter(
    x = daily_pos['creation_date'].values,
    y = daily_pos['nltk_positivity_60d_avg'].values,
    mode='lines',
    name='Positivity',
    line = dict(
        shape='spline',
        width=4,
        color='rgb(61, 133, 198)'
    )
)

pos_trace_n = go.Scatter(
    x = daily_pos['creation_date'].values,
    y = daily_pos['nltk_positivity_60d_avg_n'].values,
    mode='lines',
    name='Positivity'
)

vulg_trace = go.Scatter(
    x = daily_vulg['creation_date'].values,
    y = daily_vulg['nb_bw_matches_60d_avg'].values,
    mode='lines',
    name='Vulgarity',
    line = dict(
        shape='spline',
        width=4,
        color='rgb(61, 133, 198)'
    )
)

vulg_trace_n = go.Scatter(
    x = daily_vulg['creation_date'].values,
    y = daily_vulg['vulgarity_n'].values,
    mode='lines',
    name='Vulgarity'
)

contrib_trace = go.Scatter(
    x = daily_contrib['date'].values,
    y = daily_contrib['avg_nb_sub'].values,
    mode='lines',
    name='Contributions',
    line = dict(
        shape='spline',
        width=4,
        color='rgb(204, 65, 37)'
    )
)

contrib_trace_n = go.Scatter(
    x = daily_contrib['date'].values,
    y = daily_contrib['avg_nb_sub_n'].values,
    mode='lines',
    name='Contributions'
)


In [31]:
layout = go.Layout(
    xaxis=dict(
        title='Time',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    yaxis=dict(
        title='Agreement',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)
agree_line_fig = go.Figure(data=[agree_trace], layout=layout)

layout = go.Layout(
    xaxis=dict(
        title='Time',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    yaxis=dict(
        title='Positivity',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        ),
        dtick=0.004
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)
pos_line_fig = go.Figure(data=[pos_trace], layout=layout)

layout = go.Layout(
    xaxis=dict(
        title='Time',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    yaxis=dict(
        title='Vulgarity',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        ),
        dtick=0.01
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)
vulg_line_fig = go.Figure(data=[vulg_trace], layout=layout)

layout = go.Layout(
    xaxis=dict(
        title='Time',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    yaxis=dict(
        title='Contribution',
        titlefont=dict(
            size=22,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=22
        )
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)
contrib_line_fig = go.Figure(data=[contrib_trace], layout=layout)

layout = go.Layout(
    title="Sentiment over time",
    xaxis=dict(
        title='Time',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        title='Sentiments',
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        )
    ),
    legend=dict(
        x=0,
        y=-0.05,
        font=dict(size=16),
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
        orientation='h'
    )
)
lines_fig = go.Figure(data=[agree_trace_n, pos_trace_n, vulg_trace_n, contrib_trace_n], layout=layout)

In [32]:
plotly.offline.plot(agree_line_fig, auto_open=False, filename='agree_over_time')
plotly.offline.plot(pos_line_fig, auto_open=False, filename='pos_over_time')
plotly.offline.plot(vulg_line_fig, auto_open=False, filename='vulg_over_time')
plotly.offline.plot(contrib_line_fig, auto_open=False, filename='contrib_over_time')
plotly.offline.plot(lines_fig, auto_open=False, filename='sentiment_over_time')


Your filename `agree_over_time` didn't end with .html. Adding .html to the end of your file.


Your filename `pos_over_time` didn't end with .html. Adding .html to the end of your file.


Your filename `vulg_over_time` didn't end with .html. Adding .html to the end of your file.


Your filename `contrib_over_time` didn't end with .html. Adding .html to the end of your file.


Your filename `sentiment_over_time` didn't end with .html. Adding .html to the end of your file.



'file:///home/valentin/Documents/EPFL/Master/Semestre1/ADA/Reddit-opinions-polarization/plots/sentiment_over_time.html'

### Save images

In [33]:
import os
import plotly.io as pio

In [34]:
if not os.path.exists('images'):
    os.mkdir('images')

In [36]:
pio.write_image(cat_neg_fig, 'images/cat_net_fig.svg', width=960, height=576)
pio.write_image(cat_vulg_fig, 'images/cat_vulg_fig.svg', width=960, height=576)
#pio.write_image(scatter_fig, 'images/scatter_fig.svg', width=1800, height=900)
#pio.write_image(scatter_fig, 'images/scatter_fig.svg', width=1200, height=800)
#pio.write_image(scatter_fig, 'images/scatter_fig.svg', width=960, height=640)
pio.write_image(scatter_fig, 'images/scatter_fig.svg', width=620, height=480)
#pio.write_image(agree_line_fig, 'images/agree_line_fig.svg', width=1200, height=720)
pio.write_image(agree_line_fig, 'images/agree_line_fig.svg', width=960, height=576)
pio.write_image(pos_line_fig, 'images/pos_line_fig.svg', width=960, height=576)
pio.write_image(vulg_line_fig, 'images/vulg_line_fig.svg', width=960, height=576)
pio.write_image(contrib_line_fig, 'images/contrib_line_fig.svg', width=960, height=576)
pio.write_image(lines_fig, 'images/lines_fig.svg', width=960, height=576)

# What should we use as units for the different plots? Those are not really % as we did not scale them up to 100.