In [84]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
# Never truncate long cell text (tweet bodies, party names) when frames are printed.
# None is the supported "no limit" value; the legacy -1 sentinel has been deprecated
# since pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [85]:
def plot_tweet_intensity(data,freq):
    """Plot the number of tweets per time bucket as a line with point markers.

    Parameters
    ----------
    data : DataFrame-like with a "timestamp" column parseable by
        ``pd.to_datetime``. The caller's object is NOT modified.
    freq : pandas offset alias (e.g. 'H', 'D') giving the bucket width.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    # Parse into a separate frame so the caller's "timestamp" column keeps its
    # original dtype (the previous version overwrote it in place).
    timestamps = pd.DataFrame({"timestamp": pd.to_datetime(data["timestamp"])})
    tweet_counts = (
        timestamps
        .groupby([pd.Grouper(key='timestamp', freq=freq)])
        .size()
        .reset_index(name='count')
    )

    fig = px.line(tweet_counts, x="timestamp", y="count")
    # Overlay markers on the line so individual buckets are easy to hover.
    fig.add_trace(go.Scatter(x=tweet_counts["timestamp"], y=tweet_counts["count"],
                        mode='markers',name='Tweet count'))
    fig.show()

In [86]:
def add_annotation(date, text, fig, anno_height, start_height = 0):
    """Mark `date` on `fig` with a dotted vertical line and a rotated label.

    A line shape is drawn at x=`date` from y=`start_height` to
    y=`anno_height`, and an arrow annotation carrying `text` is attached at
    (`date`, `anno_height`). `fig` is modified in place; nothing is returned.
    """
    # Vertical marker line.
    line_style = dict(color="RoyalBlue", width=3, dash='dot')
    marker = go.layout.Shape(
        type="line",
        x0=date, y0=start_height,
        x1=date, y1=anno_height,
        line=line_style,
    )
    fig.add_shape(marker)

    # Text label anchored at the top end of the marker line.
    label = go.layout.Annotation(
        x=date, y=anno_height,
        xref="x", yref="y",
        text=text,
        showarrow=True, arrowhead=7,
        ax=0, ay=-20,
        textangle=25,
    )
    fig.add_annotation(label)

In [87]:
def annotate_chart_with_brexit_dates(fig, anno_height = 6000, start_height = 0):
    """Annotate `fig` with a fixed set of Brexit milestones.

    Each milestone is drawn with ``add_annotation`` as a vertical line from
    `start_height` to `anno_height` plus a text label. `fig` is modified in
    place; nothing is returned.
    """
    brexit_events = (
        ('2018-12-17', 'Vote date on Brexit<br> deal announced'),
        # The first meaningful vote was lost on 15 January 2019
        # (previously entered as 2019-02-15).
        ('2019-01-15', 'May loses meaningful vote'),
        # NOTE(review): the second meaningful vote was on 12 March; the 29 March
        # defeat is usually counted as the third -- confirm the label wording.
        ('2019-03-29', 'Brexit day I<br>May loses 2nd meaningful vote'),
        ('2019-05-24', 'May announces resignation'),
        # Johnson took office on 24 July 2019 (previously entered as 2019-06-24).
        ('2019-07-24', 'Boris Johnson becomes new PM'),
        ('2019-10-17', 'Johnson agrees new Withdrawal<br> Agreement with EU'),
    )
    for date, text in brexit_events:
        add_annotation(date, text, fig, anno_height, start_height)

    # update_annotations is applied to every annotation currently on the figure,
    # so this lengthens the arrow (ay=-40) for all of them, overriding the
    # ay=-20 set when each annotation was created.
    fig.update_annotations(dict(
            xref="x",
            yref="y",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
    ))
    

In [88]:
def plot_tweet_sentiment_per_party(filename):
    """Plot each party's mean daily `score_num` from the CSV at `filename`.

    The CSV is read with ``pd.read_csv`` and must provide the columns
    `party`, `timestamp` and `score_num`. One line trace is added per party,
    the Brexit milestones are overlaid across the fixed y-range [-1, 1], and
    the figure is shown with a range slider. Nothing is returned.
    """
    tweets = pd.read_csv(filename)

    fig = go.Figure(layout = go.Layout(yaxis=dict(range = [-1, 1])))
    party_names = tweets.party.unique()
    # The column keeps its historical name although the period is one day.
    tweets['month_year'] = pd.to_datetime(tweets['timestamp']).dt.to_period('D')

    y_top = 1.0

    for party_name in party_names:
        party_rows = tweets[tweets.party == party_name]
        daily_mean = party_rows.groupby('month_year')['score_num'].mean()
        fig.add_trace(go.Scatter(
            x=list(map(str, daily_mean.index)),
            y=daily_mean.values,
            name=party_name,
        ))

    # Milestone lines span the whole visible y-range, from -1.0 up to y_top.
    annotate_chart_with_brexit_dates(fig, y_top, -1.0)

    fig.update_layout(xaxis_rangeslider_visible = True)
    fig.show()

In [89]:
def plot_party_sentiment(filename,freq, from_range, to_range):
    """Plot the number of tweets per party per period from the CSV at `filename`.

    Despite its name, this function plots tweet COUNTS (rows per period),
    not sentiment scores.

    Parameters
    ----------
    filename : path of a CSV with at least `party` and `timestamp` columns.
    freq : pandas period alias passed to ``dt.to_period`` (e.g. 'D', 'M').
    from_range, to_range : lower and upper bounds of the y-axis; the Brexit
        milestone lines are drawn across exactly this range.

    The figure is shown with a range slider; nothing is returned.
    """
    data = pd.read_csv(filename)
    layout = go.Layout(yaxis=dict(range = [from_range, to_range]))

    fig = go.Figure(layout = layout)
    parties = data.party.unique()

    # The column keeps its historical name; the actual period is given by `freq`.
    data['month_year'] = pd.to_datetime(data['timestamp']).dt.to_period(freq)

    for party in parties:
        df_party = data[data.party == party]
        counts = df_party['month_year'].value_counts().sort_index()
        labels = [str(c) for c in counts.index]
        fig.add_trace(go.Scatter(x=labels,y=counts.values,name=party))

    # Start the milestone lines at the bottom of the visible range rather than
    # at a fixed 0, matching how the per-party sentiment plot passes its lower bound.
    annotate_chart_with_brexit_dates(fig, to_range, from_range)
    fig.update_layout(
        xaxis_rangeslider_visible = True
    )
    fig.show()

In [90]:
# Tweet counts per party aggregated by month; y-axis fixed to 0..65500.
plot_party_sentiment(
    filename="preprocessed/ppc_accounts_timeline_year_sentiment.csv",
    freq='M',
    from_range=0,
    to_range=65500,
)

In [91]:
# Tweet counts per party aggregated by day; y-axis fixed to 0..2750.
plot_party_sentiment(
    filename="preprocessed/ppc_accounts_timeline_year_sentiment.csv",
    freq='D',
    from_range=0,
    to_range=2750,
)

In [92]:
# Mean daily sentiment score per party, read from the polarity-scored file.
plot_tweet_sentiment_per_party(
    filename='preprocessed/ppc_accounts_timeline_year_sentiment_polarity.csv',
)

## Funkcje wyznaczające procent negatywnych/pozytywnych postów z podziałem na partie oraz osoby

In [93]:
def plot_tweet_sentiment_per_person(filename, person_name):
    """Plot every `score_num` value of one user against the tweet timestamp.

    Parameters
    ----------
    filename : path of a CSV with at least `user_name`, `timestamp` and
        `score_num` columns.
    person_name : value matched exactly against the `user_name` column.

    The y-axis is fixed to [-1, 1] and the figure is shown with a range
    slider. If no row matches `person_name`, an empty trace is plotted.
    Nothing is returned.
    """
    data = pd.read_csv(filename)
    person_rows = data[data['user_name'] == person_name]

    fig = go.Figure(layout = go.Layout(yaxis=dict(range = [-1, 1])))

    # Timestamps are passed as plain strings, in file order (no re-sorting).
    labels = [str(ts) for ts in person_rows['timestamp']]
    fig.add_trace(go.Scatter(x=labels,y=person_rows['score_num']))

    fig.update_layout(
        xaxis_rangeslider_visible = True
    )
    fig.show()

In [101]:
def as_perc(value, total):
    """Return `value` divided by `total` using float division.

    Note: the result is a plain ratio (e.g. 0.25); it is NOT multiplied by 100.
    """
    denominator = float(total)
    return value / denominator

def get_sentiment_post_percentage_per_party(filename, positive, threshold=0.5):
    """Print, per party, the share of posts with strongly positive/negative sentiment.

    Parameters
    ----------
    filename : path of a CSV with at least `party` and `score_num` columns.
    positive : if truthy, count posts with ``score_num >= threshold`` and
        report them as `positive_percent`; otherwise count posts with
        ``score_num <= -threshold`` and report them as `negative_percent`.
    threshold : absolute score a post must reach to be counted (default 0.5,
        the previously hard-coded value).

    The result (columns: party, count, total_posts, <percent column>), sorted
    by the percentage in descending order, is printed; nothing is returned.
    Parties without any qualifying post are listed with a count of 0.
    """
    df = pd.read_csv(filename)

    if positive:
        col_name = 'positive_percent'
        df_min_sentiment = df[df['score_num'] >= threshold]
    else:
        col_name = 'negative_percent'
        df_min_sentiment = df[df['score_num'] <= -threshold]

    df_party_posts_count = df.groupby('party')['party'].count().reset_index(name='total_posts')
    df_grouped = df_min_sentiment.groupby('party')['party'].count().reset_index(name='count')

    # Left-merge from the totals so that a party with zero qualifying posts is
    # reported as 0 % instead of silently disappearing from the table.
    df_merged = (
        df_party_posts_count
        .merge(df_grouped, on='party', how='left')
        .assign(count=lambda frame: frame['count'].fillna(0).astype(int))
        .reindex(columns=['party', 'count', 'total_posts'])
    )
    df_merged[col_name] = (df_merged['count'] / df_merged['total_posts']) * 100

    df_merged = df_merged.sort_values(by=[col_name], ascending=False)
    print('-------------------------------------------------------------')
    print(df_merged)

def get_sentiment_post_percentage_per_person(filename, positive, max_person_num, threshold=0.5):
    """Print the share of strongly positive/negative posts for the top posters.

    Parameters
    ----------
    filename : path of a CSV with at least `party`, `user_name` and
        `score_num` columns.
    positive : if truthy, count posts with ``score_num >= threshold`` and
        report them as `positive_percent`; otherwise count posts with
        ``score_num <= -threshold`` and report them as `negative_percent`.
    max_person_num : how many (party, user_name) pairs to keep. The pairs
        with the highest absolute number of qualifying posts are selected
        first and only then ranked by percentage.
    threshold : absolute score a post must reach to be counted (default 0.5,
        the previously hard-coded value).

    The table is printed; nothing is returned.
    """
    df = pd.read_csv(filename)
    person_keys = ['party', 'user_name']

    if positive:
        col_name = 'positive_percent'
        df_min_sentiment = df[df['score_num'] >= threshold]
    else:
        col_name = 'negative_percent'
        df_min_sentiment = df[df['score_num'] <= -threshold]

    # Totals use the same (party, user_name) key as the qualifying counts, so a
    # user name that occurs under several parties is not divided by the sum of
    # posts across all of them.
    df_person_posts_count = df.groupby(person_keys)['user_name'] \
                              .count() \
                              .reset_index(name='total_posts')

    df_grouped_person = df_min_sentiment.groupby(person_keys)['user_name'] \
                         .count() \
                         .reset_index(name='count') \
                         .sort_values(['count'], ascending=False) \
                         .head(max_person_num)

    df_merged = df_grouped_person.merge(df_person_posts_count, on=person_keys)
    df_merged[col_name] = (df_merged['count'] / df_merged['total_posts']) * 100

    df_merged = df_merged.sort_values(by=[col_name], ascending=False)
    print('-------------------------------------------------------------')
    print(df_merged)

In [102]:
# Polarity-scored tweets used by every report in this cell.
ppc_accounts_year_sentiment_filename = 'preprocessed/ppc_accounts_timeline_year_sentiment_polarity.csv'

# Share of strongly positive posts: per party, then for up to 15 people.
get_sentiment_post_percentage_per_party(
    filename=ppc_accounts_year_sentiment_filename,
    positive=True,
)
get_sentiment_post_percentage_per_person(
    filename=ppc_accounts_year_sentiment_filename,
    positive=True,
    max_person_num=15,
)

# Same two reports for strongly negative posts.
get_sentiment_post_percentage_per_party(
    filename=ppc_accounts_year_sentiment_filename,
    positive=False,
)
get_sentiment_post_percentage_per_person(
    filename=ppc_accounts_year_sentiment_filename,
    positive=False,
    max_person_num=15,
)

# Per-tweet sentiment timeline for a single account.
plot_tweet_sentiment_per_person(
    filename=ppc_accounts_year_sentiment_filename,
    person_name='Diane Abbott',
)

-------------------------------------------------------------
                              party  count  total_posts  positive_percent
1  Conservative                      3387   22283        15.199928       
3  Scottish National Party           517    3987         12.967143       
7  The Brexit Party                  192    1576         12.182741       
0  Labour                            4637   38571        12.021985       
2  Liberal Democrats                 1212   10722        11.303861       
6  Green Party                       277    2591         10.690853       
4  The Independent Group for Change  342    3208         10.660848       
5  Independent                       312    3196         9.762203        
-------------------------------------------------------------
                               party            user_name  count  total_posts  positive_percent
9   Conservative                      Michael Gove         332    1630         20.368098       
0   Conservative  

## Funkcja znajdująca maksymalną różnicę w sentymencie między partiami, liczoną dla każdego dnia w ciągu roku
### Wybrane zostały te dni i te partie, dla których różnica sentymentu wyliczona ze średniej dla danego dnia jest większa lub równa 0.8
### TODO: znaleźć jakąś zależność dla dni, które zwróciła funkcja

In [96]:
def find_max_sentiment_diff(filename, min_diff=0.8):
    """Print the days on which two parties' mean sentiment differed the most.

    For every calendar day the mean `score_num` per party is computed; the
    parties with the lowest and the highest mean are compared, and the day is
    reported when ``max - min >= min_diff``.

    Parameters
    ----------
    filename : path of a CSV with at least `timestamp`, `party` and
        `score_num` columns.
    min_diff : smallest difference between the daily party means that is
        reported (default 0.8, the previously hard-coded value).

    The table (columns: day_year, party1 = lowest-mean party, party2 =
    highest-mean party, min_val, max_val, diff), sorted by `diff` in
    descending order, is printed; nothing is returned.
    """
    df = pd.read_csv(filename)

    columns = ['day_year', 'party1', 'party2', 'min_val', 'max_val', 'diff']

    df['day_year'] = pd.to_datetime(df['timestamp']).dt.to_period('D')

    df_party_mean = (
        df.groupby(['day_year', 'party'])['score_num']
          .mean()
          .reset_index(name='mean_val')
          # A party/day whose scores are all missing has no usable mean and
          # would otherwise break idxmin/idxmax below.
          .dropna(subset=['mean_val'])
    )

    # Collect plain records and build the frame once, instead of growing a
    # DataFrame cell by cell.
    records = []
    for _, day_means in df_party_mean.groupby('day_year'):
        # Rows are addressed by column label; positional access such as row[2]
        # on a label-indexed Series is deprecated in recent pandas.
        min_row = day_means.loc[day_means['mean_val'].idxmin()]
        max_row = day_means.loc[day_means['mean_val'].idxmax()]
        diff = max_row['mean_val'] - min_row['mean_val']
        if diff >= min_diff:
            records.append({
                'day_year': min_row['day_year'],
                'party1': min_row['party'],
                'party2': max_row['party'],
                'min_val': min_row['mean_val'],
                'max_val': max_row['mean_val'],
                'diff': diff,
            })

    new_df = pd.DataFrame(records, columns=columns).sort_values(by=['diff'], ascending=False)

    print(new_df)

In [97]:
# List the days with the largest gap between the daily party sentiment means.
find_max_sentiment_diff(
    filename='preprocessed/ppc_accounts_timeline_year_sentiment_polarity.csv',
)

     day_year                            party1                            party2     min_val   max_val      diff
3  2019-03-09  Liberal Democrats                 Scottish National Party          -0.0416667   1         1.04167 
7  2019-10-03  The Brexit Party                  The Independent Group for Change -0.8         0.225614  1.02561 
1  2019-01-27  Liberal Democrats                 Scottish National Party           0.00811107  1         0.991889
2  2019-02-23  The Brexit Party                  Green Party                      -0.275       0.704167  0.979167
4  2019-04-02  Scottish National Party           Green Party                      -0.6         0.285421  0.885421
0  2018-12-25  Liberal Democrats                 Scottish National Party           0           0.85      0.85    
5  2019-06-30  The Independent Group for Change  The Brexit Party                 -0.6         0.236161  0.836161
6  2019-08-02  The Brexit Party                  Green Party                       0    