# Dynamic graph - Sentiment scores of articles and associated named entities

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import datetime
import ipywidgets as widgets
import plotly.graph_objects as go
from IPython.display import display, HTML, Image
import networkx as nx

In [2]:
# Loading the dataset
# NOTE(review): unpickling can execute arbitrary code, so
# 'final_dataset.pickle' must come from a trusted source.
with open('final_dataset.pickle', 'rb') as file:
    df_final = pickle.load(file)

In [3]:
# the plotting and graph code below reads the article number from an 'index'
# column, so the 'level_0' column is renamed to match
df_final = df_final.rename(columns={'level_0': 'index'})

In [4]:
def get_entities(df):
    """Collect every named-entity mention in ``df`` into a table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``'entities'`` column whose cells are iterables of
        entities (one iterable per row).

    Returns
    -------
    pandas.DataFrame
        One row per mention, in row order, with the columns ``'index'``
        (running row number added by ``reset_index``) and ``'entity'``.
        Repeated entities are kept; de-duplication is left to the caller.
    """
    mentions = [
        entity
        for row_entities in df['entities']
        for entity in row_entities
    ]
    # Naming the column explicitly keeps 'entity' present even when there are
    # no mentions at all, so later df['entity'] lookups do not raise KeyError.
    entities_df_t = pd.DataFrame({'entity': mentions})
    return entities_df_t.reset_index()

In [5]:
# table of all entity mentions found in the dataset (built by get_entities)
entities_df = get_entities(df_final)
# entities_df

In [6]:
def entities_assign_num(df):
    """Return a dict mapping each distinct entity to a number.

    Entities are taken from ``df['entity']`` and numbered 0, 1, 2, ... in
    the order in which they first appear in that column.
    """
    distinct_entities = df['entity'].unique()
    return {name: number for number, name in enumerate(distinct_entities)}

In [7]:
# lookup dictionary: named entity -> the number assigned to it
# (knowledge_graph uses these numbers as the entity nodes of the graph)
d_ents = entities_assign_num(entities_df)

In [8]:
def find_entities(df, num_l):
    """Look up the entities that belong to the numbers in ``num_l``.

    The numbering is rebuilt from ``df['entity']``: distinct entities are
    numbered 0, 1, 2, ... in order of first appearance. The result is a
    DataFrame with one row per requested number, in the order given, and
    the columns 'entity_number' and 'entity'. A number that was never
    assigned raises ``KeyError``.
    """
    number_to_entity = dict(enumerate(df['entity'].unique()))
    rows = [
        {'entity_number': number, 'entity': number_to_entity[number]}
        for number in num_l
    ]
    return pd.DataFrame.from_dict(rows)

In [9]:
# setting colours and weights for the different ranges of sentiment scores.
# score_thresholds holds the upper bound of each range: np.arange starts at -1
# and steps by 0.4, and [1:] drops that leading -1. The three sequences are
# matched up by position (first colour / first width / first threshold, ...).
colors_list = ['black', 'darkorchid', 'red', 'dodgerblue', 'lawngreen']
score_thresholds = np.arange(-1, 1.1, 0.4)[1:]
weights_list = np.arange(0.4, 2.1, 0.4)

In [10]:
def cols_weights_thresholds(cols_l, weights_l, thresholds_l):
    """Build the legend table for the sentiment-coloured edges.

    Each row pairs a colour from ``cols_l`` with the width at the same
    position in ``weights_l`` (rounded to 2 decimals) and a text label for
    the score range it stands for. The first range starts at -1; every
    other range runs from the previous threshold to the current one, with
    the bounds rounded to 2 decimals.

    Returns a DataFrame with the columns 'main_edge_colour',
    'main_edge_width' and 'sentiment_score_ranges'.
    """
    # the first range has a fixed lower bound, the rest chain the thresholds
    range_labels = [f"-1 to {round(thresholds_l[0], 2)}"]
    range_labels += [
        f"{round(thresholds_l[pos - 1], 2)} to {round(thresholds_l[pos], 2)}"
        for pos in range(1, len(thresholds_l))
    ]

    legend_rows = [
        {
            'main_edge_colour': colour,
            'main_edge_width': round(weights_l[pos], 2),
            'sentiment_score_ranges': range_labels[pos],
        }
        for pos, colour in enumerate(cols_l)
    ]

    return pd.DataFrame.from_dict(legend_rows)

In [19]:
# Output widget that receives the plot, the tables and the graph image
output = widgets.Output()

# dropdown menu for the categories: 'all' first, then every distinct value
# of the 'category1' column
options = ['all', *df_final['category1'].unique()]
dropdown = widgets.Dropdown(options=options, description="Categories")

# date picker, initially set to 1 May 2022
dp = widgets.DatePicker(value=datetime.date(2022, 5, 1), description='Date')

# plot function
def _refresh_dashboard(date_value, category, marker_colour):
    """Filter the articles and redraw everything shown in ``output``.

    Shared by ``plot_sentiment`` and ``select_category``.

    Parameters
    ----------
    date_value
        Date to keep; rows are kept when their 'date' equals
        ``pd.to_datetime(date_value, ...)``.
    category
        Value of 'category1' to keep, or 'all' for no category filter.
    marker_colour
        Fill colour of the scatter markers.
    """
    output.clear_output()

    # data
    wanted_date = pd.to_datetime(date_value, format='%Y-%m-%d %H:%M:%S')
    df_temp = df_final[df_final['date'] == wanted_date]
    if category != 'all':
        df_temp = df_temp[df_temp['category1'] == category]

    # layout
    layout = go.Layout(
        title='Sentiment Scores from Vader',
        xaxis=dict(title='Article Index'),
        yaxis=dict(title='Scores'),
    )

    # figure with one scatter trace: article number vs. sentiment score
    figure = go.Figure(layout=layout)
    figure.add_trace(
        go.Scatter(
            mode='markers',
            x=df_temp['index'],
            y=df_temp['sentiment_score'],
            marker=dict(
                color=marker_colour,
                size=12,
                opacity=0.5,
                line=dict(color='burlywood', width=1),
            ),
            showlegend=False,
        )
    )

    # Everything is rendered inside the Output widget. knowledge_graph is
    # called exactly once: it displays its tables and writes 'filename.png',
    # which is therefore shown right after it. Neither figure.show() nor
    # knowledge_graph() returns anything worth displaying, so their results
    # are not passed to display().
    with output:
        figure.show()
        knowledge_graph(df_temp)
        display(Image(filename='filename.png'))


def plot_sentiment(date_selected, category_selected='news'):
    """Redraw the output for a newly selected date.

    The category filter is the value currently selected in ``dropdown``.
    ``category_selected`` is not used; it is kept so that existing calls
    keep working.
    """
    _refresh_dashboard(date_selected, dropdown.value, 'darkturquoise')


def select_category(category_selected):
    """Redraw the output for a newly selected category ('all' = no filter).

    The date filter is the value currently held by the date picker ``dp``,
    so the result always matches the date shown in the picker, including
    before the user has changed it.
    """
    _refresh_dashboard(dp.value, category_selected, 'turquoise')

def colors_weights(score, score_thresholds_l=None, colors_l=None, weights_l=None):
    """Return ``[colour, width]`` for the range that ``score`` falls into.

    Parameters
    ----------
    score
        Sentiment score to classify.
    score_thresholds_l
        Ascending upper bounds of the ranges. Defaults to the module-level
        ``score_thresholds``.
    colors_l, weights_l
        Colour and width of each range, matched to the thresholds by
        position. Default to the module-level ``colors_list`` and
        ``weights_list``.

    Returns
    -------
    list
        ``[colour, width]`` of the first range whose upper bound is greater
        than ``score``. When no bound is greater (a score at or above the
        last threshold, or a value such as NaN for which every comparison
        is false) the last colour/width pair is returned instead of
        ``None``, so callers can always index the result.
    """
    # None means "use the notebook-level configuration"; resolving it here
    # (rather than in the signature) picks up the lists as currently defined.
    if score_thresholds_l is None:
        score_thresholds_l = score_thresholds
    if colors_l is None:
        colors_l = colors_list
    if weights_l is None:
        weights_l = weights_list

    for upper_bound, colour, width in zip(score_thresholds_l, colors_l, weights_l):
        if score < upper_bound:
            return [colour, width]

    # e.g. a score that rounds to exactly 1.0 with a last threshold of 1.0
    return [colors_l[-1], weights_l[-1]]

def knowledge_graph(df):
    """Draw the category / article / entity graph for ``df`` and save it.

    Every row of ``df`` is one article. The article is linked to its
    upper-cased category by an edge whose colour and width come from
    ``colors_weights`` (applied to the sentiment score rounded to 3
    decimals), and to each of its entities by a 'darkgrey' edge. Entities
    appear in the graph as their numbers from ``d_ents``.

    Side effects: displays the colour/width legend, the table of article
    titles and links, and the table of entities used; draws the graph on
    the current matplotlib figure and writes it to 'filename.png'.

    ``df`` must provide the columns 'category1', 'index', 'sentiment_score',
    'entities', 'title' and 'link'. Nothing is returned.
    """
    G = nx.Graph()

    # NOTE(review): article numbers and entity numbers are used directly as
    # node ids, so an article and an entity with the same number would end up
    # as a single node - confirm whether the two ranges can overlap.
    titles_links = []
    entity_numbers = set()
    rows = zip(df['category1'], df['index'], df['sentiment_score'],
               df['entities'], df['title'], df['link'])
    for category, article, score, article_entities, title, link in rows:
        titles_links.append({'article_number': article, 'title': title, 'link': link})

        # one lookup per article gives both the colour and the width
        color, weight = colors_weights(round(score, 3))
        G.add_edge(category.upper(), article, color=color, weight=weight)

        for entity in article_entities:
            entity_number = d_ents[entity]
            G.add_edge(article, entity_number, color='darkgrey')
            entity_numbers.add(entity_number)

    titles_links_df = pd.DataFrame.from_dict(titles_links)
    # making the links in the DataFrame clickable
    # NOTE(review): escape=False also lets any markup inside the titles render
    # as HTML - confirm that the titles are trusted.
    titles_links_df = HTML(titles_links_df.to_html(render_links=True, escape=False))

    # Colour and width are read edge by edge from the same traversal of
    # G.edges, so both lists have one entry per drawn edge and stay aligned.
    # Entity edges carry no 'weight' attribute and get a plain width of 1.0.
    edge_attrs = [attrs for _, _, attrs in G.edges(data=True)]
    colors = [attrs['color'] for attrs in edge_attrs]
    weights = [attrs.get('weight', 1.0) for attrs in edge_attrs]

    display(cols_weights_thresholds(colors_list, weights_list, score_thresholds))

    display(titles_links_df)

    display(find_entities(entities_df, sorted(entity_numbers)))

#     cmap = plt.cm.coolwarm

    # start from an empty figure so a redraw never lands on top of an
    # earlier graph
    plt.clf()
    nx.draw(G, with_labels=True, edge_color=colors, width=weights)
    plt.savefig("filename.png")

#     sm = plt.cm.ScalarMappable(cmap=cmap)
#     sm.set_array([])
#     cbar = plt.colorbar(sm)
        
# creating the event handlers: `value` is the change object passed in by observe(); `value.new` holds the newly selected value
def event_handler_date(value):
    """React to a change of the DatePicker.

    ``value`` is the change object delivered by ``observe``; its ``.new``
    attribute is the most recent date coming from the DatePicker. The date
    is remembered in the global ``val_date`` and the plot is redrawn for it.
    """
    global val_date
    val_date = picked_date = value.new
    plot_sentiment(picked_date)
     
def dropdown_event_handler(value):
    """React to a change of the categories dropdown.

    ``value`` is the change object delivered by ``observe``; its ``.new``
    attribute is the most recent value coming from the dropdown menu. The
    category is remembered in the global ``val_cat`` and the output is
    redrawn for it.
    """
    global val_cat
    val_cat = picked_category = value.new
    select_category(picked_category)
    
# binding the event handler to the DatePicker: whenever the picker's value
# changes, the change is passed to event_handler_date, which hands the new date
# to plot_sentiment so that the plot is regenerated
dp.observe(event_handler_date, names='value')

# binding the event handler to the categories dropdown menu in the same way
dropdown.observe(dropdown_event_handler, names='value')

# displaying the DatePicker
display(dp)

# displaying the dropdown menu
display(dropdown)

DatePicker(value=datetime.date(2022, 5, 1), description='Date')

Dropdown(description='Categories', options=('all', 'news', 'sport', 'future', 'culture', 'worklife', 'travel')…

In [20]:
# displaying the output coming from figure.show()
# displaying the Output widget that the date / category handlers render into
# (the scatter plot, the graph image and the tables)
display(output)

Output()

In [47]:
# # function for hiding/revealing our code
# HTML('''<script>
# code_show=true;
# function code_toggle() {
#     if (code_show) {
#         $('div.input').hide();
#     } else {
#         $('div.input').show();
#     }
#     code_show = !code_show
# }
# $(document).ready(code_toggle);
# </script>
# <form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle the code on/off"></form>''')