In [1]:
import pandas as pd
def unique_actors(df):
    """
    Return every distinct actor named in the ``CN`` column of ``df``.

    Each ``CN`` cell is split on ", " and the resulting names are pooled
    across all rows. Empty names (from empty cells) are discarded.

    :param df: DataFrame with a ``CN`` column holding ", "-separated strings.
    :return: list of unique actor names, sorted alphabetically so the result
        is deterministic from one run to the next.
    """
    actors = set()
    # Iterate the column directly; building a full row object per record
    # (iterrows) is unnecessary when only one field is read.
    for cell in df["CN"]:
        actors.update(cell.split(", "))

    # "".split(", ") yields [""], which is not a real actor.
    actors.discard("")
    return sorted(actors)

In [1]:

from collections import Counter
from itertools import combinations, product

import networkx as nx

import plotly.express as px
import plotly.graph_objects as go

def network_graph(df, total_actors, seed=None):
    """
    Build a plotly figure of the actor co-occurrence network found in ``df``.

    Every ``CN`` cell is split on ", ". Each distinct actor becomes a node
    whose marker size is proportional to how often it is mentioned; every
    pair of actors appearing in the same row becomes an edge whose line
    width is proportional to how often that pair occurs. A name repeated
    within one row produces a self-pair, which is kept as a self-loop.
    Node colour encodes the number of neighbours.

    :param df: DataFrame with a ``CN`` column of ", "-separated actor names.
    :param total_actors: currently unused; kept so existing calls keep
        working (no top-N filtering is applied).
    :param seed: optional seed forwarded to ``nx.spring_layout`` so the node
        placement can be reproduced. ``None`` keeps the random layout.
    :return: ``plotly.graph_objects.Figure`` with one line trace per edge
        followed by a single marker+text trace for the nodes.
    """
    # One pass over the rows: count mentions and pairwise co-occurrences.
    total_mention = Counter()
    total_edge = Counter()
    for cell in df["CN"]:
        # Drop empty names so edges never reference a node that
        # unique_actors() has filtered out.
        actors_in_row = sorted(name for name in cell.split(", ") if name)
        total_mention.update(actors_in_row)
        total_edge.update(combinations(actors_in_row, 2))

    # Networkx
    g = nx.Graph()
    g.add_nodes_from(unique_actors(df))
    for (c1, c2), count_edge in total_edge.items():
        g.add_edge(c1, c2, weight=count_edge)

    # Sizes are read back in graph-node order so they always line up with
    # the coordinates emitted for the node trace below.
    nodesize = [total_mention[node] for node in g.nodes()]
    maxi_node = max(nodesize, default=1)
    node_size = [100 * count / maxi_node for count in nodesize]

    # Largest edge weight, used to normalise line widths. The default keeps
    # an edgeless graph from raising.
    maxi = max(total_edge.values(), default=1)

    pos = nx.spring_layout(g, k=1, iterations=200, seed=seed)

    # Edges = Lines logic (one trace per edge so each can have its own width)
    edge_trace = []
    for c1, c2, count_edge in g.edges(data="weight"):
        x0, y0 = pos[c1]
        x1, y1 = pos[c2]
        edge_trace.append(go.Scatter(
            x=[x0, x1, None], y=[y0, y1, None],
            line=dict(width=4 * count_edge / maxi),
            mode='lines'))

    # Nodes logic
    node_x = []
    node_y = []
    node_name = []
    for node in g.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_name.append(str(node))

    # Number of neighbours per node, in the same order as g.nodes().
    node_adjacencies = [len(neighbours) for _, neighbours in g.adjacency()]

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hovertext=nodesize,
        text=node_name,
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            reversescale=True,
            color=node_adjacencies,
            size=node_size,
            colorbar=dict(
                thickness=15,
                # Nested title form; replaces the legacy `titleside` key.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            ),
            line_width=2))

    edge_trace.append(node_trace)
    fig = go.Figure(data=edge_trace,
                    layout=go.Layout(
                        # Nested title form; replaces legacy `titlefont_size`.
                        title=dict(
                            text='<br>Network graph showing actors collaborations',
                            font=dict(size=16)),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[dict(
                            text=str(len(df)) + "</a>",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002)],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )

    fig.update_traces(textposition='top center', textfont=dict(family='sans-serif', size=15, color='#000'))
    return fig

In [8]:
# Load the input table from a pickle file.
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code; only load files from a trusted source. The path is relative, so it
# resolves against the notebook's working directory.
df = pd.read_pickle("../data/output/WoS_SDG.pkl")

In [35]:

# NOTE(review): network_graph() ends with `return fig`, so on a fresh kernel
# this name holds a plotly Figure. The following cells use it as an edge-count
# DataFrame (columns edge/c1/c2), which matches the commented-out
# `return df_edge` inside the function; presumably this cell was last run
# against that earlier version. Confirm before relying on Restart & Run All.
df_edge = network_graph(df, total_actors=15)

In [53]:
# Order the rows by the 'edge' column, largest value first.
df_sorted = df_edge.sort_values(by=['edge'], ascending=False)

In [54]:
# Replace the index with 0..n-1 so it follows the sorted order; the old index is discarded.
df_sorted = df_sorted.reset_index(drop=True)


In [55]:
# Display the sorted table as the cell output.
df_sorted

Unnamed: 0,edge,c1,c2
0,1247758,United States,United States
1,544572,China,China
2,358522,China,United States
3,198150,United Kingdom,United Kingdom
4,194510,United Kingdom,United States
...,...,...,...
10808,1,Gabon,Zambia
10809,1,Gabon,Mexico
10810,1,Costa Rica,Gabon
10811,1,Chad,Kosovo
