In [18]:
# Load libs and define helper functions
from typing import List, FrozenSet, Tuple
import os

import pandas as pd
import numpy as np

from pyvis.network import Network as VisNetwork


class EdgeList:
    """Entity co-mention edge list loaded from a CSV file.

    Each row of the underlying frame describes one (entity1, entity2) pair
    for a given year. The class exposes the pre-processed frame, a few
    derived views (years, unique entity names, row masks built from the
    ``person_flag`` / ``org_flag`` columns) and a helper that populates a
    network visualization canvas with one node per entity.
    """

    def __init__(self, edge_list: str) -> None:
        """Load and pre-process the edge list.

        Args:
            edge_list: path of the CSV file holding the edge list. Only the
                expected columns (see ``cols_expected`` below) are read, each
                parsed with the dtype listed there.

        Raises:
            FileNotFoundError: ``edge_list`` is a str but no such path exists.
            ValueError: ``edge_list`` is not a str.
        """

        # Expected columns in source edge list and the dtype each is read as
        cols_expected = {
            'year':                int,
            'entity1':             str,
            'entity2':             str,
            'co_mention_count':    int,
            'tone_sum':            float,
            'co_mention_tone_avg': float,
            'org_flag':            bool,
            'person_flag':         bool
        }

        # Initialize edge list data frame
        if not isinstance(edge_list, str):
            raise ValueError(f'<edge_list> should either be filename str; got <{type(edge_list)}>')
        if not os.path.exists(edge_list):
            raise FileNotFoundError(f'Cannot find edge list source file @ <{edge_list}>')
        df_edge = pd.read_csv(edge_list, usecols=list(cols_expected), dtype=cols_expected)

        # Preprocess pipeline
        self.df_edge = self._preprocess(df_edge)

    def _preprocess(self, df_edge: pd.DataFrame) -> pd.DataFrame:
        """Edge list preprocessing pipeline. Called during initialization.

        Title-cases both entity columns and sorts the rows by year. A new
        frame is returned; the frame passed in is left untouched.
        """

        # Make sure entity names are titled
        titled = df_edge.assign(
            entity1=df_edge['entity1'].str.title(),
            entity2=df_edge['entity2'].str.title(),
        )

        return titled.sort_values('year')

    @property
    def df(self) -> pd.DataFrame:
        """The pre-processed edge list frame."""
        return self.df_edge

    @property
    def years(self) -> np.ndarray:
        """Distinct values of the ``year`` column, in ascending order."""
        return np.sort(self.df.year.unique())

    @property
    def entities(self) -> FrozenSet:
        """Unique entity names appearing in either entity column."""
        return frozenset(self.df.entity1).union(self.df.entity2)

    @property
    def mask_ppl(self) -> pd.Series:
        """Return boolean index of row with ppl entities only"""

        return self.df.person_flag & ~self.df.org_flag

    @property
    def mask_org(self) -> pd.Series:
        """Return boolean index of row with org entities only"""

        return ~self.df.person_flag & self.df.org_flag

    @property
    def mask_mix(self) -> pd.Series:
        """Return boolean index of row with both ppl AND org entities only"""

        return self.df.person_flag & self.df.org_flag

    def draw(self, canvas: "VisNetwork") -> "VisNetwork":
        """Populate a network canvas with one node per entity and set its
        layout options. The input canvas is assumed to be empty.

        Args:
            canvas: object to draw on; ``add_node``, ``repulsion`` and
                ``show_buttons`` are called on it.

        Returns:
            The same ``canvas`` object, after population.
        """

        # Add nodes. Iteration order of a frozenset of strings is not stable
        # between interpreter runs, so sort the names to keep node ids
        # reproducible. key=str keeps the sort from failing should a
        # non-string value (e.g. a missing name) be present in the set.
        for node_id, name in enumerate(sorted(self.entities, key=str)):
            canvas.add_node(
                node_id,
                value=2,
                label=name,
                title=name
            )

        # TODO: edges are not drawn yet; only nodes are added to the canvas.

        canvas.repulsion(central_gravity=0.1, spring_length=512)
        canvas.show_buttons(filter_=['nodes', 'edges', 'physics'])
        return canvas

In [20]:
# Build the edge list from the CSV export (relative path)
el = EdgeList(edge_list='../data/sec_edge_only.csv')

In [21]:
# Show the unique entity names collected from the entity1 and entity2 columns
el.entities

frozenset({'Abba Kyari',
           'Abdulmumin Jibrin',
           'Abdulsalami Abubakar',
           'Abiola Ajimobi',
           'Abubakar Malami',
           'Abubakar Shekau',
           'Academic Staff Union Of Universities',
           'Access Bank',
           'Action Congress',
           'Adama Barrow',
           'Adams Oshiomhole',
           'Adamu Adamu',
           'Addis Ababa',
           'African Development Bank',
           'African Union',
           'Agboola Ajayi',
           'Ahmad Lawan',
           'Ahmadu Bello',
           'Ahmed Lawan',
           'Ahmed Makarfi',
           'Ahmed Tinubu',
           'Airports Authority Of Nigeria',
           'Airtel',
           'Aisha Buhari',
           'Akinwunmi Ambode',
           'Alex Ekwueme',
           'Alexis Akwagyiram',
           'Alhaji Atiku Abubakar',
           'Alhaji Lai Mohammed',
           'Ali Modu Sheriff',
           'Aliko Dangote',
           'Alliance For Democracy',
           'American Petr