In [1]:
import pandas as pd
import networkx as nx
import nx_altair as nxa
import altair as alt
import re

In [2]:
# Read the three pre-expanded dataset splits (train / dev / test) from disk.
df_train, df_dev, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'assets/df_train_expand.csv',
        'assets/df_dev_expand.csv',
        'assets/df_test_expand.csv',
    )
)

In [3]:
# Stack the three splits into one frame. ignore_index=True gives every row a
# unique label; without it the per-file row labels repeat across splits.
df = pd.concat([df_train, df_dev, df_test], ignore_index=True)
# Keep only rows whose 'Response-Code' is 'Successful'. The explicit copy makes
# the result an independent frame, so adding columns to it later does not
# trigger pandas' chained-assignment warning.
df = df[df['Response-Code'] == 'Successful'].copy()

In [4]:
# Preview the first rows of the combined, filtered frame.
df.head()

Unnamed: 0,id_str,tweet_text,class_label,place,disaster,year,contributors,coordinates,created_at,entities,...,user_id_str,user_name,user_screen_name,user_location,user_verified,user_followers_count,user_friends_count,user_listed_count,user_statuses_count,user_created_at
0,735891446960623616,RT @DonBradshawNTV: How @MarshallAmpsUK came t...,other_relevant_information,canada,wildfires,2016,,,2016-05-26 17:52:47+00:00,"{'hashtags': [], 'symbols': [], 'user_mentions...",...,2246413000.0,🎸 Owen 📷✪,OrussellRussell,"Stephenville, NL",0.0,2924.0,3797.0,34.0,65498.0,2013-12-27 15:59:20+00:00
2,733665357236342784,Interesting insights on the shifting communica...,other_relevant_information,canada,wildfires,2016,,,2016-05-20 14:27:06+00:00,"{'hashtags': [{'text': 'wildfire', 'indices': ...",...,292865800.0,Kabi,The_Fuzz74,"Ottawa, Canada",0.0,952.0,566.0,442.0,23321.0,2011-05-04 11:07:37+00:00
3,731963038429929472,RT @globeandmail: Oil sands producers helping ...,rescue_volunteering_or_donation_effort,canada,wildfires,2016,,,2016-05-15 21:42:42+00:00,"{'hashtags': [], 'symbols': [], 'user_mentions...",...,41821670.0,Jim Hair,AgFreeAgent,Picton,0.0,1328.0,1322.0,59.0,31208.0,2009-05-22 14:28:50+00:00
4,728674838034944001,Ottawa to match Red Cross donations for Fort M...,rescue_volunteering_or_donation_effort,canada,wildfires,2016,,,2016-05-06 19:56:34+00:00,"{'hashtags': [], 'symbols': [], 'user_mentions...",...,551013500.0,P. M. Torres 🍁,torresimagery,Canada,0.0,1948.0,2726.0,0.0,37693.0,2012-04-11 13:56:57+00:00
5,731976125279289349,Active Fires - Ontario #on411 #onfires #wildf...,caution_and_advice,canada,wildfires,2016,,,2016-05-15 22:34:42+00:00,"{'hashtags': [{'text': 'on411', 'indices': [49...",...,7.311254e+17,310-Fire,Call310Fire,Canada,0.0,32.0,58.0,17.0,205.0,2016-05-13 14:14:23+00:00


In [5]:
def create_event_name(df):
    """Build a human-readable event name from a single record.

    Despite the parameter name, ``df`` is one row (anything indexable by
    'year', 'place' and 'disaster'), as passed by ``DataFrame.apply(axis=1)``.

    The place has any parenthesised part removed, 'srilanka' spelled out,
    underscores turned into spaces, surrounding/duplicate whitespace collapsed
    and is then title-cased. A standalone word "Us" is rendered as "U.S.".

    Returns:
        '<year> <Disaster> <Place>' for hurricanes and cyclones (the place is
        used as the storm's name), otherwise '<year> <Place> <Disaster>'.
    """
    year = str(df['year'])
    disaster = df['disaster']

    place = re.sub(r"\(.*\)", "", df['place'])
    place = place.replace('srilanka', 'sri lanka').replace('_', ' ')
    # Removing a parenthetical or a trailing underscore can leave stray or
    # doubled spaces; normalise them so the name has single separators.
    place = ' '.join(place.split()).title()
    # Only the whole word "Us" is the abbreviation. A plain str.replace would
    # also mangle words that merely start with "Us" (e.g. "Ushuaia").
    place = re.sub(r'\bUs\b', 'U.S.', place)

    if disaster in ('hurricane', 'cyclone'):
        return year + ' ' + disaster.title() + ' ' + place
    return year + ' ' + place + ' ' + disaster.title()

In [6]:
# Derive a display name for each row's event by applying create_event_name row-wise.
df['event_name'] = df.apply(create_event_name, axis=1)

In [17]:
# List the distinct class labels present in the data, in alphabetical order.
sorted(df['class_label'].unique())

['caution_and_advice',
 'displaced_people_and_evacuations',
 'infrastructure_and_utility_damage',
 'injured_or_dead_people',
 'missing_or_found_people',
 'not_humanitarian',
 'other_relevant_information',
 'requests_or_urgent_needs',
 'rescue_volunteering_or_donation_effort',
 'sympathy_and_support']

In [26]:
def _parse_mentions(raw):
    """Split a stringified list of screen names into clean names.

    The value is treated as the text of a list of quoted names, e.g.
    "['alice', 'bob']". Brackets, quotes and surrounding whitespace are
    removed and empty entries are dropped. A non-string value (such as a
    missing cell) yields no mentions.
    """
    if not isinstance(raw, str):
        return []
    parts = raw.strip().strip('[]').split(',')
    # Strip whitespace as well as quotes: entries after the first are
    # preceded by a space ("a, b"), which would otherwise create a distinct,
    # space-prefixed node name.
    names = (part.strip().strip("'\"").strip() for part in parts)
    return [name for name in names if name]


def build_network(df, event_name):
    """Build an undirected, weighted mention graph for one event.

    Args:
        df: frame with the columns 'event_name', 'user_screen_name',
            'entities_user_mentions_screen_name' and 'class_label'.
        event_name: value of 'event_name' selecting the rows to use.

    Returns:
        networkx.Graph in which an edge joins a tweet author and a user they
        mention; the edge 'weight' is the number of such mentions between the
        pair, in either direction. Self-mentions are ignored. Every author
        node carries 'tweet_count' (number of rows by that author), 'label'
        (the screen name) and one attribute per class label the author
        tweeted, set to 1. Nodes that are only mentioned carry none of these
        attributes. An event without any mentions yields a graph with author
        nodes and no edges.
    """
    df_event = df[df['event_name'] == event_name]

    # Mention counts keyed by (node_1, node_2) in first-seen orientation, so
    # that a->b and b->a accumulate on the same undirected edge.
    edge_counts = {}
    for author, raw_mentions in zip(df_event['user_screen_name'],
                                    df_event['entities_user_mentions_screen_name']):
        for mention in _parse_mentions(raw_mentions):
            if mention == author:
                continue
            key = (author, mention)
            if key not in edge_counts and (mention, author) in edge_counts:
                key = (mention, author)
            edge_counts[key] = edge_counts.get(key, 0) + 1

    G_w = nx.Graph()
    # Built straight from the dict, so an empty edge set is fine. No extra
    # keyword is passed: add_weighted_edges_from stores unknown keywords as
    # attributes on every edge.
    G_w.add_weighted_edges_from(
        (node_1, node_2, count) for (node_1, node_2), count in edge_counts.items()
    )

    for author, class_label in zip(df_event['user_screen_name'], df_event['class_label']):
        G_w.add_node(author)  # no-op when the author is already in the graph
        attrs = G_w.nodes[author]
        attrs['tweet_count'] = attrs.get('tweet_count', 0) + 1
        attrs['label'] = author
        attrs[class_label] = 1

    return G_w

In [45]:
def draw_network(G, event_name, class_label):
    """Display the sub-network of users who tweeted under one class label.

    Nodes of ``G`` whose attribute dict contains ``class_label`` as a key are
    selected; if there are none, nothing is drawn. Otherwise the induced
    subgraph is laid out with the Kamada-Kawai algorithm and shown as a
    500x500 layered Altair chart: circles sized by 'tweet_count' (with a
    'label' / 'tweet_count' tooltip) over lines whose colour and width encode
    the edge 'weight'. The chart is displayed; the function returns None.
    """
    members = [node for node, attrs in G.nodes(data=True) if class_label in attrs]
    if not members:
        return

    subgraph = G.subgraph(members)
    layout = nx.kamada_kawai_layout(subgraph)

    edge_layer = nxa.draw_networkx_edges(subgraph, pos=layout)
    node_layer = nxa.draw_networkx_nodes(subgraph, pos=layout)

    # Node layer: circle area grows with the number of tweets by that user.
    node_size = alt.Size(
        'tweet_count:Q',
        legend=alt.Legend(symbolFillColor='black', symbolStrokeWidth=0),
        scale=alt.Scale(range=[10, 500]),
    )
    node_layer = node_layer.mark_circle(opacity=1).encode(
        size=node_size,
        tooltip=['label', 'tweet_count'],
    )

    # Edge layer: both colour and stroke width follow the mention count.
    edge_color = alt.Color('weight:Q', legend=None)
    edge_width = alt.Size('weight:Q', scale=alt.Scale(range=[1, 2]), legend=None)
    edge_layer = edge_layer.mark_line(opacity=0.8).encode(
        color=edge_color,
        size=edge_width,
    )

    title_lines = ['Network Graph of', class_label + ' tweets', 'During ' + event_name]
    layered = (edge_layer + node_layer).properties(
        width=500,
        height=500,
        title=title_lines,
    )
    # Node and edge layers both use the size channel; keep their scales apart.
    layered.resolve_scale(size='independent').display()

In [46]:
def draw_network_by_event(df, event_name):
    """Draw one mention-network chart per class label for a single event.

    The graph is built once for ``event_name``; the class labels iterated
    over are all labels present in ``df`` (not only those of the event), in
    alphabetical order.
    """
    graph = build_network(df, event_name)
    labels = sorted(df['class_label'].unique())
    for label in labels:
        draw_network(graph, event_name, label)

In [7]:
# List the distinct derived event names, in alphabetical order.
sorted(df['event_name'].unique())

['2016 Canada Wildfires',
 '2016 Ecuador Earthquake',
 '2016 Hurricane Matthew',
 '2016 Italy Earthquake',
 '2016 Kaikoura Earthquake',
 '2017 Hurricane Harvey',
 '2017 Hurricane Irma',
 '2017 Hurricane Maria',
 '2017 Puebla Earthquake',
 '2017 Sri Lanka Floods',
 '2018 California Wildfires',
 '2018 Hurricane Florence',
 '2018 Kerala Floods',
 '2019 Cyclone Idai',
 '2019 Hurricane Dorian',
 '2019 Midwestern U.S. Floods',
 '2019 Pakistan Earthquake']

In [48]:
# Draw the per-class-label mention networks for the 2016 Canada wildfires.
draw_network_by_event(df, '2016 Canada Wildfires')

In [49]:
# Draw the per-class-label mention networks for the 2016 Ecuador earthquake.
draw_network_by_event(df, '2016 Ecuador Earthquake')

In [None]:
# Draw the per-class-label mention networks for the 2017 Sri Lanka floods.
draw_network_by_event(df, '2017 Sri Lanka Floods')