In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [52]:
def get_hashtag_network(df, hashtag: str):
    '''Build the reply edge list for the tweets in `df` that carry `hashtag`.

    Rows are kept when `hashtag in <row's "hashtags" value>` is true. From those
    rows the 'screen_name', 'reply_to_screen_name' and 'created_at' columns are
    taken, rows with any missing value and exact duplicate rows are dropped,
    the index is renumbered from 0, and the first two columns are renamed to
    'from' and 'to'.

    Returns a new dataframe with columns ['from', 'to', 'created_at'].
    '''
    edge_columns = ['screen_name', 'reply_to_screen_name', 'created_at']
    new_names = {'screen_name': 'from', 'reply_to_screen_name': 'to'}

    # Boolean row selector: True where the row's hashtags contain the target.
    has_tag = df["hashtags"].apply(lambda tags: hashtag in tags)
    tagged = df[has_tag]

    return (
        tagged[edge_columns]
        .dropna()
        .drop_duplicates()
        .reset_index(drop=True)
        .rename(columns=new_names)
    )


def get_strongly_gcc(G):
    """Return the giant (largest) strongly connected component of G.

    G is passed to ``nx.strongly_connected_components``; the largest node set
    it yields (the first one on ties) is handed to ``G.subgraph`` and that
    subgraph is returned.

    When G has no components at all (e.g. a graph without nodes) the subgraph
    over an empty node set is returned instead of letting ``max()`` raise
    ``ValueError``.
    """
    # `default=()` covers the empty-graph case, where the iterator yields nothing.
    largest_nodes = max(nx.strongly_connected_components(G), key=len, default=())
    return G.subgraph(largest_nodes)

def get_weakly_gcc(G):
    """Return the giant (largest) weakly connected component of G.

    G is passed to ``nx.weakly_connected_components``; the largest node set it
    yields (the first one on ties) is handed to ``G.subgraph`` and that
    subgraph is returned.

    When G has no components at all (e.g. a graph without nodes) the subgraph
    over an empty node set is returned instead of letting ``max()`` raise
    ``ValueError``.
    """
    # `default=()` covers the empty-graph case, where the iterator yields nothing.
    largest_nodes = max(nx.weakly_connected_components(G), key=len, default=())
    return G.subgraph(largest_nodes)
   

def plot_network(G, G_degree=None, seed=None):
    ''' Plot the graph with a spring layout, optionally sizing nodes by degree.

    Parameters
    ----------
    G : graph to draw (passed to ``nx.spring_layout`` and ``nx.draw``).
    G_degree : optional mapping / iterable of ``(node, degree)`` pairs, i.e.
        anything ``dict()`` accepts. When given, only these nodes are drawn
        and each node's size is ``degree * 10``. When omitted, every node of
        G is drawn with size 1.
    seed : optional seed forwarded to ``nx.spring_layout`` so the layout can
        be reproduced between runs. ``None`` keeps the random layout.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    '''

    plt.figure(num=None, figsize=(15, 15), dpi=60, facecolor='w', edgecolor='k')
    pos = nx.spring_layout(G, seed=seed)

    if G_degree is not None:
        # Build the dict once so nodelist and node_size share the same order.
        degrees = dict(G_degree)
        nodelist = list(degrees)
        node_size = [degree * 10 for degree in degrees.values()]
    else:
        # No degrees supplied: draw every node at a uniform size. The previous
        # code called dict(None) here and raised TypeError.
        nodelist = list(G)
        node_size = 1

    nx.draw(G, pos, nodelist=nodelist, node_size=node_size, width=0.5, alpha=0.5, edge_color='b')
    plt.axis('off')
    plt.show()

In [5]:
# The merged dataset was written to CSV, so these columns no longer retain their
# original Python types when read back; the converters re-parse each cell to
# restore the original form.
# NOTE(review): `eval` executes whatever expression a cell contains, so only load
# a CSV you trust. If the cells are plain literals, `ast.literal_eval` would be a
# safer parser - confirm against the data before switching.
df = pd.read_csv('data/merged-dataset.csv', converters={"hashtags": eval, "mentions": eval, "is_quote": eval}) 
df.columns

Index(['status_id', 'created_at', 'screen_name', 'text', 'source',
       'reply_to_status_id', 'reply_to_screen_name', 'is_quote', 'is_retweet',
       'favourites_count', 'retweet_count', 'country_code', 'followers_count',
       'friends_count', 'verified', 'lang', 'hashtags', 'mentions'],
      dtype='object')

In [57]:
# Show the reply edge list (from, to, created_at) for tweets whose hashtags contain 'COVID19'.
get_hashtag_network(df, 'COVID19')

Unnamed: 0,from,to,created_at
0,DZMMTeleRadyo,DZMMTeleRadyo,2020-03-29T00:01:14Z
1,CPHO_Canada,CPHO_Canada,2020-03-29T00:03:14Z
2,zorayapuente,Reylope13,2020-03-29T00:05:37Z
3,picardonhealth,picardonhealth,2020-03-29T00:09:29Z
4,threadreaderapp,DrJenniferCo_Vu,2020-03-29T00:10:31Z
...,...,...,...
20003,RepEscobar,RepEscobar,2020-04-30T23:33:22Z
20004,TNHORSEFARMS,TNHORSEFARMS,2020-04-30T23:39:21Z
20005,CPHO_Canada,CPHO_Canada,2020-04-30T23:49:50Z
20006,CPHO_Canada,CPHO_Canada,2020-04-30T23:49:51Z
