In [7]:
import pandas as pd
import datetime
import networkx as nx
import plotly.graph_objects as go
import plotly.express as px

In [8]:
# Stream the CSV in small chunks and keep only the labelled rows (label != 0).
# The filtered chunks are gathered in a list and concatenated once: calling
# pd.concat on the growing frame inside the loop re-copies every row already
# accumulated on each iteration.
labelled_chunks = [
    chunk[chunk["label"] != 0]
    for chunk in pd.read_csv("nn_all.csv", chunksize=4)
]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the reader yields no chunks at all.
file = pd.concat(labelled_chunks) if labelled_chunks else pd.DataFrame()

In [9]:
def to_str(v):
    """Return the integer part of ``v`` as a decimal string.

    ``v`` may be a number or a numeric string. Values that are already
    integral (``int`` objects or strings such as ``"1249964395729317889"``)
    are converted exactly; routing them through ``float`` would round
    anything above 2**53, which corrupts 64-bit ids. Floats and strings such
    as ``"3.7"`` are truncated toward zero.

    Raises ``ValueError``/``TypeError`` for non-numeric input and
    ``ValueError``/``OverflowError`` for NaN/infinity.
    """
    if not isinstance(v, float):
        try:
            # Exact path: ints and integer-formatted strings.
            return str(int(v))
        except (TypeError, ValueError):
            # Not directly convertible (e.g. "3.7"); fall back to float parsing.
            pass
    return str(int(float(v)))
def to_int(v):
    """Convert a number or numeric string to ``int``, truncating toward zero.

    Values that are already integral (``int`` objects or strings such as
    ``"1249964395729317889"``) are converted exactly; routing them through
    ``float`` would round anything above 2**53, which corrupts 64-bit ids.
    Floats and strings such as ``"3.7"`` are parsed as floats and truncated.

    Raises ``ValueError``/``TypeError`` for non-numeric input and
    ``ValueError``/``OverflowError`` for NaN/infinity.
    """
    if not isinstance(v, float):
        try:
            # Exact path: ints and integer-formatted strings.
            return int(v)
        except (TypeError, ValueError):
            # Not directly convertible (e.g. "3.7"); fall back to float parsing.
            pass
    return int(float(v))

In [10]:
# Keep only rows with a non-zero label (the chunked load applies the same
# filter, so this acts as a safeguard when the frame comes from elsewhere).
file = file.loc[file["label"].ne(0)]

In [11]:
# Number of rows per label value.
file["label"].value_counts()

1.0    30540
2.0    14684
3.0      630
Name: label, dtype: int64

In [13]:
# Coalesce the tweet id into one column: take "tweet_id" and, where it is
# missing, fall back to the two whitespace-padded variants of the same header.
file["all_tweet_id"] = (
    file["tweet_id"]
    .fillna(file["                    tweet_id"])
    .fillna(file["                tweet_id"])
)

In [14]:
# Remove the two whitespace-padded tweet-id columns.
file = file.drop(
    columns=[
        "                    tweet_id",
        "                tweet_id",
    ]
)

In [15]:
# Display the column labels of the current frame.
file.columns

Index(['date', 'hts', 'is_quote', 'is_retweet', 'lat', 'likes', 'long',
       'mentions', 'original_tweet_id', 'quote_count', 'quoted_text',
       'replies', 'retweets', 'text', 'tweet_id', 'user', 'label', 'full_text',
       'original_tweet_label', 'all_tweet_id'],
      dtype='object')

In [20]:
# Keep only rows that reference an original tweet. notna() states the intent
# directly instead of relying on NaN failing a self-equality comparison.
# .copy() makes the result an independent frame, so later column assignments
# do not write into a slice of the previous frame (SettingWithCopyWarning).
file = file[file["original_tweet_id"].notna()].copy()
# Sanity check: only a False bucket should remain.
file["original_tweet_id"].isnull().value_counts()

False    36479
Name: original_tweet_id, dtype: int64

In [21]:
# Convert both id columns to Python ints via to_int (element-wise).
file = file.assign(
    **{
        id_column: file[id_column].map(to_int)
        for id_column in ("all_tweet_id", "original_tweet_id")
    }
)

In [22]:
# Preview the first rows of the frame (DataFrame.head default row count).
file.head()

Unnamed: 0,date,hts,is_quote,is_retweet,lat,likes,long,mentions,original_tweet_id,quote_count,quoted_text,replies,retweets,text,tweet_id,user,label,full_text,original_tweet_label,all_tweet_id
15841,2020-04-14 07:35:09,,False,True,,0.0,,,1249816310181068800,0.0,,0.0,0.0,RT @RealJamesWoods: South Dakota implements st...,,delyju,1.0,RT @RealJamesWoods: South Dakota implements st...,1.0,1249964395729317888
15842,2020-04-04 04:54:59,,False,True,,0.0,,,1246279260958003200,0.0,,0.0,0.0,RT @Observista_uk: Trump-backed anti-malaria d...,,EWilliams22101,1.0,RT @Observista_uk: Trump-backed anti-malaria d...,1.0,1246300206787751936
15843,2020-04-30 02:33:05,,False,True,,0.0,,,1255683322019266560,0.0,,0.0,0.0,RT @CNN: The US Food and Drug Administration h...,,TiamiyuAkeemo2,1.0,RT @CNN: The US Food and Drug Administration h...,1.0,1255686581077278720
15844,2020-04-14 08:19:42,,False,True,,0.0,,,1249737584118554624,0.0,,0.0,0.0,RT @eviefordham: South Dakota will be the firs...,,mmyscakes,1.0,RT @eviefordham: South Dakota will be the firs...,1.0,1249975604386160640
15845,2020-04-14 07:49:51,,False,True,,0.0,,,1249816310181068800,0.0,,0.0,0.0,RT @RealJamesWoods: South Dakota implements st...,,justright09,1.0,RT @RealJamesWoods: South Dakota implements st...,1.0,1249968095847796736


In [23]:
# (row count, column count) of the current frame.
file.shape

(36479, 20)

In [24]:
# Derive the calendar day from each "date" string by taking the text before
# the first space; rows with a missing date get the placeholder "0".
# The vectorised .str accessor replaces repeated per-row file.iloc[i] lookups.
# astype(object) lets .str work even when the column holds no strings at all
# (an empty or all-missing column).
day_list = (
    file["date"]
    .astype(object)
    .str.split(" ")
    .str[0]
    .fillna("0")
    .tolist()
)
file["day"] = day_list

In [25]:
# Distinct day strings, parsed and sorted chronologically, then rendered back
# to "YYYY-MM-DD". The "0" placeholder marks rows with a missing date; it is
# not a valid date and would make strptime raise ValueError, so it is skipped.
days = file["day"].unique()
dates = sorted(
    datetime.datetime.strptime(ts, "%Y-%m-%d") for ts in days if ts != "0"
)
sorteddates = [ts.strftime("%Y-%m-%d") for ts in dates]

In [27]:
# Build the retweet graph: one node per tweet id, one edge from each tweet to
# the original tweet it references. Nodes carry "cat" (label) and "day".
G = nx.Graph()

# Iterate the rows once. Filtering the whole frame with file[file.index == i]
# for every field of every row is quadratic in the number of rows.
edge_columns = [
    "all_tweet_id",
    "label",
    "original_tweet_id",
    "original_tweet_label",
    "day",
]
for node1, node1_label, node2, node2_label, date in file[edge_columns].itertuples(
    index=False, name=None
):
    node1, node2 = int(node1), int(node2)
    # NOTE(review): add_node on an existing node updates its attributes, so a
    # tweet that appears in several rows keeps the cat/day of the last row
    # processed -- confirm that this is intended for original tweets.
    G.add_node(node1, cat=int(node1_label), day=date)
    G.add_node(node2, cat=int(node2_label), day=date)
    G.add_edge(node1, node2)

In [28]:
# Graph size as a (node count, edge count) tuple.
G.number_of_nodes(), G.number_of_edges()

(38576, 36464)

In [29]:
# Store cumulative subgraphs in graph_dict = {date: subgraph}: the entry for a
# day contains every node whose "day" attribute is that day or an earlier one.
graph_dict = {}
node_day_cum = []

# Group the nodes by day in a single pass instead of rescanning all nodes of G
# for every date.
nodes_by_day = {}
for node, attrs in G.nodes(data=True):
    nodes_by_day.setdefault(attrs['day'], []).append(node)

for day in sorteddates:
    node_day_cum += nodes_by_day.get(day, [])
    graph_dict[day] = G.subgraph(node_day_cum)

In [30]:
def showGraph(G, date, seed=None):
    """Draw graph ``G`` as an interactive Plotly figure and display it.

    Node positions come from ``nx.spring_layout``. Each node is coloured by
    its number of neighbours, and that number is shown as hover text.

    Parameters
    ----------
    G : networkx graph
        Graph to draw.
    date : str
        Text appended to the figure title.
    seed : int or None, optional
        Forwarded to ``nx.spring_layout``. Pass an integer to get the same
        layout on every run; the default ``None`` keeps the random layout.

    Returns
    -------
    None
        The figure is shown through ``fig.show()``.
    """
    pos = nx.spring_layout(G, seed=seed)

    # Every edge contributes (start, end, None); the None breaks the line so
    # that all edges can be drawn by one Scatter trace.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='text',
        mode='lines',
        marker=dict())

    node_x = []
    node_y = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

    # Neighbour count per node, in the graph's own node order.
    node_adjacencies = [len(neighbors) for _, neighbors in G.adjacency()]
    node_text = ['\n # of connections: ' + str(count) for count in node_adjacencies]

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        text=node_text,
        marker=dict(
            showscale=True,
            # colorscale options
            #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='YlGnBu',
            reversescale=True,
            color=node_adjacencies,
            size=10,
            colorbar=dict(
                thickness=15,
                # title.side replaces the deprecated colorbar.titleside, which
                # newer Plotly releases reject.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            ),
            line_width=2))

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # title.font replaces the deprecated layout.titlefont.
            title=dict(
                text='<br>retweet network on date ' + date,
                font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[dict(
                text="",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002)],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    fig.show()

In [31]:
# List the day keys for which a subgraph has been stored.
graph_dict.keys()

dict_keys(['2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-01', '2020-04-02', '2020-04-03', '2020-04-04', '2020-04-05', '2020-04-06', '2020-04-07', '2020-04-08', '2020-04-09', '2020-04-10', '2020-04-11', '2020-04-12', '2020-04-13', '2020-04-14', '2020-04-15', '2020-04-16', '2020-04-17', '2020-04-18', '2020-04-19', '2020-04-20', '2020-04-21', '2020-04-22', '2020-04-23', '2020-04-24', '2020-04-25', '2020-04-26', '2020-04-27', '2020-04-28', '2020-04-29', '2020-04-30', '2020-05-01', '2020-05-02'])

In [45]:
# Draw the network stored for one day, leaving out nodes with no edges.
date = '2020-03-23'
H = graph_dict[date]
# Copy into a fresh Graph so that nodes can be removed from it.
H_unfrozen = nx.Graph(H)
isolated_nodes = [node for node, degree in H.degree() if degree == 0]
H_unfrozen.remove_nodes_from(isolated_nodes)
showGraph(H_unfrozen, date)

In [44]:
# For every stored day, count the nodes in categories 1, 2 and 3 and plot the
# three series as smoothed lines.
day_list, cat_1_list, cat_2_list, cat_3_list = [], [], [], []

for day, day_graph in graph_dict.items():
    cat_list = list(nx.get_node_attributes(day_graph, name="cat").values())
    day_list.append(day)
    for counts, category in ((cat_1_list, 1), (cat_2_list, 2), (cat_3_list, 3)):
        counts.append(cat_list.count(category))

fig = go.Figure()
for trace_name, counts in (
    ('supportive', cat_1_list),
    ('denial', cat_2_list),
    ('neutral', cat_3_list),
):
    fig.add_trace(
        go.Scatter(
            x=day_list,
            y=counts,
            mode='lines',
            name=trace_name,
            line_shape='spline',
        )
    )

fig.show()