In [1]:
#Import modules
%matplotlib inline

#handling data
import pandas as pd
import numpy as np

#import twitter credentials
from twitter_credentials import *

#handling information
import re
import json

#handling plots
import matplotlib.pyplot as plt

#for network creation
import networkx as nx

In [204]:
tweets_df = pd.read_json("tweets.txt", lines=True)

In [213]:
tweets_df.columns

Index(['contributors', 'coordinates', 'created_at', 'display_text_range',
       'entities', 'extended_entities', 'extended_tweet', 'favorite_count',
       'favorited', 'filter_level', 'geo', 'id', 'id_str',
       'in_reply_to_screen_name', 'in_reply_to_status_id',
       'in_reply_to_status_id_str', 'in_reply_to_user_id',
       'in_reply_to_user_id_str', 'is_quote_status', 'lang', 'place',
       'possibly_sensitive', 'quote_count', 'quoted_status',
       'quoted_status_id', 'quoted_status_id_str', 'quoted_status_permalink',
       'reply_count', 'retweet_count', 'retweeted', 'retweeted_status',
       'source', 'text', 'timestamp_ms', 'truncated', 'user'],
      dtype='object')

In [237]:
tweets_final = pd.DataFrame(columns = ["created_at", "id", "in_reply_to_screen_name", "in_reply_to_status_id", "in_reply_to_user_id",
                                      "retweeted_id", "retweeted_screen_name", "quoted_status", "quoted_status_id", "user_mentions_screen_name", "user_mentions_id", 
                                       "text", "user_id", "screen_name", "followers_count"])

In [238]:
equal_columns = ["created_at", "id", "text"]
tweets_final[equal_columns] = tweets_df[equal_columns]

In [239]:
def get_basics(tweets_final):
    tweets_final["screen_name"] = tweets_df["user"].apply(lambda x: x["screen_name"])
    tweets_final["user_id"] = tweets_df["user"].apply(lambda x: x["id"])
    tweets_final["followers_count"] = tweets_df["user"].apply(lambda x: x["followers_count"])
    return tweets_final

In [273]:
def get_usermentions(tweets_final):
    tweets_final["user_mentions_screen_name"] = tweets_df["entities"].apply(lambda x: x["user_mentions"][0]["screen_name"] if x["user_mentions"] else np.nan)
    tweets_final["user_mentions_id"] = tweets_df["entities"].apply(lambda x: x["user_mentions"][0]["id_str"] if x["user_mentions"] else np.nan)
    return tweets_final

In [274]:
def get_retweets(tweets_final):
    tweets_final["retweeted_screen_name"] = tweets_df["retweeted_status"].apply(lambda x: x["user"]["screen_name"] if x is not np.nan else np.nan)
    tweets_final["retweeted_id"] = tweets_df["retweeted_status"].apply(lambda x: x["user"]["id_str"] if x is not np.nan else np.nan)
    return tweets_final

In [284]:
def get_in_reply(tweets_final):
    tweets_final["in_reply_to_screen_name"] = tweets_df["in_reply_to_screen_name"]
    tweets_final["in_reply_to_status_id"] = tweets_df["in_reply_to_status_id"]
    tweets_final["in_reply_to_user_id"] = tweets_df["in_reply_to_user_id_str"]
    return tweets_final

In [281]:
def get_retweet(tweets_final):
    tweets_final["retweeted_status"] = tweets_df["retweeted_status"]
    tweets_final["quoted_status"] = tweets_df["quoted_status"]
    tweets_final["quoted_status_id"] = tweets_df["quoted_status_id"]
    return tweets_final

In [346]:
def fill_df(tweets_final):
    get_basics(tweets_final)
    get_usermentions(tweets_final)
    get_retweet(tweets_final)
    get_in_reply(tweets_final)
    get_retweets(tweets_final)
    tweets_final.fillna(value=0, inplace=True)
    return tweets_final

In [352]:
def get_interactions(row):
    user = row["user_id"], row["screen_name"]
    if user[0] == 0:
        return (None, None), []
    interactions = set()
    
    # Add all interactions
    interactions.add((row["in_reply_to_user_id"], row["in_reply_to_screen_name"]))
    interactions.add((row["retweeted_id"], row["retweeted_screen_name"]))
    interactions.add((row["user_mentions_id"], row["user_mentions_screen_name"]))
    
    # Discard if user id is in interactions
    interactions.discard((row["user_id"], row["screen_name"]))
    interactions.discard((0.0, 0))
    interactions.discard((0.0, 0.0))
    # Return user and interactions
    return user, interactions

In [353]:
tweets_final = fill_df(tweets_final)

In [354]:
tweets_final.head()

Unnamed: 0,created_at,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,retweeted_id,retweeted_screen_name,quoted_status,quoted_status_id,user_mentions_screen_name,user_mentions_id,text,user_id,screen_name,followers_count,retweeted_status
0,2019-03-30 20:14:37,1112085741683638272,0,0.0,0.0,938373003355635712,4fnan_zainal,{'created_at': 'Wed Mar 27 20:38:37 +0000 2019...,1.111005e+18,4fnan_zainal,938373003355635712,RT @4fnan_zainal: Pls ban kids masa showtime a...,925075656127365120,iRfansyamill,39,{'created_at': 'Sat Mar 30 12:25:59 +0000 2019...
1,2019-03-30 20:14:39,1112085751267762177,0,0.0,0.0,257265441,skipbolden,0,0.0,skipbolden,257265441,RT @skipbolden: The Avengers (TV Series 1961–1...,812752256214925312,Brianlovesmovi,5906,{'created_at': 'Sat Mar 30 00:44:39 +0000 2019...
2,2019-03-30 20:14:41,1112085758851059714,0,0.0,0.0,0,0,0,0.0,0,0,Looking forward to the Avengers End Game 😍 #av...,924977392535339010,GadgetSwitched,29,0
3,2019-03-30 20:14:41,1112085761569054723,0,0.0,0.0,254333914,Hypable,0,0.0,Hypable,254333914,RT @Hypable: 'Avengers: Endgame' posters have ...,1377394638,Christian_sexy7,266,{'created_at': 'Sat Mar 30 17:15:19 +0000 2019...
4,2019-03-30 20:14:41,1112085762030358530,MTV_iwan,1.112086e+18,8.525729e+17,0,0,0,0.0,MTV_iwan,852572866512027650,@MTV_iwan @theflairqueenx @BeckyLynchWWE I gue...,1105100693147394048,QuincyWard15,28,0


In [355]:
graph = nx.DiGraph()

In [363]:
for index, tweet in tweets_final.iterrows():
    user, interactions = get_interactions(tweet)
    user_id, user_name = user
    tweet_id = tweet["id"]
    for interaction in interactions:
        int_id, int_name = interaction
        graph.add_edge(user_id, int_id, tweet_id=tweet_id)
        
        graph.node[user_id]["name"] = user_name
        graph.node[int_id]["name"] = int_name       

In [364]:
interactions

{('3263526110', 'urvong')}