# 5. Mapping the Twitter Connections of the Candidates

In [1]:
import tweepy, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime as dt

import re

%matplotlib inline

In [2]:
# Import HTML from the public IPython.display API rather than the internal
# IPython.core.display module.
from IPython.display import HTML

# Inject a stylesheet that centres elements carrying the `.output_png` class.
# The HTML object has to remain the cell's last expression so that the
# notebook renders it (and thereby applies the CSS).
HTML("""
<style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>
""")

In [3]:
# Load the pickled tweet table of each of the four candidates.
# NOTE(review): unpickling can execute arbitrary code - only load files
# produced by this project.
(
    mcauliffe_tweets,
    youngkin_tweets,
    murphyNJ_tweets,
    ciattarelliNJ_tweets,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        'pickle_files/mcauliffe_tweets_df.pkl',
        'pickle_files/youngkin_tweets_df.pkl',
        'pickle_files/murphyNJ_tweets_df.pkl',
        'pickle_files/ciattarelliNJ_tweets_df.pkl',
    )
)

In [4]:
# Stack the four candidates' tweets into a single frame; ignore_index gives
# the combined frame a fresh 0..n-1 row index.
all_tweets = pd.concat(
    [mcauliffe_tweets, youngkin_tweets, murphyNJ_tweets, ciattarelliNJ_tweets],
    ignore_index=True,
)

In [5]:
# Preview the first rows of the combined tweet table.
all_tweets.head()

Unnamed: 0,tweet_id,created_at,tweet_text,num_favorites,num_retweets,in_reply_to_screen_name,in_reply_to_tweet_id,user_id,user_name,user_handle,...,qt_tweet_id,qt_tweet_user_id,qt_tweet_user_handle,qt_tweet_text,mentions,hashtags,datetime,mentions_biden,mentions_trump,mentions_opponent
0,1455894456801038346,2021-11-03 13:47:42+00:00,https://t.co/y6DyoHlJPu,6390,957,,,19471123,Terry McAuliffe,TerryMcAuliffe,...,,,,,,,2021-11-03 13:47:42,0,0,0
1,1455725530280509441,2021-11-03 02:36:27+00:00,We know that the long term path of Virginia is...,2975,324,TerryMcAuliffe,1.455725529110401e+18,19471123,Terry McAuliffe,TerryMcAuliffe,...,,,,,,,2021-11-03 02:36:27,0,0,0
2,1455725529110401030,2021-11-03 02:36:26+00:00,To all my supporters across Virginia who knock...,2859,288,TerryMcAuliffe,1.4557255273906217e+18,19471123,Terry McAuliffe,TerryMcAuliffe,...,,,,,,,2021-11-03 02:36:26,0,0,0
3,1455725527390621700,2021-11-03 02:36:26+00:00,"Folks, not everything is counted and we’re sti...",9847,1382,,,19471123,Terry McAuliffe,TerryMcAuliffe,...,,,,,,,2021-11-03 02:36:26,0,0,0
4,1455672063042129929,2021-11-02 23:03:59+00:00,"🗳 Virginia Voters! If you’re in line, stay in ...",9848,2547,,,19471123,Terry McAuliffe,TerryMcAuliffe,...,,,,,,,2021-11-02 23:03:59,0,0,0


In [6]:
# Summarise the combined table: columns, non-null counts and dtypes.
all_tweets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9054 entries, 0 to 9053
Data columns (total 27 columns):
 #   Column                   Non-Null Count  Dtype              
---  ------                   --------------  -----              
 0   tweet_id                 9054 non-null   object             
 1   created_at               9054 non-null   datetime64[ns, UTC]
 2   tweet_text               9054 non-null   object             
 3   num_favorites            9054 non-null   int64              
 4   num_retweets             9054 non-null   int64              
 5   in_reply_to_screen_name  714 non-null    object             
 6   in_reply_to_tweet_id     709 non-null    object             
 7   user_id                  9054 non-null   object             
 8   user_name                9054 non-null   object             
 9   user_handle              9054 non-null   object             
 10  user_location            9054 non-null   object             
 11  user_followers           9054 

In [7]:
# One independent two-column copy per interaction type: the tweeting handle
# plus the column naming the account on the other side of that interaction
# (reply target, retweeted user, quoted user, mentions).
all_tweets_replys = all_tweets.loc[:, ['user_handle', 'in_reply_to_screen_name']].copy()
all_tweets_rts = all_tweets.loc[:, ['user_handle', 'rt_tweet_user_handle']].copy()
all_tweets_qts = all_tweets.loc[:, ['user_handle', 'qt_tweet_user_handle']].copy()
all_tweets_mentions = all_tweets.loc[:, ['user_handle', 'mentions']].copy()

In [8]:
# Keep only the rows that have a reply target, then renumber the rows.
all_tweets_replys = (
    all_tweets_replys
    .dropna(subset=['in_reply_to_screen_name'])
    .reset_index(drop=True)
)
all_tweets_replys.head()

Unnamed: 0,user_handle,in_reply_to_screen_name
0,TerryMcAuliffe,TerryMcAuliffe
1,TerryMcAuliffe,TerryMcAuliffe
2,TerryMcAuliffe,TerryMcAuliffe
3,TerryMcAuliffe,TerryMcAuliffe
4,TerryMcAuliffe,TerryMcAuliffe


In [9]:
# Keep only the rows that name a retweeted user, then renumber the rows.
all_tweets_rts = (
    all_tweets_rts
    .dropna(subset=['rt_tweet_user_handle'])
    .reset_index(drop=True)
)
all_tweets_rts.head(2)

Unnamed: 0,user_handle,rt_tweet_user_handle
0,TerryMcAuliffe,ElaineLuriaVA
1,TerryMcAuliffe,timkaine


In [10]:
# Keep only the rows that name a quoted user, then renumber the rows.
all_tweets_qts = (
    all_tweets_qts
    .dropna(subset=['qt_tweet_user_handle'])
    .reset_index(drop=True)
)
all_tweets_qts.head(2)

Unnamed: 0,user_handle,qt_tweet_user_handle
0,TerryMcAuliffe,GovernorVA
1,TerryMcAuliffe,RedWineBlueUSA


In [11]:
# Drop rows without mentions, renumber, then expand each row's collection of
# mentions so that every (tweeting handle, mentioned handle) pair gets its
# own row with a fresh 0..n-1 index.
all_tweets_mentions = (
    all_tweets_mentions
    .dropna(subset=['mentions'])
    .reset_index(drop=True)
    .explode('mentions', ignore_index=True)
)
all_tweets_mentions.head()

Unnamed: 0,user_handle,mentions
0,TerryMcAuliffe,ElaineLuriaVA
1,TerryMcAuliffe,timkaine
2,TerryMcAuliffe,SpanbergerVA07
3,TerryMcAuliffe,TerryMcAuliffe
4,TerryMcAuliffe,MarkHerringVA


In [12]:
# (rows, columns) of the exploded mentions table.
all_tweets_mentions.shape

(5560, 2)

### Add a connection-strength term: the number of times each (candidate, other account) pair occurs

In [13]:
# Count the rows for every (tweeting handle, reply target) pair and list the
# most frequent pairs first; the count is stored in a 'strength' column.
all_tweets_replys_strength = (
    all_tweets_replys
    .groupby(['user_handle', 'in_reply_to_screen_name'])
    .size()
    .reset_index(name='strength')
    .sort_values(by='strength', ascending=False)
)
all_tweets_replys_strength.head()

Unnamed: 0,user_handle,in_reply_to_screen_name,strength
14,GovMurphy,GovMurphy,324
20,Jack4NJ,Jack4NJ,148
55,TerryMcAuliffe,TerryMcAuliffe,127
2,GlennYoungkin,GlennYoungkin,58
40,Jack4NJ,declanoscanlon,2


In [14]:
# Count the rows for every (tweeting handle, retweeted handle) pair and list
# the most frequent pairs first; the count is stored in a 'strength' column.
all_tweets_rts_strength = (
    all_tweets_rts
    .groupby(['user_handle', 'rt_tweet_user_handle'])
    .size()
    .reset_index(name='strength')
    .sort_values(by='strength', ascending=False)
)
all_tweets_rts_strength.head()

Unnamed: 0,user_handle,rt_tweet_user_handle,strength
84,GlennYoungkin,TeamYoungkin,64
215,GovMurphy,FirstLadyNJ,50
10,GlennYoungkin,C_RMartinez,33
35,GlennYoungkin,JasonMiyaresVA,27
667,TerryMcAuliffe,DSMcAuliffe,26


In [15]:
# Count the rows for every (tweeting handle, quoted handle) pair and list
# the most frequent pairs first; the count is stored in a 'strength' column.
all_tweets_qts_strength = (
    all_tweets_qts
    .groupby(['user_handle', 'qt_tweet_user_handle'])
    .size()
    .reset_index(name='strength')
    .sort_values(by='strength', ascending=False)
)
all_tweets_qts_strength.head()

Unnamed: 0,user_handle,qt_tweet_user_handle,strength
219,TerryMcAuliffe,GovernorVA,27
218,TerryMcAuliffe,GlennYoungkin,14
253,TerryMcAuliffe,POTUS,13
41,GlennYoungkin,TerryMcAuliffe,12
295,TerryMcAuliffe,bluevirginia,11


In [16]:
# Count the rows for every (tweeting handle, mentioned handle) pair and list
# the most frequent pairs first; the count is stored in a 'strength' column.
all_tweets_mentions_strength = (
    all_tweets_mentions
    .groupby(['user_handle', 'mentions'])
    .size()
    .reset_index(name='strength')
    .sort_values(by='strength', ascending=False)
)
all_tweets_mentions_strength.head()

Unnamed: 0,user_handle,mentions,strength
430,GovMurphy,GovMurphy,369
1449,TerryMcAuliffe,TerryMcAuliffe,252
54,GlennYoungkin,GlennYoungkin,174
635,GovMurphy,POTUS,94
155,GlennYoungkin,TeamYoungkin,89


In [17]:
# Common column names applied to every per-interaction strength table so
# that the tables can be stacked into a single edge list.
column_labels = [
    'node_user',
    'edge_user',
    'connection_strength',
]

In [18]:
# Give all four strength tables the same three column names (in place), so
# their differently named "other account" columns line up when concatenated.
all_tweets_replys_strength.columns = column_labels
all_tweets_rts_strength.columns = column_labels
all_tweets_qts_strength.columns = column_labels
all_tweets_mentions_strength.columns = column_labels

In [19]:
# Renumber each strength table 0..n-1 in place; after the earlier sort the
# row labels were out of order. drop=True discards the old labels.
all_tweets_replys_strength.reset_index(drop = True, inplace = True)
all_tweets_rts_strength.reset_index(drop = True, inplace = True)
all_tweets_qts_strength.reset_index(drop = True, inplace = True)
all_tweets_mentions_strength.reset_index(drop = True, inplace = True)

In [20]:
# Spot-check the renamed, renumbered reply strength table.
all_tweets_replys_strength.head(2)

Unnamed: 0,node_user,edge_user,connection_strength
0,GovMurphy,GovMurphy,324
1,Jack4NJ,Jack4NJ,148


In [21]:
# Spot-check the renamed, renumbered retweet strength table.
all_tweets_rts_strength.head(2)

Unnamed: 0,node_user,edge_user,connection_strength
0,GlennYoungkin,TeamYoungkin,64
1,GovMurphy,FirstLadyNJ,50


In [22]:
# Spot-check the renamed, renumbered quote-tweet strength table.
all_tweets_qts_strength.head(2)

Unnamed: 0,node_user,edge_user,connection_strength
0,TerryMcAuliffe,GovernorVA,27
1,TerryMcAuliffe,GlennYoungkin,14


In [23]:
# Spot-check the renamed, renumbered mention strength table.
all_tweets_mentions_strength.head(2)

Unnamed: 0,node_user,edge_user,connection_strength
0,GovMurphy,GovMurphy,369
1,TerryMcAuliffe,TerryMcAuliffe,252


In [24]:
# Stack the reply, retweet, quote-tweet and mention strength tables into one
# edge list; ignore_index renumbers the stacked rows 0..n-1.
all_tweet_connections = pd.concat(
    [
        all_tweets_replys_strength,
        all_tweets_rts_strength,
        all_tweets_qts_strength,
        all_tweets_mentions_strength,
    ],
    ignore_index=True,
)
all_tweet_connections.head()

Unnamed: 0,node_user,edge_user,connection_strength
0,GovMurphy,GovMurphy,324
1,Jack4NJ,Jack4NJ,148
2,TerryMcAuliffe,TerryMcAuliffe,127
3,GlennYoungkin,GlennYoungkin,58
4,Jack4NJ,declanoscanlon,2


In [25]:
# (rows, columns) of the stacked edge list before self-connections are removed.
all_tweet_connections.shape

(2842, 3)

In [26]:
# Remove self-connections (rows where an account points at itself, e.g.
# replies to its own tweets) and renumber the remaining rows.
# NOTE(review): the comparison is case-sensitive - confirm handle casing is
# consistent across the reply/retweet/quote/mention sources.
all_tweet_connections = (
    all_tweet_connections
    .loc[all_tweet_connections['node_user'] != all_tweet_connections['edge_user']]
    .reset_index(drop=True)
)
all_tweet_connections.head()

Unnamed: 0,node_user,edge_user,connection_strength
0,Jack4NJ,declanoscanlon,2
1,Jack4NJ,GovMurphy,2
2,GlennYoungkin,AmandaChaseVA,1
3,Jack4NJ,imanurse,1
4,Jack4NJ,hyper_strong,1


In [27]:
# Collapse the edge list to one row per (node_user, edge_user) pair by
# summing the strengths contributed by the different interaction types.
# The result is indexed by the (node_user, edge_user) pair.
all_tweet_connections = (
    all_tweet_connections
    .groupby(['node_user', 'edge_user'])[['connection_strength']]
    .sum()
)
all_tweet_connections.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,connection_strength
node_user,edge_user,Unnamed: 2_level_1
GlennYoungkin,107Wchv,1
GlennYoungkin,8NEWS,1
GlennYoungkin,AJFriedenberger,1
GlennYoungkin,AmandaChaseVA,3
GlennYoungkin,AmericaNewsroom,6


In [28]:
# (rows, columns) after aggregation: one row per unique (node_user, edge_user) pair.
all_tweet_connections.shape

(1786, 1)

In [29]:
# Attributes of the four candidate accounts. Every list below uses the same
# candidate order, which must match the order of the frames read further down
# (mcauliffe, youngkin, murphyNJ, ciattarelliNJ).
node_screen_name = ['TerryMcAuliffe','GlennYoungkin','GovMurphy','Jack4NJ']
node_label = ['TerryMcAuliffe','GlennYoungkin','GovMurphy','Jack4NJ']
location = ['Virginia','Virginia','New Jersey','New Jersey']

# Take the follower / following counts from the first row of each
# candidate's tweet table. `.iloc[0]` is positional, so it works whatever the
# frame's index labels are; a plain `series[0]` is a label lookup that raises
# KeyError when the index has no label 0.
followers_count = [mcauliffe_tweets.user_followers.iloc[0],
                  youngkin_tweets.user_followers.iloc[0],
                  murphyNJ_tweets.user_followers.iloc[0],
                  ciattarelliNJ_tweets.user_followers.iloc[0]]
friends_count = [mcauliffe_tweets.user_following.iloc[0],
                youngkin_tweets.user_following.iloc[0],
                murphyNJ_tweets.user_following.iloc[0],
                ciattarelliNJ_tweets.user_following.iloc[0]]

In [30]:
# Assemble the per-candidate attribute lists into a node table with one row
# per candidate and one column per attribute.
node_dict = dict(
    node_screen_name=node_screen_name,
    node_label=node_label,
    location=location,
    followers_count=followers_count,
    following_count=friends_count,
)
node_df = pd.DataFrame(data=node_dict)
node_df.head()

Unnamed: 0,node_screen_name,node_label,location,followers_count,following_count
0,TerryMcAuliffe,TerryMcAuliffe,Virginia,125813,1656
1,GlennYoungkin,GlennYoungkin,Virginia,154711,193
2,GovMurphy,GovMurphy,New Jersey,405360,198
3,Jack4NJ,Jack4NJ,New Jersey,36077,1933


In [31]:
# Write the node table and the aggregated edge table to CSV files in the
# current working directory. No `index` argument is passed, so pandas'
# default applies and each frame's index is written as well; for the edge
# table that index holds the (node_user, edge_user) pair.
node_df.to_csv('node.csv')
all_tweet_connections.to_csv('edges.csv')