## Dependencies

In [None]:
#data collection
import requests
import json
from math import ceil
from time import sleep, time
from datetime import datetime
import pandas as pd

#data cleaning, plotting and network analysis
import numpy as np
import plotly.express as px
import networkx as nx

## TwitterAPI Class
Create the TwitterAPI class. It loops through a list of bearer tokens, switching to the next token when the rate limit of the current bearer token is reached.  
Takes a list of bearer tokens as required input.  
Important methods:
- __Connection_to_endpoint:__ Uses the requests module to query the Twitter API.
- __Connection_to_endpoint_loop:__ Connects to the endpoint, but tries a new bearer token if the rate limit is reached.
- __Snowball:__ Given a Twitter user (screen name) as seed, retrieves a network of users. Output is a pandas DataFrame.



In [97]:
from TwitterAPI import TwitterAPI
import AppCred

In [99]:
# Confirm that a Google API key is configured without echoing the secret
# itself into the saved notebook output.
bool(AppCred.GOOGLE_API_KEY)

'<redacted: API key removed from saved output>'

In [None]:
# Snowball-sample the follow network starting from the seed account
# 'windwatchorg' and pickle the raw result.
# The positional 2 is presumably the snowball depth -- confirm against
# TwitterAPI.snowball.
# NOTE(review): the data handling cell reads 'data/results13062021', not the
# file written here.
api = TwitterAPI(AppCred.BEARER_TOKENS)
results_snowball = api.snowball(
    ['windwatchorg'],
    2,
    regex_pattern_filter='(?i)Wind(?!ow)',
)
df = pd.DataFrame(results_snowball)
df.to_pickle('data/results')

## Data handling script

In [None]:
# Snowball results collected with seed account 'windwatchorg'.
df_results = pd.read_pickle('data/results13062021')

# Expand the per-row 'public_metrics' values into columns of their own and
# append them to the frame.
df_results = pd.concat(
    [df_results, df_results['public_metrics'].apply(pd.Series)],
    axis=1,
)

# Rows per username (count of non-null 'name' values), largest first.
df_username = (
    df_results.groupby('username')['name']
    .count()
    .sort_values(ascending=False)
)

# Attach that count to every row; the clashing 'name' column coming from the
# right side is suffixed to 'name_count' and then renamed to 'indegree'.
df_results = pd.merge(
    df_results,
    df_username,
    left_on='username',
    right_index=True,
    suffixes=('', '_count'),
).rename(columns={'name_count': 'indegree'})

### Create a map

In [93]:
def gps_of_location(location, header = ''):
    """Geocode a free-text location with the Google Geocoding API.

    Parameters
    ----------
    location : str
        Free-text place description to look up. Non-string or blank values
        (None, NaN, '') are treated as "not found" without issuing a request.
    header : optional
        Passed unchanged to the ``TwitterAPI`` constructor, whose
        ``connect_to_endpoint`` method performs the HTTP request.

    Returns
    -------
    dict or str
        * status ``'OK'`` with at least one result: the first result's
          ``geometry.location`` dict, extended with the key
          ``'types_google_api_response'`` holding that result's ``types``.
        * status ``'REQUEST_DENIED'``: the complete JSON response, so the
          caller can inspect why the request was refused.
        * anything else: the string ``'No GPS coordinates found'``.
    """
    from urllib.parse import urlencode

    not_found = 'No GPS coordinates found'

    # A missing location would otherwise be formatted into the URL as
    # "nan"/"None"/"" and spend a request on a meaningless query.
    if not isinstance(location, str) or not location.strip():
        return not_found

    # urlencode escapes spaces, '&', '#', non-ASCII characters etc. so the
    # location cannot break or truncate the query string.
    query = urlencode({'address': location, 'key': AppCred.GOOGLE_API_KEY})
    url = f"https://maps.googleapis.com/maps/api/geocode/json?{query}"

    api = TwitterAPI(header)
    json_response = api.connect_to_endpoint(url=url)

    status = json_response.get('status')
    # Guard against an 'OK' status that carries an empty result list.
    if status == 'OK' and json_response.get('results'):
        first_result = json_response['results'][0]
        d = first_result['geometry']['location']
        d['types_google_api_response'] = first_result['types']
        return d
    if status == 'REQUEST_DENIED':
        return json_response
    return not_found

In [90]:
# CAUTION: do not re-run this cell casually. gps_of_location is called once
# per distinct location and the Google API free tier is limited.
locations = df_results.loc[df_results['ball_depth'].lt(2), 'location'].unique()
gps = dict(zip(locations, map(gps_of_location, locations)))

KeyboardInterrupt: 

In [None]:
# gps_of_location returns a coordinate dict on success, but a plain string or
# the raw REQUEST_DENIED response on failure. Keep only the successful hits so
# that failure values cannot end up in (or break) the lat/lng columns.
gps_hits = {
    place: hit
    for place, hit in gps.items()
    if isinstance(hit, dict) and 'lat' in hit and 'lng' in hit
}

# One row per geocoded location, indexed by the location string.
df_gps = pd.DataFrame(gps_hits).transpose()

# pd.merge defaults to an inner join, so rows of df_results whose location
# has no coordinates are dropped from df.
df = pd.merge(df_results, df_gps, left_on='location', right_index=True)

In [None]:
# Plot every geocoded user on a world map. All plotted columns live in the
# merged frame `df`, so pass that frame and refer to its columns by name
# instead of pairing `df_results` with Series taken from a different frame.
fig = px.scatter_geo(df, lat='lat', lon='lng', hover_name='username')
fig.write_html("map/interactive_twitter_user_map.html")

In [73]:
# drop_duplicates returns a new frame; the result must be assigned, otherwise
# the duplicated (followed_by, username) pairs are still written to disk.
df = df.drop_duplicates(['followed_by','username'])
# NOTE(review): this overwrites the same pickle that the data handling cell
# reads -- confirm that replacing the source file is intended.
df.to_pickle('data/results13062021')


In [83]:
# Distinct profile locations of rows with ball_depth below 2 (same selection
# as in the geocoding cell).
locations = df_results.loc[df_results['ball_depth'].lt(2), 'location'].unique()

In [94]:
# Confirm that a Google API key is configured without echoing the secret
# itself into the saved notebook output.
bool(AppCred.GOOGLE_API_KEY)

'<redacted: API key removed from saved output>'

### Create a network

In [None]:
# Headline numbers for the follow network held in df_results.
# An edge is a distinct (followed_by, username) pair.
df_results_no_duplicates = df_results.drop_duplicates(['followed_by', 'username'])
number_of_edges = len(df_results_no_duplicates)
number_of_nodes = len(df_results['username'].unique())
number_of_seeds = len(df_results['followed_by'].unique())

print(
    '__Network statistics__\n'
    f'Number of seeds used: {number_of_seeds}\n'
    f'Number of nodes: {number_of_nodes}\n'
    f'Number of edges in network: {number_of_edges}\n'
)

In [None]:
# Work on a copy so df_results itself is left untouched.
df_network = df_results.copy()

# Flag rows whose description or username matches "wind" (case-insensitive,
# but not when followed by "ow", e.g. "window") with wind = 1.
df_network.loc[
    df_network['description'].str.contains('(?i)Wind(?!ow)')
    | df_network['username'].str.contains('(?i)Wind(?!ow)'),
    'wind',
] = 1

# Keep only the flagged rows.
df_network = df_network[df_network['wind'].eq(1)]

In [None]:
# Directed graph with one edge followed_by -> username per row of df_network;
# each edge carries the row's username, followed_by and ball_depth values.
G = nx.from_pandas_edgelist(
    df_network,
    source='followed_by',
    target='username',
    edge_attr=['username', 'followed_by', 'ball_depth'],
    create_using=nx.DiGraph(),
)

In [None]:
# One attribute row per account: first occurrence of each username, with the
# username column renamed to 'node'.
nodes = (
    df_network
    .drop_duplicates('username')
    .rename(columns={'username': 'node'})
)

# {node: {column: value, ...}} mapping, attached to the graph as node data.
# Only accounts that occur in the username column are covered here; accounts
# present solely as 'followed_by' sources receive no attributes.
node_attributes = nodes.set_index('node').to_dict(orient='index')
nx.set_node_attributes(G, node_attributes)

In [None]:
# Flag wind-related accounts directly in df_results. The masks are built from
# df_results itself: the original used masks taken from the already filtered
# df_network, whose index only covers a subset of df_results' rows.
# na=False treats missing descriptions/usernames as "no match".
df_results.loc[
    df_results['description'].str.contains('(?i)Wind(?!ow)', na=False)
    | df_results['username'].str.contains('(?i)Wind(?!ow)', na=False),
    'wind',
] = 1

# Show the rows with ball_depth below 2 (the original line had an unbalanced
# closing parenthesis and did not parse).
df_results.loc[df_results['ball_depth'] < 2]

In [None]:
# Display df_results for inspection.
df_results

In [None]:
# Export the graph G to a GEXF file.
nx.write_gexf(G, 'Graphs/WindWatchOrg_snowball_sample_v3_only_wind.gexf')

In [None]:
# Distinct ball_depth values present in df_results.
df_results['ball_depth'].unique()

In [None]:
# Re-attach the node attributes, then look up the attribute row of a single
# account. The original lookup `nodes[.locnodes[...]]` was a syntax error;
# the intended form is a boolean .loc selection on the 'node' column.
nx.set_node_attributes(G, node_attributes)
nodes.loc[nodes['node'] == 'StopTheseThings']

In [None]:
# Rows of df_network whose username is 'Tegenwindinfo'.
df_network.loc[df_network['username']=='Tegenwindinfo']

In [None]:
# Edges of the directed graph G that start at node 'Tegenwindinfo'.
G.edges('Tegenwindinfo')

In [None]:
# Number of distinct usernames in df.
len(df['username'].unique())

In [None]:
# Distinct usernames followed by 'windwatchorg' whose description OR username
# mentions "wind" (case-insensitive).
# `&` binds tighter than `|`, so without the extra parentheses the
# followed_by filter only applied to the username match and every row with a
# matching description was included regardless of who follows it.
l = list(set(df['username'].loc[
    ((df['description'].str.contains('(?i)Wind'))
     | (df['username'].str.contains('(?i)Wind')))
    & (df['followed_by'] == 'windwatchorg')
]))


In [None]:
# Number of distinct entries in l.
len(set(l))

In [None]:
# Display the followed_by column of df.
df['followed_by']

In [None]:
# Number of distinct followed_by values in df.
len(df['followed_by'].unique())