# Twitter Discourse and Emotions Around the Invasion of Ukraine - Companion code
## – A Text Analytics Approach 
### Gabriel Lindelöf



# Scrape script for the secondary dataset - contagion hypothesis

In [None]:
from twarc.client2 import Twarc2
import pandas as pd
import json 
from pyarrow import feather
from datetime import datetime, timedelta

def to_datetime(date):
    """Parse an ISO-8601 timestamp string into a naive ``datetime``.

    A trailing ``'Z'`` (UTC designator) is removed before parsing, so the
    result carries no tzinfo. Strings without the suffix are parsed as they
    are instead of losing their last character.

    Args:
        date: Timestamp string, e.g. ``'2022-03-01T12:30:45.000Z'``.

    Returns:
        The parsed ``datetime`` object.

    Raises:
        ValueError: If the string is not a valid ISO-8601 timestamp.
    """
    # Only strip the suffix when it is actually there; slicing blindly would
    # truncate the seconds of a timestamp that has no 'Z'.
    if date.endswith('Z'):
        date = date[:-1]
    return datetime.fromisoformat(date)

# Load the users sampled from the main dataset, then convert their
# `created_at` timestamp strings into datetime objects with to_datetime.
sample_path = 'ukraine_two_weeks_clean_shuffled_v2_sample_10_10000.feather'
df = feather.read_feather(sample_path)
df['created_at'] = df['created_at'].map(to_datetime)

## Get the accounts each sampled user follows - following endpoint

In [None]:
t = Twarc2(bearer_token='secret_token_here') # Write Twitter API access token, initiate scrape object. 

# Scraping is done in batches: 20 batches of 50 users each (users 0-50,
# 50-100, ...). Every batch is saved to its own file before the next starts.
batch_size = 50
n_batches = 20
author_ids = df.author_id.tolist()  # Build the id list once, not once per batch.

for from_user in range(0, batch_size * n_batches, batch_size):
    to_user = from_user + batch_size
    print("From: ", from_user, " to ", to_user)

    user_data = []
    for user_i, user_id in enumerate(author_ids[from_user:to_user]):
        print("Moving on to user: {} ({}/{})".format(user_id, (user_i+1), (to_user - from_user)))

        # The API call lives inside the try block so that one failing user
        # (or a response without a 'data' key) is reported and skipped
        # instead of aborting the whole batch.
        try:
            # Only the first page is requested: at most 1000 followees per user.
            page = next(t.following(user_id, max_results=1000))
            page_df = pd.json_normalize(page['data'], sep=',')
            page_df['followed_by'] = user_id
            user_data.append(page_df) # Save this user's followees as a DataFrame.
            print('Added {} followees to user_data list.'.format(len(page_df)))

        except Exception as e:
            print("No data: ", e)

    # pd.concat raises ValueError on an empty list, so skip batches for which
    # nothing was collected (e.g. the sample has fewer users than the range).
    if not user_data:
        print("No followees collected for users {} to {}; nothing written.".format(from_user, to_user))
        continue

    output = pd.concat(user_data)
    feather.write_feather(output, 'data_out/following_{}_to_{}.feather'.format(from_user, to_user)) # Save batch of 50 users' followees to file. 

## Get tweets made by followees - timeline endpoint

In [None]:
t = Twarc2(bearer_token='secret_token_here') # Write Twitter API access token, initiate scrape object. 

# Followee tweets are collected in the same batches as the followee lists:
# 20 batches of 50 users, one input file and one output file per batch.
batch_size = 50
n_batches = 20
ts_format = "%Y-%m-%dT%H:%M:%SZ"

for from_user in range(0, batch_size * n_batches, batch_size):
    to_user = from_user + batch_size
    print("From: ", from_user, " to ", to_user)

    # Load the followees saved for this batch. A batch whose file cannot be
    # read is reported and skipped so the remaining batches still run.
    following_path = 'data_out/following_{}_to_{}.feather'.format(from_user, to_user)
    try:
        followees = feather.read_feather(following_path)
    except OSError as e:
        print("Skipping batch, could not read {}: {}".format(following_path, e))
        continue

    tweets = []
    for user_i, user_id in enumerate(followees.followed_by.unique().tolist()):
        ts_end = df[df.author_id == user_id].created_at.iloc[0] # When the response tweet was created.
        # Only get followee tweets from the 3 hours preceding the response
        # tweet (later filtered to only 1 hour).
        ts_start = ts_end - timedelta(hours=3, minutes=0)
        # NOTE(review): twarc documents start_time/end_time as datetime
        # objects; confirm the installed version accepts preformatted strings.
        ts_start = ts_start.strftime(ts_format)
        ts_end = ts_end.strftime(ts_format)

        user_followees = followees[followees.followed_by == user_id] # The followees of this one user.
        print("Getting {} users that {} follows ({})".format(len(user_followees), user_id, user_i))
        for followed_user in user_followees.id: # Iterate over all users they follow.
            try:
                # Query whether their timeline contains any tweets in the window before the response tweet.
                query = t.timeline(followed_user,  start_time=ts_start, end_time=ts_end, exclude_retweets=False, exclude_replies=True, max_results=100, expansions=None, tweet_fields=None, user_fields=None)
                page = next(query, None)  # Only the first page (at most 100 tweets) is used.
                if not page or 'data' not in page:
                    continue  # Nothing returned for this followee in the window.
                followed_timeline = pd.json_normalize(page['data'], sep=',')
                followed_timeline['followed_by'] = user_id
                print("Adding tweets: ", len(followed_timeline))
                tweets.append(followed_timeline) # Save any tweets from the followee's timeline.

            except Exception as e:
                # Stay best-effort (keep going), but report the failure
                # instead of hiding it so systematic errors are visible.
                print("Failed to fetch timeline of {}: {}".format(followed_user, e))

    # pd.concat raises ValueError on an empty list, so skip writing when no
    # tweets were collected for this batch.
    if not tweets:
        print("No tweets collected for users {} to {}; nothing written.".format(from_user, to_user))
        continue

    output = pd.concat(tweets)
    feather.write_feather(output, 'data_out/follow_tweets{}_to_{}.feather'.format(from_user, to_user)) # Save data to file.