# Twitter Profile Info

Gets Twitter profile information (`created_at`, `location`, etc.) for the Twitter handles listed in a CSV.

In [1]:
# Set path to the outer directory folder so we can access Twitter API keys in twitter_api_xanda.py
import sys
sys.path.append('..')

In [2]:
import pandas as pd
import tweepy
from twitter_api_xanda import TWITTER_API_BEARER

Get the Mergr company Twitter handles from `mergr_twitter_subsidiaries_manual.csv`.

In [3]:
# CSV of Mergr companies; its 'Twitter Handle' column is what the rest of the notebook consumes
handles_csv_path = '../handle_scraping/mergr_twitter_subsidiaries_manual.csv'
twitter_handle_df = pd.read_csv(handles_csv_path)
twitter_handle_df

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Sector,Address,City,State or Province,Zip,Country,...,Employees,Ownership Status,Investors/Acquirers,Buy M&A Count - Total,Avg. Buy Count Per Year (Last 5 Yrs),Sell M&A Count - Total,Avg. Sell Count Per Year (Last 5 Yrs),Twitter URL,Twitter Handle,Subsidiary
0,49,81,240,AECOM,Business Services,300 South Grand Avenue 9th Floor,Los Angeles,California,90071,United States,...,54000,Public,,12.0,0.2,5.0,0.8,https://twitter.com/AECOM,AECOM,False
1,20,43,142,AerSale,Aerospace,121 Alhambra Plaza Suite 1110,Coral Gables,Florida,33134,United States,...,470,Public,,2.0,0.4,,,https://twitter.com/aersale,aersale,False
2,9,24,87,Arrow Electronics,Distribution,9201 East Dry Creek Road,Centennial,Colorado,80112,United States,...,20700,Public,,34.0,0.2,1.0,,https://twitter.com/Arrow_dot_com,Arrow_dot_com,False
3,23,46,148,AutoNation,Automotive,200 SW 1st Avenue Fort Lauderdale,Fort Lauderdale,Florida,33301,United States,...,21600,Public,,13.0,0.2,1.0,,https://twitter.com/AutoNation,AutoNation,False
4,34,61,182,Avnet,Distribution,2211 South 47th Street,Phoenix,Arizona,85034,United States,...,14600,Public,,78.0,0.4,6.0,0.2,https://twitter.com/avnet,avnet,False
5,46,78,231,Berry,Plastics,101 Oakley Street,Evansville,Indiana,47710,United States,...,47000,Public,,14.0,0.4,2.0,0.4,https://twitter.com/berryglobalinc,berryglobalinc,False
6,36,64,192,BJ's,Retail,25 Research Drive,Westborough,Massachusetts,1581,United States,...,34000,Public,,,,,,https://twitter.com/BJsWholesale,BJsWholesale,False
7,3,13,52,Bunge,Agriculture,1391 Timberlake Manor Parkway,St. Louis,Missouri,63017,United States,...,22000,Public,,10.0,,4.0,0.2,https://twitter.com/bunge,bunge,False
8,38,66,197,C.H. Robinson Worldwide,Transportation,14701 Charlson Road,Eden Prairie,Minnesota,55347,United States,...,14997,Public,,24.0,0.8,1.0,,https://twitter.com/CHRobinson,CHRobinson,False
9,52,84,245,Carvana,E-Commerce,1930 West Rio Salado Parkway,Tempe,Arizona,85281,United States,...,21000,Public,,3.0,0.4,,,https://twitter.com/carvana,carvana,False


In [4]:
# Keep only rows that actually have a handle (some companies don't have Twitters)
handle_column = twitter_handle_df['Twitter Handle']
twitter_handles = list(handle_column.dropna())

Functions for parsing the `entities` field of a Twitter user object:

In [5]:
def parse_entity_url(user_obj):
    """Return the first expanded profile URL found in a user's ``entities``.

    Args:
        user_obj: Object exposing an ``entities`` attribute. When that
            attribute is truthy and has a ``'url'`` key, the value is read as
            a dict whose ``'urls'`` entry is a list of dicts.

    Returns:
        The ``'expanded_url'`` of the first URL entry that has one, or ``None``
        when ``entities`` is empty, has no ``'url'`` key, or lists no usable
        URL entries.
    """
    if not user_obj.entities or 'url' not in user_obj.entities:
        return None

    # A missing or empty 'urls' list, or an entry without 'expanded_url',
    # yields None instead of raising KeyError / IndexError.
    url_entries = user_obj.entities['url'].get('urls') or []
    expanded_urls = [
        entry['expanded_url'] for entry in url_entries if entry.get('expanded_url')
    ]

    if not expanded_urls:
        return None

    if len(expanded_urls) > 1:
        print("Saw a Twitter user with more than 1 URL")

    # Only the first URL is kept; the notice above flags profiles where that drops data.
    return expanded_urls[0]

def parse_entity_cashtags(user_obj):
    """Collect the cashtag strings listed under the user's description entities.

    Returns a list with the ``'tag'`` value of every entry in
    ``user_obj.entities['description']['cashtags']``, or ``None`` when
    ``entities`` is empty or either of those keys is absent.
    """
    entities = user_obj.entities
    if not entities:
        return None
    if 'description' not in entities:
        return None

    description_entities = entities['description']
    if 'cashtags' not in description_entities:
        return None

    return [entry['tag'] for entry in description_entities['cashtags']]

def parse_entity_hashtags(user_obj):
    """Collect the hashtag strings listed under the user's description entities.

    Returns a list with the ``'tag'`` value of every entry in
    ``user_obj.entities['description']['hashtags']``, or ``None`` when
    ``entities`` is empty or either of those keys is absent.
    """
    entities = user_obj.entities
    if not entities:
        return None
    if 'description' not in entities:
        return None

    description_entities = entities['description']
    if 'hashtags' not in description_entities:
        return None

    return [entry['tag'] for entry in description_entities['hashtags']]

def parse_entity_mentions(user_obj):
    """Collect the usernames mentioned under the user's description entities.

    Returns a list with the ``'username'`` value of every entry in
    ``user_obj.entities['description']['mentions']``, or ``None`` when
    ``entities`` is empty or either of those keys is absent.
    """
    entities = user_obj.entities
    if not entities:
        return None
    if 'description' not in entities:
        return None

    description_entities = entities['description']
    if 'mentions' not in description_entities:
        return None

    return [entry['username'] for entry in description_entities['mentions']]

Let's get Twitter profile info for the company Twitter handles in our CSV!

In [6]:
client = tweepy.Client(bearer_token=TWITTER_API_BEARER)

# Profile fields requested for every looked-up user
USER_FIELDS = ['created_at', 'description', 'entities', 'id', 'location', 'name', 'pinned_tweet_id', 'profile_image_url', 'protected', 'public_metrics', 'url', 'username', 'verified', 'withheld']

# get_users can only get up to 100 users at a time, so the handles are looked up in batches
MAX_USERS_PER_REQUEST = 100

# Get Twitter user objects for all the Twitter handles in our CSV
user_objs = []
for i in range(0, len(twitter_handles), MAX_USERS_PER_REQUEST):
    response = client.get_users(usernames=twitter_handles[i:i + MAX_USERS_PER_REQUEST], user_fields=USER_FIELDS)
    # response.data is None (not an empty list) when no handle in the batch resolves,
    # so fall back to [] instead of letting list concatenation raise TypeError.
    user_objs.extend(response.data or [])
    # Handles that could not be looked up are reported in response.errors;
    # surface them so missing rows in the final CSV are not a silent surprise.
    if response.errors:
        print(f"{len(response.errors)} lookup error(s) for handles {i}-{i + MAX_USERS_PER_REQUEST - 1}: {response.errors}")

# Use the Twitter user objects to create a list of lists, where each inner list corresponds to the Twitter profile info for one user.
# The value order here must match the column order used when the DataFrame is built.
users = []
for obj in user_objs:
    metrics = obj.public_metrics
    users.append([
        obj.id,
        obj.name,
        obj.username,
        obj.created_at,
        obj.location,
        obj.description,
        parse_entity_url(obj),
        obj.pinned_tweet_id,
        obj.profile_image_url,
        obj.protected,
        metrics['followers_count'],
        metrics['following_count'],
        metrics['tweet_count'],
        metrics['listed_count'],
        obj.verified,
        obj.withheld,
        parse_entity_cashtags(obj),
        parse_entity_hashtags(obj),
        parse_entity_mentions(obj),
    ])

In [7]:
# Column labels for the profile table, one per value in each inner list of `users`
profile_columns = [
    'id', 'name', 'username', 'created_at', 'location', 'description',
    'url', 'pinned_tweet_id', 'profile_image_url', 'protected',
    'followers_count', 'following_count', 'tweet_count', 'listed_count',
    'verified', 'withheld', 'cashtags', 'hashtags', 'mentions',
]

# Turn the list of per-user rows into a DataFrame
user_info_df = pd.DataFrame(users, columns=profile_columns)

# Write the profile table to disk. NOTE(review): to_csv is called with its
# defaults, so the row index is also written as an unnamed first column.
user_info_df.to_csv('mergr_twitter_profile_info.csv')