# Twitter Profile Info

Gets Twitter profile information (`created_at`, `location`, etc.) for the Twitter handles listed in a CSV.

In [1]:
# Add the parent directory to the import path so we can load the Twitter API keys from twitter_api_xanda.py
import sys
sys.path.append('..')

In [2]:
import pandas as pd
import tweepy
from twitter_api_xanda import TWITTER_API_BEARER

Get the S&P 500 company Twitter handles from `sp_500_twitter_subsidiaries_manual_mentioned.csv`.

In [3]:
# Load the S&P 500 company table; the 'Twitter Handle' column holds each company's handle.
handles_csv_path = '../handle_scraping/sp_500_twitter_subsidiaries_manual_mentioned.csv'
twitter_handle_df = pd.read_csv(handles_csv_path)
twitter_handle_df

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded,Wiki Link,Official Website,Twitter URL,Twitter Handle,Subsidiary
0,0,0,0,0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",8/9/76,66740,1902,https://en.wikipedia.org/wiki/3M,https://www.3m.com/,https://twitter.com/3M,3M,False
1,1,1,1,1,AOS,A. O. Smith,reports,Industrials,Building Products,"Milwaukee, Wisconsin",7/26/17,91142,1916,https://en.wikipedia.org/wiki/A._O._Smith,http://aosmith.com,https://twitter.com/aosmithhotwater,aosmithhotwater,False
2,2,2,2,2,ABT,Abbott,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",3/31/64,1800,1888,https://en.wikipedia.org/wiki/Abbott_Laboratories,http://www.abbott.com,https://twitter.com/AbbottNews,AbbottNews,False
3,3,3,3,3,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",12/31/12,1551152,2013 (1888),https://en.wikipedia.org/wiki/AbbVie,http://abbvie.com,https://twitter.com/abbvie,abbvie,False
4,4,4,4,4,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",5/31/18,815094,1981,https://en.wikipedia.org/wiki/Abiomed,http://abiomed.com,https://twitter.com/abiomed,abiomed,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,527,528,497,497,YUM,Yum! Brands,reports,Consumer Discretionary,Restaurants,"Louisville, Kentucky",10/6/97,1041061,1997,https://en.wikipedia.org/wiki/Yum!_Brands,https://www.yum.com,https://twitter.com/kfc,kfc,True
570,528,529,498,498,ZBRA,Zebra Technologies,reports,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",12/23/19,877212,1969,https://en.wikipedia.org/wiki/Zebra_Technologies,http://www.zebra.com,http://www.twitter.com/zebratechnology,zebratechnology,False
571,529,530,499,499,ZBH,Zimmer Biomet,reports,Health Care,Health Care Equipment,"Warsaw, Indiana",8/7/01,1136869,1927,https://en.wikipedia.org/wiki/Zimmer_Biomet,http://zimmerbiomet.com,https://twitter.com/zimmerbiomet,zimmerbiomet,False
572,530,531,500,500,ZION,Zions Bancorporation,reports,Financials,Regional Banks,"Salt Lake City, Utah",6/22/01,109380,1873,https://en.wikipedia.org/wiki/Zions_Bancorpora...,http://zionsbancorporation.com,,,False


In [4]:
# Some companies have no Twitter account, so their handle is null; keep only the non-null handles.
handle_column = twitter_handle_df['Twitter Handle']
twitter_handles = list(handle_column[handle_column.notna()])

Functions for parsing the `entities` field of a Twitter user object:

In [5]:
def parse_entity_url(user_obj):
    """Return the first expanded profile URL from a user object's `entities` field.

    Args:
        user_obj: An object with an `entities` attribute (a dict or None). The
            URL entries are read from `entities['url']['urls']`, each of which
            is expected to be a dict with an 'expanded_url' key.

    Returns:
        The first expanded URL, or None when the user has no `entities`, no
        'url' entity, an empty/missing 'urls' list, or no entry carrying an
        'expanded_url'.
    """
    entities = user_obj.entities
    if not entities or 'url' not in entities:
        return None

    # Tolerate a missing/empty 'urls' list and entries without 'expanded_url'
    # instead of raising KeyError / IndexError on an unusual profile.
    url_entries = entities['url'].get('urls') or []
    expanded_urls = [entry['expanded_url'] for entry in url_entries if entry.get('expanded_url')]

    if not expanded_urls:
        return None

    if len(expanded_urls) > 1:
        print("Saw a Twitter user with more than 1 URL")

    # Only the first URL is kept; multiple URLs are reported above so the case is visible.
    return expanded_urls[0]

def parse_entity_cashtags(user_obj):
    """Collect the cashtag tags found in a user's profile description.

    Reads `user_obj.entities['description']['cashtags']` and returns the 'tag'
    value of every entry as a list. Returns None when `entities` is empty or
    lacks the 'description' / 'cashtags' keys.
    """
    entities = user_obj.entities
    if entities and 'description' in entities and 'cashtags' in entities['description']:
        return [entry['tag'] for entry in entities['description']['cashtags']]
    return None

def parse_entity_hashtags(user_obj):
    """Collect the hashtag tags found in a user's profile description.

    Reads `user_obj.entities['description']['hashtags']` and returns the 'tag'
    value of every entry as a list. Returns None when `entities` is empty or
    lacks the 'description' / 'hashtags' keys.
    """
    entities = user_obj.entities
    if not entities:
        return None
    if 'description' not in entities:
        return None

    description_entities = entities['description']
    if 'hashtags' not in description_entities:
        return None

    return [item['tag'] for item in description_entities['hashtags']]

def parse_entity_mentions(user_obj):
    """Collect the usernames mentioned in a user's profile description.

    Reads `user_obj.entities['description']['mentions']` and returns the
    'username' value of every entry as a list. Returns None when `entities` is
    empty or lacks the 'description' / 'mentions' keys.
    """
    entities = user_obj.entities
    has_mentions = bool(entities) and 'description' in entities and 'mentions' in entities['description']
    if not has_mentions:
        return None

    return [mentioned['username'] for mentioned in entities['description']['mentions']]

Let's get Twitter profile info for the company Twitter handles in our CSV!

In [6]:
client = tweepy.Client(bearer_token=TWITTER_API_BEARER)

USER_FIELDS = ['created_at', 'description', 'entities', 'id', 'location', 'name', 'pinned_tweet_id', 'profile_image_url', 'protected', 'public_metrics', 'url', 'username', 'verified', 'withheld']

# get_users can only look up this many usernames per request, so the handles are sent in batches.
MAX_USERS_PER_LOOKUP = 100

# Get Twitter user objects for all the Twitter handles in our CSV
user_objs = []
for i in range(0, len(twitter_handles), MAX_USERS_PER_LOOKUP):
    batch = twitter_handles[i:i + MAX_USERS_PER_LOOKUP]
    response = client.get_users(usernames=batch, user_fields=USER_FIELDS)
    # `response.data` is None when none of the handles in the batch resolve to a user;
    # fall back to an empty list so `+=` does not raise a TypeError.
    user_objs += response.data or []

# Use the Twitter user objects to create a list of lists, where each inner list corresponds to
# the Twitter profile info for one user. The field order must match the column names used when
# the DataFrame is built from `users`.
users = []
for obj in user_objs:
    metrics = obj.public_metrics
    users.append([
        obj.id,
        obj.name,
        obj.username,
        obj.created_at,
        obj.location,
        obj.description,
        parse_entity_url(obj),
        obj.pinned_tweet_id,
        obj.profile_image_url,
        obj.protected,
        metrics['followers_count'],
        metrics['following_count'],
        metrics['tweet_count'],
        metrics['listed_count'],
        obj.verified,
        obj.withheld,
        parse_entity_cashtags(obj),
        parse_entity_hashtags(obj),
        parse_entity_mentions(obj),
    ])

In [7]:
# Column names for the profile table, in the same order as the values in each row of `users`.
profile_columns = [
    'id', 'name', 'username', 'created_at', 'location', 'description', 'url',
    'pinned_tweet_id', 'profile_image_url', 'protected',
    'followers_count', 'following_count', 'tweet_count', 'listed_count',
    'verified', 'withheld', 'cashtags', 'hashtags', 'mentions',
]

# Build the profile table from the list of per-user rows.
user_info_df = pd.DataFrame(data=users, columns=profile_columns)

# Write the profile table to disk. With the default settings, to_csv also writes the
# DataFrame index as an unnamed first column.
output_csv_path = 'sp_500_twitter_profile_info_mentioned_subsidiaries.csv'
user_info_df.to_csv(output_csv_path)