# Live Twitter Data

Twitter data is useful for everything from marketing to NLP. Thankfully, Twitter has a pretty generous API that can be used to retrieve information such as recent Tweets or number of connections.

## Generate an API Key

1. To read data from Twitter, you need to generate an API key. Read more about how to generate your own key here.

2. Once you have your token, add it as an environment variable.

In [None]:
# See https://docs.deepnote.com/environment/environment-variables for information on how to add your own API key

In [None]:
import os
# Read the Twitter bearer token from the TWITTER_KEY environment variable
# (the value is None when the variable is not set).
# See https://docs.deepnote.com/environment/environment-variables for more information
twitter_api_token = os.environ.get('TWITTER_KEY')

## Number of Mentions

In [None]:
# Twitter handle whose mentions are fetched and counted in the cells below
username = 'deepnoteHQ'

In [None]:
'''
This code contains useful functions for using the Twitter API
'''

import requests, time
from datetime import datetime, timedelta
import pandas as pd

# Set up the url and credentials for the API
# Root of the v2 API; endpoint paths are appended directly to this string
twitter_base_url = 'https://api.twitter.com/2/'
# Bearer-token header passed to the API requests. If TWITTER_KEY was not set,
# twitter_api_token is None and the header value becomes 'Bearer None'.
headers = {'Authorization': f'Bearer {twitter_api_token}'}

# Return the user id for a given username
def get_user_id(username):
    """Look up the Twitter user id that belongs to ``username``.

    Queries the ``users/by/username/<username>`` endpoint and returns the
    ``id`` field found under ``data`` in the JSON reply. A reply without
    that field raises ``KeyError``.
    """
    lookup_url = twitter_base_url + f'users/by/username/{username}'
    payload = requests.get(lookup_url, headers=headers).json()
    return payload['data']['id']

# Retrieve data based on parameters
def get_twitter_data(endpoint, params, max_items=1000):
    """Fetch paginated records from a Twitter API endpoint.

    Requests ``twitter_base_url + endpoint`` repeatedly, following the
    ``next_token`` reported in each reply's ``meta`` section, and collects
    the entries found under ``data``.

    Args:
        endpoint: Path appended to ``twitter_base_url``,
            e.g. ``'tweets/search/recent'``.
        params: Query parameters for the request. The mapping is copied, so
            the caller's dict is never modified while paginating. ``None``
            is treated as "no parameters".
        max_items: Stop asking for further pages once at least this many
            records were collected. Pages are kept whole, so the result can
            hold more rows than this when the page size does not divide it.

    Returns:
        A ``pandas.DataFrame`` with one row per collected record. Paging
        stops at the first reply without a ``data`` key; whatever was
        collected up to then (possibly nothing) is returned.
    """
    records = []
    url = twitter_base_url + endpoint

    # Work on a copy: the pagination token added below must not leak into
    # the dict owned by the caller.
    query = dict(params or {})

    # The token parameter is named differently for the search endpoints
    token_key = 'next_token' if 'search' in endpoint else 'pagination_token'

    while True:
        response = requests.get(url, headers=headers, params=query).json()
        time.sleep(0.5)  # Sleep to avoid any rate-limit issues

        # Sanity check that we actually have data
        if 'data' not in response:
            break
        records += response['data']

        # Enough records collected
        if len(records) >= max_items:
            break

        # A reply without 'meta' or without a token means there are no more pages
        next_token = response.get('meta', {}).get('next_token')
        if not next_token:
            break
        query[token_key] = next_token

    return pd.DataFrame(records)

def get_twitter_mentions(username):
    """Return the tweets that mention ``username`` as a DataFrame.

    Resolves the username to its user id, then pages through
    ``users/<id>/mentions`` with 100 results per request, asking for each
    tweet's ``created_at`` field.
    """
    request_params = {'max_results': 100, 'tweet.fields': 'created_at'}
    account_id = get_user_id(username)
    return get_twitter_data(f'users/{account_id}/mentions', request_params)

def search_tweets(query, start_date):
    """Search recent tweets matching ``query`` and return them as a DataFrame.

    Args:
        query: Search expression sent as the ``query`` parameter.
        start_date: Value sent as ``start_time``; the caller is expected to
            pass a timestamp string already in the form the API accepts.

    Returns:
        The DataFrame built by ``get_twitter_data`` for the
        ``tweets/search/recent`` endpoint (100 results per request).
    """
    tweet_fields = 'created_at,lang,text,author_id,public_metrics'
    user_fields = 'name,username,public_metrics,verified'
    search_params = {
        'query': query,
        'max_results': 100,
        'start_time': start_date,
        'expansions': 'author_id',
        'tweet.fields': tweet_fields,
        'user.fields': user_fields,
    }
    return get_twitter_data('tweets/search/recent', search_params)

In [None]:
# Fetch the mentions of `username`, parse their timestamps and count how many
# fall into each weekly bin; the result has the columns created_at and count.
df = (
    get_twitter_mentions(username=username)
    .assign(created_at=lambda frame: pd.to_datetime(frame['created_at']))
    .groupby(pd.Grouper(key='created_at', freq='1W'))
    .size()
    .reset_index(name='count')
)

In [None]:
# Plot the weekly mention counts with a Vega-Lite v5 spec: a "trail" mark with
# created_at on the temporal x axis and count on the quantitative y axis.
DeepnoteChart(df, """{"mark":{"clip":true,"type":"trail","color":"#4c78a8","tooltip":true},"config":{"legend":{}},"$schema":"https://vega.github.io/schema/vega-lite/v5.json","encoding":{"x":{"sort":null,"type":"temporal","field":"created_at","scale":{"type":"linear","zero":false}},"y":{"sort":null,"type":"quantitative","field":"count","scale":{"type":"linear","zero":false}}}}""")

<__main__.DeepnoteChart at 0x7fc744f164d0>

## Top Tweets Today

You can use the API to keep up with trends and find Tweets that you should follow up on or respond to.

In [None]:
# Search expression handed to search_tweets() below; it consists of two
# double-quoted phrases.
query = '"seed" "series a"'

In [None]:
# Start of the search window: 24 hours ago, read from the clock in UTC.
# The trailing 'Z' marks the timestamp as UTC, so the current time must be
# taken in UTC rather than in the machine's local time zone; otherwise the
# window is shifted by the local UTC offset.
starting_date = pd.Timestamp.now(tz='UTC') - pd.Timedelta(1, 'D')
# Reformat in a form that Twitter wants (second precision, e.g. 2022-01-31T08:15:00Z)
starting_date = starting_date.strftime('%Y-%m-%dT%H:%M:%SZ')

# Search the Tweets: fetch tweets matching `query`, using `starting_date` as
# the start of the time window; the result is a DataFrame
df = search_tweets(query, starting_date)

# Extract and sort by # of likes
def get_likes(x):
    """Return the ``like_count`` entry of the mapping ``x``, or None if absent."""
    like_total = x.get('like_count', None)
    return like_total
# Add the like count of every tweet as column `nlikes`, then order the tweets
# from most liked to least liked
df = (
    df.assign(nlikes=df.public_metrics.apply(get_likes))
    .sort_values(by='nlikes', ascending=False)
)

In [None]:
from IPython.display import display, Markdown, HTML

# Embed the top 3 tweets as HTML
for tweet_id in df.id.head(3):
    # The oEmbed service lives on a different host than the API and is public,
    # so the API bearer token is deliberately not sent along. Passing the
    # tweet URL through `params` lets requests percent-encode it.
    tweet_url = f'https://twitter.com/twitter/status/{tweet_id}'
    response = requests.get('https://publish.twitter.com/oembed', params={'url': tweet_url}).json()

    # Replies without an 'html' field cannot be rendered; report and
    # move on instead of aborting the loop with a KeyError.
    embed_html = response.get('html')
    if embed_html is None:
        print(f'Tweet {tweet_id} cannot be embedded: {response}')
        continue

    display(HTML(data=embed_html))

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9a04582f-d4fa-4142-9de9-1fe9115658eb' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>