In [42]:
import os
import requests
import json
from datetime import datetime, timedelta
import pandas as pd
from cassandra.cluster import Cluster

In [2]:
# Location of the Twitter API credentials file inside the user's home directory.
creds_path = os.path.join(os.path.expanduser('~'), 'bakery/twitter/creds.json')

# Read the raw bytes first, then decode and parse them as JSON.
with open(creds_path, 'rb') as f:
    raw_creds = f.read()

creds = json.loads(raw_creds.decode())

In [4]:
def bearer_oauth(r):
    """Add bearer-token authentication headers to an outgoing request.

    Intended to be passed as the ``auth=`` callable of ``requests`` calls.

    Args:
        r: The request object being prepared; its ``headers`` mapping is
            modified in place.

    Returns:
        The same request object, with ``Authorization`` and ``User-Agent`` set.
    """
    r.headers.update({
        'Authorization': f'Bearer {creds["bearer_token"]}',
        'User-Agent': 'bakery',
    })
    return r

##### Get previous day's tweets

In [59]:
# Build a 24-hour query window ending 10 seconds before "now" (UTC).
# NOTE(review): the 10-second margin presumably keeps end_time safely in the
# past for the search API -- confirm against the API's end_time constraints.
window_fmt = '%Y-%m-%dT%H:%M:%SZ'
current_dttm = datetime.utcnow() - timedelta(seconds=10)
previous_dttm = current_dttm - timedelta(days=1)

start_time, end_time = (
    dttm.strftime(window_fmt) for dttm in (previous_dttm, current_dttm)
)
print(start_time, end_time)

2022-08-06T22:31:17Z 2022-08-07T22:31:17Z


In [60]:
# Search parameters: original (non-retweet) tweets from the listed accounts
# inside the [start_time, end_time] window, 100 results per page.
# 'expansions': 'author_id' makes each response carry the matching user
# objects under includes.users.
params = {
    'query': '(from:CNBC OR from:Stocktwits OR from:MarketWatch OR from:wsjmarkets) -is:retweet',
    'start_time': start_time,
    'end_time': end_time,
    'tweet.fields': 'created_at',
    'user.fields': 'name',
    'expansions': 'author_id',
    'max_results': 100
}


def fetch_recent_tweets(search_params):
    """Fetch every page of a recent-search query.

    Follows ``meta.next_token`` until the API stops returning one. User
    objects are collected from *every* page (not just the first), because an
    author whose tweets only appear on a later page is only described in that
    page's ``includes``.

    Args:
        search_params: Query parameters for the recent-search endpoint. The
            dict is copied, so the caller's object is not modified.

    Returns:
        A ``(tweets, users)`` tuple: the list of tweet dicts across all pages,
        and the list of distinct user dicts (first occurrence per user id).

    Raises:
        Exception: If any page request returns a non-200 status.
    """
    page_params = dict(search_params)
    tweets = []
    users_by_id = {}

    while True:
        resp = requests.get(
            'https://api.twitter.com/2/tweets/search/recent',
            auth=bearer_oauth,
            params=page_params
        )
        if resp.status_code != 200:
            raise Exception(
                'Cannot search tweets (HTTP {}): {}'.format(
                    resp.status_code, resp.text
                )
            )

        # Parse the body once per page instead of on every access.
        page = resp.json()

        # A page with no matches has no 'data' / 'includes' keys at all.
        tweets.extend(page.get('data', []))
        for user in page.get('includes', {}).get('users', []):
            users_by_id.setdefault(user['id'], user)

        token = page.get('meta', {}).get('next_token')
        if not token:
            break
        page_params['next_token'] = token

    return tweets, list(users_by_id.values())


data, users = fetch_recent_tweets(params)

In [61]:
# Sanity check: total number of tweets collected across all pages.
len(data)

204

In [62]:
# Tabular view of the collected tweet dicts, one row per tweet.
pd.DataFrame(data)

Unnamed: 0,author_id,created_at,id,text
0,624413,2022-08-07T22:29:52.000Z,1556407304731463682,A scrappy group of blue-collar retirees rescue...
1,20402945,2022-08-07T22:03:59.000Z,1556400788855046145,Stock futures are flat following S&amp;P 500's...
2,624413,2022-08-07T22:03:57.000Z,1556400782538317824,‘Bullet Train’ pulls away with $30.1 million o...
3,20402945,2022-08-07T22:03:00.000Z,1556400541713895424,This HR manager took 3 months off with pay to ...
4,624413,2022-08-07T21:37:09.000Z,1556394035119620096,CVS seeking to buy Signify Health as part of h...
...,...,...,...,...
199,20402945,2022-08-06T23:03:00.000Z,1556053253422555136,More workers want to get paid in crypto — and ...
200,20402945,2022-08-06T23:00:00.000Z,1556052501182439424,"Bye bye, San Francisco: The top 7 U.S. cities ..."
201,624413,2022-08-06T22:55:02.000Z,1556051247509241857,"OPINION: More jobs are a good thing, but too m..."
202,20402945,2022-08-06T22:48:13.000Z,1556049532835749888,"There are 3 types of employees, according to a..."


In [40]:
# Tabular view of the user objects returned alongside the tweets.
pd.DataFrame(users)

Unnamed: 0,id,name,username
0,20402945,CNBC,CNBC
1,624413,MarketWatch,MarketWatch
2,28164923,WSJ Markets,WSJmarkets


##### Create Cassandra table

In [43]:
# Open a Cassandra session used by the cells below.
# NOTE(review): Cluster() is called with no arguments, so it relies on the
# driver's default contact point -- presumably a local node; confirm.
cass_cluster = Cluster()
cass_session = cass_cluster.connect()

In [46]:
# DDL for the tweets table, keyed by tweet id. "if not exists" makes the cell
# safe to re-run.
# NOTE(review): the bakery keyspace is not created anywhere in the visible
# cells -- presumably it already exists; confirm.
create_tweets_table = '''
    create table if not exists bakery.tweets (
        id text,
        author_id text,
        author_name text,
        created_at timestamp,
        text text,
        primary key (id)
    )
    '''

cass_session.execute(create_tweets_table)

<cassandra.cluster.ResultSet at 0x7f6fdf8063a0>

In [55]:
# Spot-check: resolve a known author id to its display name.
# Raises IndexError if no user with that id is present in `users`.
[user['name'] for user in users if user['id'] == '20402945'][0]

'CNBC'

In [47]:
# Inspect the raw tweet dicts (text, created_at, id, author_id).
data

[{'text': 'This former Olympian turned diplomat reveals why there’s no magic formula for success. (via @CNBCi) https://t.co/R9GwNLTC6F',
  'created_at': '2022-08-07T21:32:00.000Z',
  'id': '1556392740266553344',
  'author_id': '20402945'},
 {'text': 'Here’s how the Inflation Reduction Act’s rebates and tax credits for heat pumps and solar can lower your energy bill https://t.co/RmcmmQph0H',
  'created_at': '2022-08-07T21:12:06.000Z',
  'id': '1556387733249179650',
  'author_id': '624413'},
 {'text': 'Why is options trading so risky? Watch the video to learn more. https://t.co/KXEFE0tOhS https://t.co/J9atCvnb0J',
  'created_at': '2022-08-07T21:01:00.000Z',
  'id': '1556384938722635776',
  'author_id': '20402945'},
 {'text': 'The Inflation Reduction Act, which the Senate approved Sunday, includes rebates or a tax break for qualifying consumers who add efficient heat pumps, rooftop solar, electric HVAC and electric water heaters.\nhttps://t.co/1WpGHMO18f',
  'created_at': '2022-08-07T20:4

In [58]:
# Prepare the INSERT once and bind values per tweet. Bound parameters replace
# the previous approach of interpolating a Python tuple repr into the CQL
# text, which (a) was injection-prone, (b) required stripping every
# apostrophe from the tweet text, and (c) stored repr() escapes such as a
# literal backslash-n instead of real newlines.
insert_tweet = cass_session.prepare(
    '''
    INSERT INTO bakery.tweets
        (id, author_id, author_name, created_at, text)
    VALUES (?, ?, ?, ?, ?)
    '''
)

# Map author id -> display name once instead of scanning `users` per tweet.
# setdefault keeps the first entry for an id, matching the old "[0]" choice.
author_names = {}
for user in users:
    author_names.setdefault(user['id'], user['name'])

for t in data:
    values = (
        t['id'],
        t['author_id'],
        # None (stored as null) when the author is not in `users`, rather
        # than aborting the whole load with an IndexError.
        author_names.get(t['author_id']),
        # A prepared statement needs a real datetime for a timestamp column.
        # The API string looks like '2022-08-07T21:32:00.000Z' (UTC).
        # NOTE(review): the resulting datetime is naive; the driver is
        # expected to treat naive datetimes as UTC -- confirm.
        datetime.strptime(t['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
        # Stored verbatim: no quote stripping needed with bound parameters.
        t['text'],
    )
    cass_session.execute(insert_tweet, values)

##### Streaming

In [21]:
# Fetch the filtered-stream rules currently registered for this app.
rules_url = "https://api.twitter.com/2/tweets/search/stream/rules"
r = requests.get(rules_url, auth=bearer_oauth)

# Anything other than HTTP 200 is treated as a hard failure.
if r.status_code == 200:
    rules = r.json()
else:
    raise Exception(
        'Cannot get rules (HTTP {}): {}'.format(r.status_code, r.text)
    )

In [22]:
# Delete every existing stream rule so the stream starts from a clean slate.
# `rules` is the parsed response of the rules GET request; it only has a
# 'data' key when at least one rule exists. An empty 'data' list is treated
# the same way, so we never send a delete request with no ids.
existing_rules = rules.get('data') if isinstance(rules, dict) else None

if not existing_rules:
    print('No rules to delete')
else:
    ids = [rule['id'] for rule in existing_rules]
    payload = {'delete': {'ids': ids}}
    r = requests.post(
        'https://api.twitter.com/2/tweets/search/stream/rules',
        auth=bearer_oauth,
        json=payload
    )

    if r.status_code != 200:
        raise Exception(
            'Cannot delete rules (HTTP {}): {}'.format(r.status_code, r.text)
        )
    print(r.json())

{'meta': {'sent': '2022-08-03T20:59:55.563Z', 'summary': {'deleted': 1, 'not_deleted': 0}}}


In [23]:
# Register the stream rule(s): only tweets authored by CNBC are matched.
rules = [dict(value='from:CNBC')]
payload = dict(add=rules)

r = requests.post(
    'https://api.twitter.com/2/tweets/search/stream/rules',
    json=payload,
    auth=bearer_oauth,
)

# The endpoint answers 201 on success; any other status is a failure.
if r.status_code == 201:
    print(r.json())
else:
    raise Exception(
        'Cannot add rules (HTTP {}): {}'.format(r.status_code, r.text)
    )

{'data': [{'value': 'from:CNBC', 'id': '1554935116685889536'}], 'meta': {'sent': '2022-08-03T20:59:56.708Z', 'summary': {'created': 1, 'not_created': 0, 'valid': 1, 'invalid': 0}}}


In [24]:
# Consume the filtered stream and pretty-print each matching tweet.
# With stream=True the connection stays open until the body is fully read or
# the response is closed; the `with` block guarantees it is released when the
# loop is interrupted (e.g. kernel interrupt) or an exception is raised.
# This cell runs until interrupted.
with requests.get(
    'https://api.twitter.com/2/tweets/search/stream',
    auth=bearer_oauth,
    stream=True
) as r:
    if r.status_code != 200:
        raise Exception(
            'Cannot get stream (HTTP {}): {}'.format(r.status_code, r.text)
        )
    for line in r.iter_lines():
        # Skip empty lines (no payload to parse).
        if line:
            r_json = json.loads(line)
            print(json.dumps(r_json, indent=4, sort_keys=True))

{
    "data": {
        "id": "1554935189712797698",
        "text": "Walmart lays off corporate employees after slashing forecast https://t.co/FyFMgWwb1Y"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554935827402301441",
        "text": "RT @melissa_repko: Just in: Walmart says it is laying off corporate employees about a week after it slashed its profit outlook. $WMT https:\u2026"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554936897172082688",
        "text": "Remember AOL chatrooms and downloading songs through Napster? Crypto evangelists are now pushing for a new kind of net that yanks power away from Big Tech platforms and hands it back to users. Can they succeed? (via @CNBCi) https://t.co/UDP1aTW1MM"
    },
    "matching_rules": [
        {
            "id": "1554

{
    "data": {
        "id": "1554970542809223168",
        "text": "Jim Cramer says to avoid these ill-fated moves in the market https://t.co/vSzpMSg3tK"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554976155760992257",
        "text": "\u201cWith all the job growth in the first half of the year, it\u2019s hard to say that there was a recession. With a flat unemployment rate at 3.6%, it\u2019s hard to say there was a recession,\u201d says St. Louis Fed President James Bullard on a potential recession. https://t.co/AEXQCdRehE https://t.co/FeKicGLzd5"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554976283079106560",
        "text": "Asia-Pacific markets set to trade mixed following Wall Street bounce https://t.co/uDjIKqh421"
    },
    "matching_rules": [
        {
      

{
    "data": {
        "id": "1554995425265852416",
        "text": "RT @MonicaPitrelli: Baby #China ? Little #Italy ? The popularity of \u201ctravel-inspired\u201d baby names has increased 14% in the U.S. and the U.K.\u2026"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554998047146479618",
        "text": "Asset manager predicts the next bull market \u2014 and reveals how to position for it https://t.co/xUBJ6RP3EW"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1554999326664957953",
        "text": "Here's how to invest for yields to beat a bad year for stocks and bonds \u2014 according to the pros https://t.co/qrxIcEiUSU"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": 

{
    "data": {
        "id": "1555115469148930048",
        "text": "Adidas boss says LIV Golf a 'normal evolution,' wants to focus on player partnerships https://t.co/RiRxBYZwCd"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555116341388009472",
        "text": "Steel is essential \u2014 it's in everything from dishwashers to cars. Since the start of the pandemic, steel prices have spiked. Some worry it's a bubble that's about to burst. Watch the full video here: https://t.co/PkVQtuRJaf https://t.co/YiGyDqvSzs"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555130359884517377",
        "text": "Why this analyst believes advertising is the next big revenue driver for Apple https://t.co/bPVq8vorCW"
    },
    "matching_rules": [
        {
            "id": "15549351166858895

{
    "data": {
        "id": "1555165299065982976",
        "text": "IMF tells Europe to let consumers bear the brunt of higher bills to encourage energy saving https://t.co/smpr5JIhUn"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555165311762145280",
        "text": "Brittney Griner awaits her fate in Russian drugs trial https://t.co/xEPOcpCA09"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555165722644545537",
        "text": "RT @_karengilchrist: The IMF warned Europe against intervening in the region\u2019s worsening #energycrisis with broad-based financial support,\u2026"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555167717547704320",
        "text": "5 thin

{
    "data": {
        "id": "1555190582405988353",
        "text": "Rent prices are soaring in these 5 U.S. metros. https://t.co/AfHsOaaAH9 https://t.co/GG5jZm8gr6"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555191756492341248",
        "text": "Gadget season is here \u2014 new folding phones, watches and earbuds are coming https://t.co/Upn1Gf0tE1"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555193085935640579",
        "text": "New research from Mandiant shows the Chinese government is likely using a vast network of fake news websites and social media to push its message. @EamonJavers breaks down the details. https://t.co/BcIxIFzPV8 https://t.co/DGZJOzzMCF"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
      

{
    "data": {
        "id": "1555219008126844934",
        "text": "Election results in a slate of key primary races Tuesday night underscored former President Trump\u2019s enduring influence over the Republican Party, despite signals that his status as its de facto leader may be eroding.\nhttps://t.co/9Dd6VrXmlK"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555219860266913797",
        "text": "Polio has been found in wastewater samples taken from two counties outside of New York City indicating the virus is spreading in the community, according to state health officials.\nhttps://t.co/JSb9EkQ13L"
    },
    "matching_rules": [
        {
            "id": "1554935116685889536",
            "tag": ""
        }
    ]
}
{
    "data": {
        "id": "1555221664174149634",
        "text": "1 in 4 Gen Z employees worry that taking a lunch break makes them look bad at work (via @CN