In [None]:
from copy import deepcopy
import logging
import os
import re
import json

import tweepy
from shapely.geometry import box, mapping

In [None]:
# Grab the root logger first, then configure root logging at INFO level.
# The two steps are independent: `getLogger()` with no name always returns
# the root logger that `basicConfig` configures.
logger = logging.getLogger()

logging.basicConfig(level=logging.INFO)

In [None]:
# Read the API credential from the environment so it is never stored in the
# notebook; a missing variable raises KeyError here.
bearer_token = os.environ["TWITTER_BEARER_TOKEN"]

# Client authenticated with the bearer token, used for all requests below.
client = tweepy.Client(bearer_token=bearer_token)

In [None]:
# Accumulators filled by the search loop:
#   geo_tweets - tweets whose `geo` attribute is truthy
#   users      - expanded user objects keyed by user id
#   places     - expanded place objects keyed by place id
geo_tweets = []
users, places = {}, {}

# Pagination cursor; None requests the first page of results.
next_token = None

In [None]:
# Page through the recent-search results (at most 10 pages of up to 100
# tweets each), keeping only the tweets that carry geo information.
for iteration in range(10):
    resp = client.search_recent_tweets(
        "I used Shazam",
        max_results=100,
        tweet_fields=["author_id", "created_at", "geo"],
        user_fields=["username"],
        place_fields=["geo", "name", "country"],
        expansions=["author_id", "geo.place_id"],
        next_token=next_token,
    )

    # `resp.data` is None (not an empty list) when a page contains no tweets,
    # so fall back to an empty list instead of iterating over None.
    geo_tweets.extend(t for t in (resp.data or []) if t.geo)

    logger.info("Iteration: %d", iteration)
    logger.debug("Length of geo tweets: %d", len(geo_tweets))

    # Index the expanded user and place objects by id so they can be looked
    # up later from a tweet's `author_id` / `geo["place_id"]`.
    users |= {u.id: u for u in resp.includes.get("users", [])}
    places |= {p.id: p for p in resp.includes.get("places", [])}

    # Stop as soon as the response reports no further page.
    if "next_token" not in resp.meta:
        break
    next_token = resp.meta["next_token"]

In [None]:
# Number of geo-tagged tweets collected so far.
len(geo_tweets)

In [None]:
def get_geom(place_id):
    """Return an independent copy of the `geo` attribute of a known place.

    The place is looked up in the module-level `places` mapping; an unknown
    `place_id` raises KeyError. The result is deep-copied so callers can
    modify it without affecting the stored place object.
    """
    return deepcopy(places[place_id].geo)

In [None]:
# Pretty-print the place geometry of the first collected tweet.
# Requires `geo_tweets` to be non-empty (indexing [0] raises IndexError otherwise).
print(json.dumps(get_geom(geo_tweets[0].geo["place_id"]), indent=2))

In [None]:
def get_username(user_id):
    """Return the `username` of the user stored under `user_id`.

    The user is looked up in the module-level `users` mapping; an unknown
    `user_id` raises KeyError.
    """
    return users[user_id].username

In [None]:
# Username of the author of the first collected tweet.
# Requires `geo_tweets` to be non-empty.
get_username(geo_tweets[0].author_id)

In [None]:
# Expected tweet shape:
#   "... I used [@|#]Shazam to discover <title> by <artist>. https..."
# Both groups are greedy, so a title that itself contains " by " is split on
# the last " by " in the text.
_SHAZAM_TWEET_REGEX = r".*I used [@#]?Shazam to discover (?P<title>.*) by (?P<artist>.*)\. https.*"

pattern = re.compile(_SHAZAM_TWEET_REGEX)

In [None]:
def get_metadata(text):
    """Extract song metadata from the text of a Shazam tweet.

    Returns a dict with the `title` and `artist` captured by the module-level
    `pattern`, plus fixed placeholder values for `year` (0) and `genre`
    ("...").

    Raises:
        ValueError: if `text` does not match `pattern`.
    """
    found = pattern.search(text)
    if found is None:
        raise ValueError("Cannot retrieve metadata")

    # Captured groups first, then the placeholder fields, in that key order.
    return {**found.groupdict(), "year": 0, "genre": "..."}

In [None]:
# Song metadata parsed from the first collected tweet.
# Requires `geo_tweets` to be non-empty; raises ValueError if the text does
# not match `pattern`.
get_metadata(geo_tweets[0].text)

In [None]:
def serialize_tweet(tweet):
    """Convert a geo-tagged tweet into a plain dict.

    The result has the keys `date` (ISO-formatted `created_at`),
    `song_metadata` (from `get_metadata`), `username` (prefixed with "@") and
    `bbox` (from the place geometry). A `point` key is added only when the
    place geometry has a `geometry` entry.

    Errors from the helpers propagate unchanged: KeyError for an unknown
    place or user id, ValueError when the text does not match.
    """
    place_geo = get_geom(tweet.geo["place_id"])
    handle = f"@{get_username(tweet.author_id)}"
    song = get_metadata(tweet.text)

    record = {
        "date": tweet.created_at.isoformat(),
        "song_metadata": song,
        "username": handle,
        "bbox": place_geo["bbox"],
    }
    # The `geometry` entry is optional; copy it over only when it is there.
    if "geometry" in place_geo:
        record["point"] = place_geo["geometry"]

    return record

In [None]:
# Pretty-print the serialized form of the first collected tweet.
# Requires `geo_tweets` to be non-empty.
print(json.dumps(serialize_tweet(geo_tweets[0]), indent=2))