In [1]:
# !pip uninstall twitter-api-client -y
# !pip install twitter-api-client --no-cache-dir

In [2]:
import re
import orjson
from pathlib import Path
from twitter.utils import find_key
import pandas as pd

In [3]:
def get_data(path: Path, expr: str = '', **kwargs) -> dict:
    """
    Load every JSON file below `path` whose file name matches `expr`.

    Payloads are grouped by the last '_'-separated token of the file stem,
    e.g. 'abc_Favoriters.json' is stored under the key 'Favoriters'.

    Args:
        path: directory that is searched recursively.
        expr: regular expression applied to each file name with `search`;
            the default '' matches every file.
        **kwargs: extra regex options (e.g. `flags=re.I`).

    Returns:
        dict mapping each group key to the list of parsed JSON payloads.
    """
    pattern = re.compile(expr, **kwargs)
    D = {}
    for p in path.rglob('*'):
        # rglob('*') yields directories too; reading one would raise, so only
        # regular files are considered.
        if p.is_file() and pattern.search(p.name):
            D.setdefault(p.stem.split('_')[-1], []).append(orjson.loads(p.read_bytes()))
    return D


def get_user_details(data: dict, cols: list = None, sort: str = 'created_at') -> pd.DataFrame:
    """
    Build a DataFrame with one row per unique user found in `data`.

    Each entry returned by `find_key(data, 'user_results')` contributes a row
    made of its `result.rest_id` plus all fields of `result.legacy`; entries
    without a non-empty `legacy` mapping are skipped.
    NOTE(review): `find_key` comes from `twitter.utils`; it presumably collects
    every value stored under the given key anywhere in `data` - confirm.

    Args:
        data: parsed API responses, as produced by `get_data`.
        cols: optional list of columns to keep; all columns when empty/None.
        sort: column to sort by. Descending by default; add "+" to sort ascending
            (e.g. '+followers_count').

    Returns:
        DataFrame de-duplicated on 'rest_id', with 'created_at' parsed to
        datetimes and every column whose name contains 'count' coerced to
        numeric (unparseable values become NaN). An empty DataFrame (with
        `cols` as columns, if given) when no user records are found.
    """
    records = []
    for u in find_key(data, 'user_results'):
        result = u.get('result', {})
        if legacy := result.get('legacy', {}):
            records.append({'rest_id': result.get('rest_id')} | legacy)

    # Nothing to de-duplicate or sort: avoid a KeyError on the missing columns.
    if not records:
        return pd.DataFrame(columns=list(cols) if cols else None)

    df = (
        pd.DataFrame(records)
        .drop_duplicates('rest_id')
        .assign(created_at=lambda x: pd.to_datetime(x['created_at']))
    )

    # Coerce counts BEFORE sorting so string-encoded numbers order numerically
    # ('10' > '9') rather than lexicographically.
    count_cols = [c for c in df.columns if 'count' in c]
    if count_cols:
        df[count_cols] = df[count_cols].apply(pd.to_numeric, errors='coerce')

    df = (
        df
        .sort_values(sort.strip('+'), ascending='+' in sort)
        .reset_index(drop=True)
    )
    return df[cols] if cols else df

In [4]:
# Directory that holds the raw JSON responses.
PATH = Path('data/raw')

# Filter for users who favorited or retweeted a tweet.
data = get_data(path=PATH, expr='Favoriters|Retweeters')

In [5]:
# No "+" in `sort`, so rows come back in descending followers_count order.
df = get_user_details(data, cols=['created_at', 'screen_name', 'followers_count'], sort='followers_count')

df

Unnamed: 0,created_at,screen_name,followers_count
0,2014-07-14 20:24:30+00:00,larry_deramus,25513
1,2012-12-06 17:07:13+00:00,OneAhmedSagheer,22092
2,2013-08-12 21:56:09+00:00,eriikaswrld,16872
3,2009-04-17 06:02:06+00:00,BrandonEsWolf,15561
4,2009-06-18 23:30:10+00:00,badgalvitoria,14621
...,...,...,...
1850,2023-01-06 17:18:17+00:00,Jeff62245805,1
1851,2023-02-16 18:38:48+00:00,SladjaMilov14,1
1852,2022-10-20 23:47:34+00:00,DylonsaurusRex,0
1853,2022-10-14 22:10:56+00:00,SanduskyAddison,0
