In [5]:
from commit import CommitWrapperEventModel
import json
import copy
import datetime
from collections import defaultdict
from itertools import combinations
from tqdm.notebook import tqdm
import time

In [6]:
# Load the cached events. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default locale encoding
# (post text routinely contains non-ASCII characters).
with open("bluesky_cached_models.json", encoding="utf-8") as f:
    data = json.load(f)

# Validation happens after the file is closed; only the parsed `data` is needed.
models = []
for raw_event in data:
    record = raw_event["commit"]["record"]
    # The cache stores the record's type tag under "record_type"; rename it to
    # "$type" before validation (presumably the key the model parses the
    # discriminator from -- confirm against the definition in commit.py).
    record["$type"] = record.pop("record_type")
    models.append(CommitWrapperEventModel.model_validate(raw_event))

# Query 1

/*Detect Bot Accounts (3 posts in 10 seconds)*/

SELECT *

FROM Bluesky

WHERE CreatePost ; CreatePost ; CreatePost

PARTITION BY [did]

WITHIN 10 SECONDS

In [7]:
# Per-account sliding window: maps a DID to the list of that account's events
# currently retained for the query; missing DIDs start with an empty list.
messages_by_did_in_window = defaultdict(list)

In [8]:
# Width of the sliding window, from the query's `WITHIN 10 SECONDS` clause.
WINDOW_SIZE = datetime.timedelta(seconds=10)

# Reset BOTH the results and the per-DID windows in this cell so that
# re-running it starts from a clean state. If only `outputs` were reset, the
# windows left over from a previous pass would still hold late events that are
# never evicted, breaking the ordering invariant asserted below.
outputs = []
messages_by_did_in_window = defaultdict(list)

# The eviction logic relies on events being ordered by received_time
# (sorted() is stable, so equality holds exactly when the input is non-decreasing).
assert sorted(models, key=lambda model: model.received_time) == models, [model.received_time.strftime("%Y-%m-%d %H:%M:%S") for model in models]

for model in tqdm(models):
    # NOTE(review): artificial 1 ms delay per event (~20 s over 20k events);
    # presumably simulates a live stream -- confirm it is still wanted.
    time.sleep(0.001)

    # Only "create" commits of post records count as CreatePost events.
    if model.commit.operation != "create":
        continue
    if model.commit.record.record_type != "app.bsky.feed.post":
        continue

    received_time = model.received_time
    did = model.did
    window = messages_by_did_in_window[did]

    # Evict events from the front that are more than WINDOW_SIZE older than
    # the current one.
    while window and (received_time - window[0].received_time) > WINDOW_SIZE:
        window.pop(0)

    window.append(model)

    assert sorted(window, key=lambda model: model.received_time) == window, [model.received_time.strftime("%Y-%m-%d %H:%M:%S") for model in window]

    # output: every pair of earlier in-window posts, completed by the current
    # post, is one match of CreatePost ; CreatePost ; CreatePost.
    if len(window) >= 3:
        for comb in combinations(window[:-1], 2):
            output = [*comb, model]
            assert (output[-1].received_time - output[0].received_time) <= WINDOW_SIZE, f"{(output[-1].received_time - output[0].received_time)}"
            outputs.append(output)

  0%|          | 0/20000 [00:00<?, ?it/s]

In [9]:
# Show the first detected match: a list of three post events from one DID
# (raises IndexError if no match was found).
outputs[0]

[CommitWrapperEventModel(did='did:plc:7pjwjzhppos2ljh3pcpiqtfr', kind='commit', time_us=1760491450837014, commit=CommitEventModel(cid='bafyreibdijnb4cwfgyckd7tigozxsku7ispne4a332g4qh5iw7tsozpdvy', operation='create', record=PostRecordModel(record_type='app.bsky.feed.post', createdAt=datetime.datetime(2025, 10, 15, 1, 23, 57, 765000, tzinfo=TzInfo(UTC)), langs=['en'], text='Where am I posting my webcomic, you may ask?\n\nI got three major spots: Comic Fury, Bluesky, and Instagram.\n\nComic Fury will be the main home.\n\nI will also upload the comic on Newgrounds in batches.')), received_time=datetime.datetime(2025, 10, 15, 1, 24, 10, 917748, tzinfo=TzInfo(UTC))),
 CommitWrapperEventModel(did='did:plc:7pjwjzhppos2ljh3pcpiqtfr', kind='commit', time_us=1760491450837689, commit=CommitEventModel(cid='bafyreif57sove33mmrfzmmf575gommaufnhf4kctzfw7hyo5ay7b3l4qfa', operation='create', record=PostRecordModel(record_type='app.bsky.feed.post', createdAt=datetime.datetime(2025, 10, 15, 1, 23, 57, 76