# Install Imports

In [14]:
pip install atproto -q
pip install pandas -q

SyntaxError: invalid syntax (894898574.py, line 1)

# Create SQL Database

In [33]:
# Database setup (storing raw data instead of overloading memory)
conn = sqlite3.connect("firehose.db")
cur = conn.cursor()
cur.execute("""
    CREATE TABLE IF NOT EXISTS firehose (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        timestamp TEXT,
        repo TEXT,
        message TEXT
    )
""")
conn.commit()


## Start scraping data with Firehose and store in Database

In [35]:
from atproto import FirehoseSubscribeReposClient, parse_subscribe_repos_message
import sqlite3
import json

# Limit messages for testing (set to None for unlimited streaming)
MAX_MESSAGES = 5
message_count = 0


# Initialize Firehose client
client = FirehoseSubscribeReposClient()

def on_message_handler(message) -> None:
    global message_count
    if MAX_MESSAGES and message_count >= MAX_MESSAGES:
        print("Reached message limit. Stopping Firehose.")
        client.stop()
        return

    try:
        parsed_msg = parse_subscribe_repos_message(message)
        message_json = json.dumps(parsed_msg, default=str)

        # Store in SQLite instead of printing to prevent memory overload
        cur.execute("INSERT INTO firehose (timestamp, repo, message) VALUES (?, ?, ?)", 
                    (parsed_msg["time"], parsed_msg["repo"], message_json))
        conn.commit()

        print(f"Stored message {message_count + 1}: {parsed_msg['repo']}")
        message_count += 1

    except Exception as e:
        print(f"Error processing message: {e}")

# Clear the database before inserting new messages
cur.execute("DELETE FROM firehose")
conn.commit()

# Start Firehose listener
client.start(on_message_handler)


Stored message 1: did:plc:k7bmm7zmvi3l7yol726zjd5f
Stored message 2: did:plc:realist5mcrfawzhtlnqfa6q
Stored message 3: did:plc:uxi76mlhxwx6fwgp3wzgcpaj
Stored message 4: did:plc:hld7amk5yewgff6e3pnwztk3
Stored message 5: did:plc:7cjnuxacad65c47722mdnbyo
Reached message limit. Stopping Firehose.


## Example Output:


In [40]:
import sqlite3
import json

# Connect to the database
conn = sqlite3.connect("firehose.db")
cur = conn.cursor()

# Fetch 3 sample rows
cur.execute("SELECT timestamp, repo, message FROM firehose LIMIT 5")
rows = cur.fetchall()

# Display the results
for row in rows:
    timestamp, repo, message_json = row
    message = json.loads(message_json)  # Convert JSON string to dictionary
    print(f"\n📌 Entry from {repo} at {timestamp}:")
    print(json.dumps(message, indent=2))  # Pretty print JSON

conn.close()



📌 Entry from did:plc:k7bmm7zmvi3l7yol726zjd5f at 2025-02-21T14:15:49.415Z:
"blobs=[] blocks=b':\\xa2eroots\\x81\\xd8*X%\\x00\\x01q\\x12 :\\xef\\xec\\xd5\\xbf.\\xbd?\\xe0S\\x89`2\\xd6\\x91\\x96>IS\\xbff\\xb9\\xf2\\xfb\\xfdUH\\xbf\\xc3\\x17\\xc76gversion\\x01\\x84\\x08\\x01q\\x12 \\xcf\\x1a\\x0ck\\xba&\\x95Z\\xcc\\xb17P\\xf8\\xdf\\x8c9\\xab\\xd4\\xd8~a\\x17\\xd9a\\x82|Ry\\xa9\\xec\\rU\\xa2ae\\x89\\xa4akX app.bsky.feed.like/3lb5zq2rm3c2qap\\x00at\\xd8*X%\\x00\\x01q\\x12 \\xb1\\x1f \\x12\\x1a\\xfa\\xa6Q\\xb5!\\xb3\\xee\\x80C3m\\xca\"F\\x14\\xcc~\\xdd\\xb8,\\xa6S\\x84\\xc1z!xav\\xd8*X%\\x00\\x01q\\x12 \\x92\\x98\\x83\\xd81i\\x05n\\xe6\\xc9\\x0c\\xaf\\x1d\\x08\\xeb\\x81\\xc8/\\xf1\\x8d/\\xa2;\\xd6f\\xef\\xccx\\xe1\\xef_\\t\\xa4akJkakxfxlg22ap\\x16at\\xd8*X%\\x00\\x01q\\x12 \\xccG\\x85W\\x9e\\xc8\\x8f\\xf3\\x8d\\xa4\\xb6\\xf0\\xdft\\xcd\\x9f.\\x85\\x0fT\\x1a\\x8b\\xe1\\xc0}X\\t\\x99\\x8a\\x8cE\\xb8av\\xd8*X%\\x00\\x01q\\x12 \\x18\\xe0\\x12D\\x16\\xfa\\x99\\x1e#\\xf4\\x1d\\xef\\x82\\xba\\x14\

In [45]:
from atproto import CAR, models, parse_subscribe_repos_message

def on_message_handler(message) -> None:
    commit = parse_subscribe_repos_message(message)

    # Ensure it's a commit message with CAR blocks
    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
        return

    if not commit.blocks:
        return  # No data to parse

    try:
        car = CAR.from_bytes(commit.blocks)

        for record in car.records:
            print("🔹 Found Record:", record)

            # Handle Posts
            if "app.bsky.feed.post" in str(record):
                post_content = record.get("value", {}).get("text", None)
                author = record.get("repo", None)
                timestamp = record.get("value", {}).get("createdAt", None)

                if post_content:
                    print(f"📝 Post by {author} at {timestamp}: {post_content}")
                else:
                    print("⚠️ No text content found in this post.")

            # Handle Likes
            elif "app.bsky.feed.like" in str(record):
                print("👍 This is a LIKE event!")

                # Extract the referenced post CID
                liked_post_cid = record.get("value", {}).get("subject", {}).get("uri", None)
                if liked_post_cid:
                    print(f"🔗 This like references post: {liked_post_cid}")
                else:
                    print("⚠️ No referenced post found.")

    except Exception as e:
        print(f"🚨 Error processing CAR data: {e}")


AttributeError: 'str' object has no attribute 'get'