# BlueSky API

In [1]:
!pip install atproto

Collecting atproto
  Downloading atproto-0.0.56-py3-none-any.whl.metadata (14 kB)
Collecting dnspython<3,>=2.4.0 (from atproto)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting httpx<0.28.0,>=0.25.0 (from atproto)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting libipld<4,>=2.0.0 (from atproto)
  Downloading libipld-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting websockets<14,>=12 (from atproto)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading atproto-0.0.56-py3-none-any.whl (337 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.1/337.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?

In [None]:
from atproto import Client

BLUESKY_USERNAME = "climatesentiment.bsky.social"
BLUESKY_PASSWORD = "" # ask me on WhatsApp

client = Client()
client.login(BLUESKY_USERNAME, BLUESKY_PASSWORD)

ProfileViewDetailed(did='did:plc:zg3cy4xk2m4zwbxdyuv6brcf', handle='climatesentiment.bsky.social', associated=ProfileAssociated(chat=None, feedgens=0, labeler=False, lists=0, starter_packs=0, py_type='app.bsky.actor.defs#profileAssociated'), avatar='https://cdn.bsky.app/img/avatar/plain/did:plc:zg3cy4xk2m4zwbxdyuv6brcf/bafkreichnafs32hra3ukwiae2ap3ig2xppp2g5cx4nicw7jk2vtful2soe@jpeg', banner=None, created_at='2025-01-05T15:46:16.146Z', description=None, display_name='', followers_count=3, follows_count=1, indexed_at='2025-01-05T15:46:16.146Z', joined_via_starter_pack=None, labels=[], pinned_post=None, posts_count=1, viewer=ViewerState(blocked_by=False, blocking=None, blocking_by_list=None, followed_by=None, following=None, known_followers=None, muted=False, muted_by_list=None, py_type='app.bsky.actor.defs#viewerState'), py_type='app.bsky.actor.defs#profileViewDetailed')

In [None]:
from atproto import Client, client_utils

# Test connection
def main():
    client = Client()
    profile = client.login('climatesentiment.bsky.social', 'Sentiment2025')
    print('Welcome,', profile.handle)

if __name__ == '__main__':
    main()


Welcome, climatesentiment.bsky.social


In the following code, we iterate through 2010-2024 to extract 100 posts (API limit) per month and write the top posts into the .csv file that will serve as input for our sentiment model

In [None]:
from datetime import datetime, timedelta
import csv

# Function for iteration over time period
def generate_months(start_year, start_month, end_year, end_month):
    current_year = start_year
    current_month = start_month

    while (current_year < end_year) or (current_year == end_year and current_month <= end_month):
        since_date = datetime(current_year, current_month, 1)
        until_date = (since_date + timedelta(days=31)).replace(day=1)

        yield since_date, until_date

        # Increment month
        current_month += 1
        if current_month > 12:
            current_month = 1
            current_year += 1

# Parameters
query = 'Global Warming|Climate Crisis|Climate Emergency|Global Heating|Climate Change|globalwarming|climatecrisis|climateemergency|globalheating|climatechange'
limit = 100
output_file = 'bluesky_posts.csv'


# CSV Header
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["tweet-id", "username", "date", "text", "like-count"])


# API daily limit was exceeded after collecting ~ 3 years of data, so we ran the following over a period of 4-5 days and combined the files manually.
for since_date, until_date in generate_months(2018, 4, 2024, 12): # dates can and should be modified here.

# Formatting dates
  since_str = since_date.strftime('%Y-%m-%dT%H:%M:%SZ')
  until_str = until_date.strftime('%Y-%m-%dT%H:%M:%SZ')

  # API call
  params = {'q': query, 'limit': limit, 'since': since_str, 'until': until_str, 'sort': 'top'}
  response = client.app.bsky.feed.search_posts(params)

  # Extract & Clean Data
  posts_data = []
  if hasattr(response, 'posts'):
    for post in response.posts:
        tweet_id = post.uri
        date = post.record.created_at.split('T')[0]
        text = post.record.text.replace("\n", " ")
        username = post.author.handle.split('@')[0]

        # Likes Count (individual API call... you can't get just the count)
        likes = client.app.bsky.feed.get_likes({'uri': tweet_id})

        # Prep print
        posts_data.append({
            'tweet_id': tweet_id,
            'username': username,
            'date': date,
            'text': text,
            'like_count': len(likes.likes)
        })

  # console sample print
  for post in posts_data[:5]:
    print(f"{post['tweet_id']},{post['username']},{post['date']},\"{post['text']}\",{post['like_count']}")

  # CSV output
  with open('bluesky_posts.csv', 'a', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    for post in posts_data:
      csvwriter.writerow([post['tweet_id'], post['username'], post['date'], post['text'], post['like_count']])

  # Logging
  print(f"Processed data for {since_date.strftime('%B %Y')}.")



RequestException: Response(success=False, status_code=429, content=XrpcError(error='RateLimitExceeded', message='Rate Limit Exceeded'), headers={'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'ratelimit-limit': '3000', 'ratelimit-remaining': '0', 'ratelimit-reset': '1737284387', 'ratelimit-policy': '3000;w=300', 'content-type': 'application/json; charset=utf-8', 'content-length': '61', 'etag': 'W/"3d-egyFOcXCrKQFzoI7522/4+PpxIk"', 'vary': 'Accept-Encoding', 'date': 'Sun, 19 Jan 2025 10:58:59 GMT', 'keep-alive': 'timeout=90', 'strict-transport-security': 'max-age=63072000'})