<a href="https://colab.research.google.com/github/Ifeanyi55/Bluesky4NodeXL/blob/main/Bluesky4NodeXL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# run once to install the atproto Python client
!pip install -q --upgrade atproto

## **Authenticate the App**

In [None]:
# Colab form fields: fill in both values before running this cell.
# NOTE(review): consider entering a Bluesky app password here rather than the
# main account password — confirm against the Bluesky documentation.
Bsky_Username = "" # @param {"type":"string","placeholder":"Your Bluesky username without the \"@\""}
Bsky_Password = "" # @param {"type":"string","placeholder":"Your Bluesky password"}
# authenticate application
from atproto import Client

# the logged-in `client` is a notebook-level global that the cells below use
# for every Bluesky API call
client = Client()
client.login(Bsky_Username, Bsky_Password)

## **Get Replies Network Data**

Increase **URIs_To_Loop** to process more of the posts returned by the search. Higher values mean a longer runtime, especially if the search query is a trending or viral topic. The default is 500 URIs.

After the query has finished running, go to the **Files** folder beneath the key icon in the sidebar and download the generated `bsky_replies_network.csv` file.

In [None]:
from google.colab import output

Query = "" # @param {"type":"string","placeholder":"Search query"}
# break the field on commas (trimming whitespace) so that several terms
# entered at once can be detected below
Query = [term.strip() for term in Query.split(",")]


def show_alert():
    """Show a browser alert asking for one search term at a time."""
    output.eval_js('alert("Enter one search query term at a time!")')


# more than one comma-separated term: alert the user and stop the cell
if len(Query) > 1:
    show_alert()
    raise ValueError("Enter one search query term at a time!")

# exactly one term was entered; unwrap it from the list
Query = Query[0]

# number of matching posts whose reply threads will be fetched
URIs_To_Loop = 500 # @param {"type":"slider","min":500,"max":10000,"step":100}

import pandas as pd
import numpy as np

def bskyMasterReplies(query,uris_to_loop):
    """Build the replies network for posts matching a Bluesky search query.

    Searches Bluesky for ``query``, keeps the first ``uris_to_loop`` matching
    posts, fetches the thread of each one and returns one row per entry in
    the thread's ``replies`` list. The reply author is Vertex1 and the author
    of the thread's post is Vertex2.

    Uses the notebook-level authenticated ``client`` as well as ``pd`` and
    ``np``.

    Args:
        query: Search term passed to the post search endpoint.
        uris_to_loop: Maximum number of matching posts whose threads are
            fetched.

    Returns:
        pandas.DataFrame with ``Vertex1*`` / ``Vertex2*`` columns. Rows whose
        ``Vertex1`` or ``Vertex2`` display name is an empty string are
        removed.

    Raises:
        ValueError: If no replies could be collected for the query.
    """

    def clean_timestamp(timestamp):
        # turn "2024-01-01T10:00:00.000Z" into "2024-01-01 10:00:00.000";
        # a missing timestamp stays None instead of failing the whole thread
        if timestamp is None:
            return None
        return timestamp.replace("T", " ").replace("Z", "")

    def viewer_type(post):
        # tolerate a post without a viewer object
        return getattr(post.viewer, "py_type", None)

    # bluesky search
    def bskySearch(query, uris_to_loop):
        # page through the search results, stopping as soon as enough posts
        # have been collected: only the first `uris_to_loop` are used
        cursor = None
        all_posts = []
        while True:
            response = client.app.bsky.feed.search_posts(
                params={'q': query, 'cursor': cursor}
            )
            all_posts.extend(response.posts)

            cursor = response.cursor
            if not cursor or len(all_posts) >= uris_to_loop:
                break

        # convert the post metadata into a data frame
        bsky_posts_df = pd.DataFrame(
            {
                "Handles": [p.author.handle for p in all_posts],
                "DID": [p.author.did for p in all_posts],
                "Associated": [p.author.associated for p in all_posts],
                "Avatars": [p.author.avatar for p in all_posts],
                "Created_at": [p.author.created_at for p in all_posts],
                "Display_name": [p.author.display_name for p in all_posts],
                "Text": [p.record.text for p in all_posts],
                "URI": [p.uri for p in all_posts],
                "Like_count": [p.like_count for p in all_posts],
                "Reply_count": [p.reply_count for p in all_posts],
                "Repost_count": [p.repost_count for p in all_posts],
                "Quote_count": [p.quote_count for p in all_posts],
                "Py_type": [viewer_type(p) for p in all_posts],
            }
        )

        # truncate with the argument, not the notebook-level slider value
        return bsky_posts_df.head(uris_to_loop)

    # get replies network
    def getRepliesNetwork(uri):
        response = client.app.bsky.feed.get_post_thread(params={"uri": uri})
        thread = response.thread

        # A thread without replies yields an empty frame. Entries that carry
        # no `post` attribute are left out so they do not discard the rest.
        # NOTE(review): presumably those are not-found or blocked replies —
        # confirm against the atproto models.
        reply_posts = [
            r.post
            for r in (getattr(thread, "replies", None) or [])
            if hasattr(r, "post")
        ]
        n_replies = len(reply_posts)

        # parent post metadata, repeated once per reply row
        parent = thread.post
        parent_author = parent.author

        def repeat(value):
            return np.repeat(value, n_replies)

        # convert into data frame
        replies_net = pd.DataFrame(
            {
                "Vertex1": [p.author.display_name for p in reply_posts],
                "Vertex2": repeat(parent_author.display_name),
                "Vertex1_Text": [p.record.text for p in reply_posts],
                "Vertex1_CreatedAt": [clean_timestamp(p.author.created_at) for p in reply_posts],
                "Vertex1_ReplyAt": [clean_timestamp(p.indexed_at) for p in reply_posts],
                "Vertex1_Handle": [p.author.handle for p in reply_posts],
                "Vertex1_ProfileURL": ["https://bsky.app/profile/" + p.author.handle for p in reply_posts],
                "Vertex1_Avatar": [p.author.avatar for p in reply_posts],
                "Vertex1_LikeCount": [p.like_count for p in reply_posts],
                "Vertex1_ReplyCount": [p.reply_count for p in reply_posts],
                "Vertex1_RepostCount": [p.repost_count for p in reply_posts],
                "Vertex1_QuoteCount": [p.quote_count for p in reply_posts],
                "Vertex1_DID": [p.author.did for p in reply_posts],
                "Vertex1_Associated": [p.author.associated for p in reply_posts],
                "Vertex1_URI": [p.uri for p in reply_posts],
                "Vertex1_PyType": [viewer_type(p) for p in reply_posts],
                "Vertex2_Handle": repeat(parent_author.handle),
                "Vertex2_ProfileURL": repeat("https://bsky.app/profile/" + parent_author.handle),
                "Vertex2_Avatar": repeat(parent_author.avatar),
                "Vertex2_Text": repeat(parent.record.text),
                "Vertex2_Associated": repeat(parent_author.associated),
                "Vertex2_DID": repeat(parent_author.did),
                "Vertex2_CreatedAt": repeat(clean_timestamp(parent_author.created_at)),
                "Vertex2_PostAt": repeat(clean_timestamp(parent.indexed_at)),
                "Vertex2_URI": repeat(parent.uri),
            }
        )

        return replies_net

    # bluesky replies network
    def bskyRepliesNetwork(uri):
        # a list of URIs (of any length) is fetched one by one and stacked;
        # a single URI string is fetched directly
        if isinstance(uri, list):
            return pd.concat([getRepliesNetwork(u) for u in uri])
        return getRepliesNetwork(uri)

    bskyURIs = bskySearch(query, uris_to_loop)["URI"]

    allRepliesNetData = []
    skipped = 0
    last_error = None
    for u in bskyURIs:
        try:
            r = bskyRepliesNetwork(u)
        except Exception as e:
            # best effort: one thread that cannot be fetched or parsed must
            # not abort the whole run, but it is counted and reported below
            skipped += 1
            last_error = e
            continue
        if not r.empty:
            allRepliesNetData.append(r)

    if skipped:
        print(
            f"Skipped {skipped} of {len(bskyURIs)} threads that could not be "
            f"fetched or parsed (last error: {last_error!r})"
        )

    # pd.concat refuses an empty list; fail with a message the user can act on
    if not allRepliesNetData:
        raise ValueError(f"No replies were found for search query {query!r}.")

    allRepliesNetData_df = pd.concat(allRepliesNetData)

    # clean dataset of blank cells
    has_names = (allRepliesNetData_df['Vertex1'] != "") & (allRepliesNetData_df['Vertex2'] != "")
    replies_network = allRepliesNetData_df[has_names]

    return replies_network

# get master replies network data for the search term and URI limit chosen
# in the form fields above
master_replies_network = bskyMasterReplies(Query,URIs_To_Loop)

# save as csv in the notebook's working directory (index column omitted)
master_replies_network.to_csv("bsky_replies_network.csv",index=False)


## **Get Followers Network Data**
You can collect the follower network data of more than one Bluesky account; just separate the handles with commas in the input field. After the query has finished running, go to the **Files** folder beneath the key icon in the sidebar and download the generated `bsky_followers_network.csv` file.

In [None]:
Bsky_Handle = "" # @param {"type":"string","placeholder":"Bluesky handle without the \"@\" symbol."}


def parse_handles(raw):
  """Split the comma-separated handle field into usable handles.

  Whitespace around each handle is trimmed and blank entries (for example
  from a trailing comma) are dropped.

  Args:
    raw: Text of the handle form field.

  Returns:
    A list of handles when more than one was entered, the single handle as
    a string when exactly one was entered, and "" when the field is blank.
  """
  handles = [part.strip() for part in raw.split(",")]
  handles = [h for h in handles if h]
  if len(handles) > 1:
    return handles
  return handles[0] if handles else ""


# keep every handle that was entered instead of only the first one
Bsky_Handle = parse_handles(Bsky_Handle)

import pandas as pd
import requests
import time

# bluesky followers
def bskyFollowers(handle):
  """Build the follower edge list of one Bluesky account.

  Resolves ``handle`` to a DID, pages through the followers of that account
  with the notebook-level authenticated ``client`` and returns one row per
  follower. The follower is Vertex1 and the account itself is Vertex2.

  Args:
    handle: Bluesky handle of the account, without the "@".

  Returns:
    pandas.DataFrame with one row per follower and ``Vertex1*`` /
    ``Vertex2*`` columns.

  Raises:
    requests.HTTPError: If the handle-resolution request returns an error
      status.
  """

  # DID finder
  def did_finder(handle):
    # `params=` lets requests build and escape the query string; the timeout
    # keeps the cell from hanging forever on a stalled connection
    response = requests.get(
        "https://bsky.social/xrpc/com.atproto.identity.resolveHandle",
        params={"handle": handle},
        timeout=30,
    )
    # fail here with the HTTP error rather than later with KeyError 'did'
    response.raise_for_status()
    return response.json()["did"]

  # collect all followers of a user, one page at a time
  def get_all_followers(actor_did):
    cursor = None
    followers = []

    while True:
      response = client.app.bsky.graph.get_followers(
          params={'actor': actor_did, 'cursor': cursor}
      )
      followers.extend(response.followers)

      # no cursor in the response means this was the last page
      cursor = response.cursor
      if not cursor:
        break

    return followers

  # apply functions
  actor_did = did_finder(handle)
  all_followers = get_all_followers(actor_did)
  actor_profile = client.app.bsky.actor.get_profile(params={"actor": actor_did})

  # Vertex1 columns hold one value per follower; Vertex2 values are scalars
  # that pandas repeats on every row
  bsky_follower_network = pd.DataFrame(
    {
        "Vertex1": [f.handle for f in all_followers],
        "Vertex2": actor_profile.handle,
        "Vertex1_did": [f.did for f in all_followers],
        "Vertex1_avatar": [f.avatar for f in all_followers],
        "Vertex1_display_name": [f.display_name for f in all_followers],
        "Vertex1_description": [f.description for f in all_followers],
        "Vertex1_created_at": [f.created_at for f in all_followers],
        "Vertex1_indexed_at": [f.indexed_at for f in all_followers],
        "Vertex1_py_type": [f.py_type for f in all_followers],
        "Vertex2_did": actor_profile.did,
        "Vertex2_avatar": actor_profile.avatar,
        "Vertex2_display_name": actor_profile.display_name,
        "Vertex2_description": actor_profile.description,
        "Vertex2_created_at": actor_profile.created_at,
        "Vertex2_indexed_at": actor_profile.indexed_at,
        "Vertex2_py_type": actor_profile.py_type
    }
  )

  return bsky_follower_network

# bluesky following
# define following function
def bskyFollowing(handle):
  """Build the following edge list of one Bluesky account.

  Resolves ``handle`` to a DID, pages through the accounts it follows with
  the notebook-level authenticated ``client`` and returns one row per
  followed account. The account itself is Vertex1 and the followed account
  is Vertex2.

  Args:
    handle: Bluesky handle of the account, without the "@".

  Returns:
    pandas.DataFrame with one row per followed account and ``Vertex1*`` /
    ``Vertex2*`` columns.

  Raises:
    requests.HTTPError: If the handle-resolution request returns an error
      status.
  """

  # DID finder
  def did_finder(handle):
    # `params=` lets requests build and escape the query string; the timeout
    # keeps the cell from hanging forever on a stalled connection
    response = requests.get(
        "https://bsky.social/xrpc/com.atproto.identity.resolveHandle",
        params={"handle": handle},
        timeout=30,
    )
    # fail here with the HTTP error rather than later with KeyError 'did'
    response.raise_for_status()
    return response.json()["did"]

  # collect all following of a user, one page at a time
  def get_all_following(actor_did):
    cursor = None
    following = []

    while True:
      response = client.app.bsky.graph.get_follows(
          params={'actor': actor_did, 'cursor': cursor}
      )
      following.extend(response.follows)

      # no cursor in the response means this was the last page
      cursor = response.cursor
      if not cursor:
        break

    return following

  # apply functions
  actor_did = did_finder(handle)
  all_following = get_all_following(actor_did)
  actor_profile = client.app.bsky.actor.get_profile(params={"actor": actor_did})

  # Vertex1 values are scalars that pandas repeats on every row; Vertex2
  # columns hold one value per followed account
  bsky_following_network = pd.DataFrame(
      {
          "Vertex1": actor_profile.handle,
          "Vertex2": [f.handle for f in all_following],
          "Vertex1_did": actor_profile.did,
          "Vertex1_avatar": actor_profile.avatar,
          "Vertex1_display_name": actor_profile.display_name,
          "Vertex1_description": actor_profile.description,
          "Vertex1_created_at": actor_profile.created_at,
          "Vertex1_indexed_at": actor_profile.indexed_at,
          "Vertex1_py_type": actor_profile.py_type,
          "Vertex2_did": [f.did for f in all_following],
          "Vertex2_avatar": [f.avatar for f in all_following],
          "Vertex2_display_name": [f.display_name for f in all_following],
          "Vertex2_description": [f.description for f in all_following],
          "Vertex2_created_at": [f.created_at for f in all_following],
          "Vertex2_indexed_at": [f.indexed_at for f in all_following],
          "Vertex2_py_type": [f.py_type for f in all_following]
      }
  )

  return bsky_following_network

# bluesky follower-following
def bskyFollowerFollowing(handle):
  """Return the follower and following networks of ``handle`` in one frame.

  The follower rows come first and the following rows after them; each part
  keeps its own row index.
  """
  # followers are fetched before following, as the list is built left to right
  networks = [bskyFollowers(handle), bskyFollowing(handle)]
  return pd.concat(networks)

# master follower-following network
def masterFollowerFollowing(handle):
  """Build the combined follower/following network for one or more handles.

  Args:
    handle: A single Bluesky handle as a string, or a list (or other
      iterable) of handles.

  Returns:
    pandas.DataFrame produced by ``bskyFollowerFollowing``. For several
    handles the per-handle frames are concatenated in the order given.

  Raises:
    ValueError: If an empty collection of handles is passed.
  """
  # a bare string is exactly one handle
  if isinstance(handle, str):
    return bskyFollowerFollowing(handle)

  # any other iterable is a collection of handles, including a collection
  # with a single element
  handles = list(handle)
  if not handles:
    raise ValueError("At least one Bluesky handle is required.")

  networks = []
  for position, single_handle in enumerate(handles):
    if position:
      # pause between accounts, not after the last one
      # NOTE(review): presumably this is to stay under API rate limits — confirm
      time.sleep(1)
    networks.append(bskyFollowerFollowing(single_handle))

  return pd.concat(networks)

# build the follower/following network for the handle(s) entered in the form
followers_network = masterFollowerFollowing(handle=Bsky_Handle)

# save as csv in the notebook's working directory (index column omitted)
followers_network.to_csv("bsky_followers_network.csv",index=False)