### Proposals User Data

In [None]:
import requests
import csv

# Output CSV that receives the exported user rows
user_csv_file_path = "proposals_users_data.csv"
# Category listing endpoint whose paged JSON responses are read for users
user_api_url = "https://forum.arbitrum.foundation/c/proposals/7.json"

def fetch_users(api_url, page, timeout=30):
    """Fetch the ``users`` list from one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"users"`` in the decoded JSON body, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Fail loudly on HTTP errors: an error payload has no "users" key and
    # would otherwise be mistaken for the end of pagination.
    response.raise_for_status()
    data = response.json()

    if "users" in data:
        return data["users"]
    return []

# Export every distinct user found across the category's pages to a CSV file.
# IDs already written, so a user listed on several pages is emitted only once
processed_user_ids = set()

with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values placed in each row below
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Start at page 0 and keep going until a page yields no users
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]
            if user_id in processed_user_ids:
                continue  # already exported from an earlier page
            processed_user_ids.add(user_id)

            # "moderator" may be missing from a record; default it to False
            user_row = [
                user_id,
                *(user_data[field] for field in ("username", "name", "avatar_template")),
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Proposals Topic Data

In [None]:
import requests
import csv
from datetime import datetime

# Output CSV that receives the exported topic rows
csv_file_path = "proposals_topics_data.csv"

# Category listing endpoint whose paged JSON responses are read for topics
api_url = "https://forum.arbitrum.foundation/c/proposals/7.json"

def fetch_topics(api_url, page, timeout=30):
    """Fetch the topics listed on one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``topic_list -> topics`` in the decoded JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the payload has no ``topic_list``/``topics`` entry.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Report HTTP failures as such rather than as a confusing KeyError on
    # the error payload.
    response.raise_for_status()
    return response.json()["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        datetime_str: Timestamp string such as ``"2023-03-16T12:34:56.789Z"``,
            or a falsy value (``None`` / ``""``) when the field is unset.

    Returns:
        The reformatted string with fractional seconds dropped, or ``None``
        when ``datetime_str`` is falsy.

    Raises:
        ValueError: If a non-empty string does not match the expected format.
    """
    # An unset timestamp would make strptime raise TypeError; pass it through
    # as None instead (csv.writer renders None as an empty cell).
    if not datetime_str:
        return None

    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the category to a CSV file, one row per topic.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles; the order matches the values assembled for each topic below
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]
    csv_writer.writerow(header)

    # Page through the listing from page 0 until a page comes back empty
    page_num = 0
    while True:
        topics = fetch_topics(api_url, page_num)
        if not topics:
            break

        for topic in topics:
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),
                format_datetime(topic["last_posted_at"]),
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                # Sequence of poster records; csv.writer stores its str() form
                topic["posters"],
                # The first listed poster is taken as the original poster
                topic["posters"][0]["user_id"] if topic["posters"] else None,
            ]
            csv_writer.writerow(data)

        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Proposals Post Data (posts available in each topic)

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, timeout=30):
    """Fetch one batch of a topic's posts and append it to ``proposals_posts_data.csv``.

    Args:
        api_url: Topic URL prefix ending in ``/``; ``"<post_number>.json"`` is
            appended to build the request URL.
        post_number: Lowest post number that has not been stored yet. Posts in
            the response with a smaller number are skipped.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The post number to request next (number of the last stored post plus
        one), or ``None`` when the response status is not 200, the payload
        lacks ``post_stream -> posts``, or no unseen posts were returned.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps one stalled connection from hanging the whole export.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Use ">=": the caller resumes at "last stored post number + 1", so the
    # post carrying exactly that number is still unwritten. A strict ">"
    # would drop the first post of every batch after the initial one.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    with open("proposals_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Position 0 right after opening means the file is new or empty, so
        # the header row is written exactly once.
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" is the post body as HTML; keep only its text content
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in ``proposals_topics_data.csv``.

    Reads the ``Topic ID`` column of the topics CSV and, for each topic, calls
    ``fetch_and_store_data`` repeatedly until it returns ``None``. The posts
    themselves are appended to ``proposals_posts_data.csv`` by that function.
    """
    api_url = "https://forum.arbitrum.foundation/t/"

    with open("proposals_topics_data.csv", "r", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_id = row["Topic ID"]
            topic_url = f"{api_url}{topic_id}/"

            # Each call returns the post number to resume from, or None once
            # there is nothing more to fetch for this topic.
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    # Name the file that is actually written (posts, plural).
    print("All data successfully fetched and stored in proposals_posts_data.csv.")

if __name__ == "__main__":
    main()


### Dao Grant Program User Data

In [None]:
import requests
import csv

# Output CSV that receives the exported user rows
user_csv_file_path = "dgp_users_data.csv"
# Category listing endpoint whose paged JSON responses are read for users
user_api_url = "https://forum.arbitrum.foundation/c/proposals/16.json"

def fetch_users(api_url, page, timeout=30):
    """Fetch the ``users`` list from one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"users"`` in the decoded JSON body, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Fail loudly on HTTP errors: an error payload has no "users" key and
    # would otherwise be mistaken for the end of pagination.
    response.raise_for_status()
    data = response.json()

    if "users" in data:
        return data["users"]
    return []

# Export every distinct user found across the category's pages to a CSV file.
# IDs already written, so a user listed on several pages is emitted only once
processed_user_ids = set()

with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values placed in each row below
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Start at page 0 and keep going until a page yields no users
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]
            if user_id in processed_user_ids:
                continue  # already exported from an earlier page
            processed_user_ids.add(user_id)

            # "moderator" may be missing from a record; default it to False
            user_row = [
                user_id,
                *(user_data[field] for field in ("username", "name", "avatar_template")),
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Dao Grant Program Topic Data

In [None]:
import requests
import csv
from datetime import datetime

# Output CSV that receives the exported topic rows
csv_file_path = "dgp_topics_data.csv"

# Category listing endpoint whose paged JSON responses are read for topics
api_url = "https://forum.arbitrum.foundation/c/proposals/16.json"

def fetch_topics(api_url, page, timeout=30):
    """Fetch the topics listed on one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``topic_list -> topics`` in the decoded JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the payload has no ``topic_list``/``topics`` entry.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Report HTTP failures as such rather than as a confusing KeyError on
    # the error payload.
    response.raise_for_status()
    return response.json()["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        datetime_str: Timestamp string, or a falsy value (``None`` / ``""``)
            when the field is unset.

    Returns:
        The reformatted string with fractional seconds dropped, or ``None``
        when ``datetime_str`` is falsy.
    """
    # Nothing to parse for an unset timestamp
    if not datetime_str:
        return None

    parsed = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
        
# Export every topic of the category to a CSV file, one row per topic.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles; the order matches the values assembled for each topic below
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]
    csv_writer.writerow(header)

    # Page through the listing from page 0 until a page comes back empty
    page_num = 0
    while True:
        topics = fetch_topics(api_url, page_num)
        if not topics:
            break

        for topic in topics:
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),
                format_datetime(topic["last_posted_at"]),
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                # Sequence of poster records; csv.writer stores its str() form
                topic["posters"],
                # The first listed poster is taken as the original poster
                topic["posters"][0]["user_id"] if topic["posters"] else None,
            ]
            csv_writer.writerow(data)

        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Dao Grant Program Post Data (posts available in each topic)

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, timeout=30):
    """Fetch one batch of a topic's posts and append it to ``dgp_posts_data.csv``.

    Args:
        api_url: Topic URL prefix ending in ``/``; ``"<post_number>.json"`` is
            appended to build the request URL.
        post_number: Lowest post number that has not been stored yet. Posts in
            the response with a smaller number are skipped.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The post number to request next (number of the last stored post plus
        one), or ``None`` when the response status is not 200, the payload
        lacks ``post_stream -> posts``, or no unseen posts were returned.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps one stalled connection from hanging the whole export.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Use ">=": the caller resumes at "last stored post number + 1", so the
    # post carrying exactly that number is still unwritten. A strict ">"
    # would drop the first post of every batch after the initial one.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    with open("dgp_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Position 0 right after opening means the file is new or empty, so
        # the header row is written exactly once.
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" is the post body as HTML; keep only its text content
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in ``dgp_topics_data.csv``.

    Reads the ``Topic ID`` column of the topics CSV and, for each topic, calls
    ``fetch_and_store_data`` repeatedly until it returns ``None``. The posts
    themselves are appended to ``dgp_posts_data.csv`` by that function.
    """
    api_url = "https://forum.arbitrum.foundation/t/"

    with open("dgp_topics_data.csv", "r", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_id = row["Topic ID"]
            topic_url = f"{api_url}{topic_id}/"

            # Each call returns the post number to resume from, or None once
            # there is nothing more to fetch for this topic.
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    # Name the file this cell actually writes, not the proposals export.
    print("All data successfully fetched and stored in dgp_posts_data.csv.")

if __name__ == "__main__":
    main()


### Grant Discussion User Data

In [None]:
import requests
import csv

# Output CSV that receives the exported user rows
user_csv_file_path = "gd_users_data.csv"
# Category listing endpoint whose paged JSON responses are read for users
user_api_url = "https://forum.arbitrum.foundation/c/proposals/11.json"

def fetch_users(api_url, page, timeout=30):
    """Fetch the ``users`` list from one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"users"`` in the decoded JSON body, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Fail loudly on HTTP errors: an error payload has no "users" key and
    # would otherwise be mistaken for the end of pagination.
    response.raise_for_status()
    data = response.json()

    if "users" in data:
        return data["users"]
    return []

# Export every distinct user found across the category's pages to a CSV file.
# IDs already written, so a user listed on several pages is emitted only once
processed_user_ids = set()

with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values placed in each row below
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Start at page 0 and keep going until a page yields no users
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]
            if user_id in processed_user_ids:
                continue  # already exported from an earlier page
            processed_user_ids.add(user_id)

            # "moderator" may be missing from a record; default it to False
            user_row = [
                user_id,
                *(user_data[field] for field in ("username", "name", "avatar_template")),
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Grant Discussion Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Output CSV that receives the exported topic rows
csv_file_path = "gd_topics_data.csv"

# Category listing endpoint whose paged JSON responses are read for topics
api_url = "https://forum.arbitrum.foundation/c/proposals/11.json"

def fetch_topics(api_url, page, timeout=30):
    """Fetch the topics listed on one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``topic_list -> topics`` in the decoded JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the payload has no ``topic_list``/``topics`` entry.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Report HTTP failures as such rather than as a confusing KeyError on
    # the error payload.
    response.raise_for_status()
    return response.json()["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        datetime_str: Timestamp string such as ``"2023-03-16T12:34:56.789Z"``,
            or a falsy value (``None`` / ``""``) when the field is unset.

    Returns:
        The reformatted string with fractional seconds dropped, or ``None``
        when ``datetime_str`` is falsy.

    Raises:
        ValueError: If a non-empty string does not match the expected format.
    """
    # An unset timestamp would make strptime raise TypeError; pass it through
    # as None instead (csv.writer renders None as an empty cell).
    if not datetime_str:
        return None

    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the category to a CSV file, one row per topic.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles; the order matches the values assembled for each topic below
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]
    csv_writer.writerow(header)

    # Page through the listing from page 0 until a page comes back empty
    page_num = 0
    while True:
        topics = fetch_topics(api_url, page_num)
        if not topics:
            break

        for topic in topics:
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),
                format_datetime(topic["last_posted_at"]),
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                # Sequence of poster records; csv.writer stores its str() form
                topic["posters"],
                # The first listed poster is taken as the original poster
                topic["posters"][0]["user_id"] if topic["posters"] else None,
            ]
            csv_writer.writerow(data)

        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Grant Discussion Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, timeout=30):
    """Fetch one batch of a topic's posts and append it to ``gd_posts_data.csv``.

    Args:
        api_url: Topic URL prefix ending in ``/``; ``"<post_number>.json"`` is
            appended to build the request URL.
        post_number: Lowest post number that has not been stored yet. Posts in
            the response with a smaller number are skipped.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The post number to request next (number of the last stored post plus
        one), or ``None`` when the response status is not 200, the payload
        lacks ``post_stream -> posts``, or no unseen posts were returned.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps one stalled connection from hanging the whole export.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Use ">=": the caller resumes at "last stored post number + 1", so the
    # post carrying exactly that number is still unwritten. A strict ">"
    # would drop the first post of every batch after the initial one.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    with open("gd_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Position 0 right after opening means the file is new or empty, so
        # the header row is written exactly once.
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" is the post body as HTML; keep only its text content
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in ``gd_topics_data.csv``.

    Reads the ``Topic ID`` column of the topics CSV and, for each topic, calls
    ``fetch_and_store_data`` repeatedly until it returns ``None``. The posts
    themselves are appended to ``gd_posts_data.csv`` by that function.
    """
    api_url = "https://forum.arbitrum.foundation/t/"

    with open("gd_topics_data.csv", "r", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_id = row["Topic ID"]
            topic_url = f"{api_url}{topic_id}/"

            # Each call returns the post number to resume from, or None once
            # there is nothing more to fetch for this topic.
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    # Name the file this cell actually writes, not the proposals export.
    print("All data successfully fetched and stored in gd_posts_data.csv.")

if __name__ == "__main__":
    main()


### Governance User Data

In [None]:
import requests
import csv

# Output CSV that receives the exported user rows
user_csv_file_path = "governance_users_data.csv"
# Category listing endpoint whose paged JSON responses are read for users
user_api_url = "https://forum.arbitrum.foundation/c/proposals/6.json"

def fetch_users(api_url, page, timeout=30):
    """Fetch the ``users`` list from one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"users"`` in the decoded JSON body, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Fail loudly on HTTP errors: an error payload has no "users" key and
    # would otherwise be mistaken for the end of pagination.
    response.raise_for_status()
    data = response.json()

    if "users" in data:
        return data["users"]
    return []

# Export every distinct user found across the category's pages to a CSV file.
# IDs already written, so a user listed on several pages is emitted only once
processed_user_ids = set()

with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values placed in each row below
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Start at page 0 and keep going until a page yields no users
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]
            if user_id in processed_user_ids:
                continue  # already exported from an earlier page
            processed_user_ids.add(user_id)

            # "moderator" may be missing from a record; default it to False
            user_row = [
                user_id,
                *(user_data[field] for field in ("username", "name", "avatar_template")),
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Governance Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Output CSV that receives the exported topic rows
csv_file_path = "governance_topics_data.csv"

# Category listing endpoint whose paged JSON responses are read for topics
api_url = "https://forum.arbitrum.foundation/c/proposals/6.json"

def fetch_topics(api_url, page, timeout=30):
    """Fetch the topics listed on one page of a category listing.

    Args:
        api_url: Category JSON endpoint, without a query string.
        page: Page number, sent as ``?page=<page>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``topic_list -> topics`` in the decoded JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the payload has no ``topic_list``/``topics`` entry.
    """
    # requests never times out by default; a stalled connection would hang
    # the whole export.
    response = requests.get(f"{api_url}?page={page}", timeout=timeout)
    # Report HTTP failures as such rather than as a confusing KeyError on
    # the error payload.
    response.raise_for_status()
    return response.json()["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        datetime_str: Timestamp string such as ``"2023-03-16T12:34:56.789Z"``,
            or a falsy value (``None`` / ``""``) when the field is unset.

    Returns:
        The reformatted string with fractional seconds dropped, or ``None``
        when ``datetime_str`` is falsy.

    Raises:
        ValueError: If a non-empty string does not match the expected format.
    """
    # An unset timestamp would make strptime raise TypeError; pass it through
    # as None instead (csv.writer renders None as an empty cell).
    if not datetime_str:
        return None

    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the category to a CSV file, one row per topic.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles; the order matches the values assembled for each topic below
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]
    csv_writer.writerow(header)

    # Page through the listing from page 0 until a page comes back empty
    page_num = 0
    while True:
        topics = fetch_topics(api_url, page_num)
        if not topics:
            break

        for topic in topics:
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),
                format_datetime(topic["last_posted_at"]),
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                # Sequence of poster records; csv.writer stores its str() form
                topic["posters"],
                # The first listed poster is taken as the original poster
                topic["posters"][0]["user_id"] if topic["posters"] else None,
            ]
            csv_writer.writerow(data)

        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Governance Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, output_path="governance_posts_data.csv"):
    """Fetch one chunk of a topic's posts and append them to a CSV file.

    Requests ``{api_url}{post_number}.json``, keeps the posts numbered
    ``post_number`` or higher, strips the HTML from each post body and appends
    one row per post to ``output_path``. The header row is written first when
    the file is still empty.

    Args:
        api_url: Topic URL prefix ending in "/", e.g.
            "https://forum.arbitrum.foundation/t/<topic id>/".
        post_number: Lowest post number wanted from this request.
        output_path: CSV file the rows are appended to.

    Returns:
        The post number to request next (last stored post number + 1), or
        None when the request failed, the payload has no post stream, or no
        post numbered ``post_number`` or higher was returned.
    """
    url = f"{api_url}{post_number}.json"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # The caller passes back "last stored post number + 1", i.e. the first post
    # that has NOT been stored yet, so the comparison has to be inclusive.
    # A strict ">" silently drops that post at every chunk boundary.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
    with open(output_path, "a", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # In append mode the position is 0 only while the file is still empty
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" holds the rendered HTML of the post; keep only its text
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            # Drop the "T", the fractional seconds and the trailing "Z"
            created_at = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": created_at.strftime("%Y-%m-%d %H:%M:%S"),
                "Post Description": cleaned_text,
                "Post Number": post["post_number"],
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Store the posts of every topic listed in governance_topics_data.csv.

    Reads the "Topic ID" column of governance_topics_data.csv and, for each
    topic, calls fetch_and_store_data repeatedly, starting at post number 0,
    until it returns None.
    """
    # Prefix shared by all topic URLs
    api_url = "https://forum.arbitrum.foundation/t/"

    # newline="" lets the csv module handle line breaks inside quoted fields
    with open("governance_topics_data.csv", "r", newline="", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_url = f"{api_url}{row['Topic ID']}/"

            # fetch_and_store_data returns the post number to request next,
            # or None once there is nothing left to fetch for this topic
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    print("All data successfully fetched and stored in governance_posts_data.csv.")

if __name__ == "__main__":
    main()


### Ground Rules User Data

In [None]:
import requests
import csv

# Category listing endpoint (JSON); fetch_users appends "?page=N" to it
user_api_url = "https://forum.arbitrum.foundation/c/proposals/10.json"
# Output CSV that receives one row per unique user found in the listing
user_csv_file_path = "gr_users_data.csv"

def fetch_users(api_url, page):
    """Return the "users" list found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under "users" in the decoded response, or an empty
        list when the response has no such entry.
    """
    payload = requests.get(f"{api_url}?page={page}").json()

    # A page without a "users" entry counts as having no users
    if "users" in payload:
        return payload["users"]
    return []

# IDs already written, so a user who shows up on several pages is stored once
processed_user_ids = set()

# Dump the de-duplicated users of every listing page into the CSV file
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles go first
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # Skip anyone already stored from an earlier page
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # Values in header order; only "moderator" may be absent
            user_row = [user_id, *(user_data[key] for key in ("username", "name", "avatar_template"))]
            user_row.append(user_data.get("moderator", False))
            user_row.append(user_data["trust_level"])

            user_csv_writer.writerow(user_row)

        # Advance to the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Ground Rules Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Category listing endpoint (JSON); fetch_topics appends "?page=N" to it
api_url = "https://forum.arbitrum.foundation/c/proposals/10.json"

# Output CSV that receives one row per topic of the listing
csv_file_path = "gr_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under ["topic_list"]["topics"] in the decoded
        response.
    """
    page_url = f"{api_url}?page={page}"
    payload = requests.get(page_url).json()
    return payload["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat an API timestamp as "YYYY-MM-DD HH:MM:SS".

    Args:
        datetime_str: Timestamp shaped like "2023-03-16T12:34:56.789Z",
            or None / "" when no value is available.

    Returns:
        The reformatted timestamp with the fractional seconds dropped, or
        None when ``datetime_str`` is empty or None.

    Raises:
        ValueError: If a non-empty value does not match the expected layout.
    """
    # Pass a missing timestamp through as None instead of failing in strptime
    if not datetime_str:
        return None
    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the listing to the CSV file
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles, in the same order as the values of each row below
    header = [
        "Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count",
        "Highest Post Number", "Image URL", "Created At", "Last Posted At", "Views",
        "Like Count", "Pinned", "Unpinned", "Closed", "Visible", "Tags",
        "Last Poster Username", "Category ID", "posters", "Original Poster ID",
    ]
    csv_writer.writerow(header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    topics = fetch_topics(api_url, page_num)
    while topics:
        for topic in topics:
            # User IDs of every poster other than the first-listed one.
            # NOTE: this list is computed but never written to the CSV row.
            user_ids_posters = [
                poster["user_id"]
                for poster in topic["posters"]
                if poster["user_id"] != topic["posters"][0]["user_id"]
            ]

            # Plain fields that come before the two timestamps
            data = [topic[field] for field in (
                "id", "title", "fancy_title", "slug", "posts_count",
                "reply_count", "highest_post_number", "image_url",
            )]

            # Timestamps are reformatted before being stored
            data.append(format_datetime(topic["created_at"]))
            data.append(format_datetime(topic["last_posted_at"]))

            # Remaining plain fields, copied as returned by the API
            data.extend(topic[field] for field in (
                "views", "like_count", "pinned", "unpinned", "closed", "visible",
                "tags", "last_poster_username", "category_id", "posters",
            ))

            # The first-listed poster is stored as the original poster
            data.append(topic["posters"][0]["user_id"] if topic["posters"] else None)

            csv_writer.writerow(data)

        # Advance to the following page
        page_num += 1
        topics = fetch_topics(api_url, page_num)

print(f"Data has been written to {csv_file_path}")


### Ground Rules Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, output_path="gr_posts_data.csv"):
    """Fetch one chunk of a topic's posts and append them to a CSV file.

    Requests ``{api_url}{post_number}.json``, keeps the posts numbered
    ``post_number`` or higher, strips the HTML from each post body and appends
    one row per post to ``output_path``. The header row is written first when
    the file is still empty.

    Args:
        api_url: Topic URL prefix ending in "/", e.g.
            "https://forum.arbitrum.foundation/t/<topic id>/".
        post_number: Lowest post number wanted from this request.
        output_path: CSV file the rows are appended to.

    Returns:
        The post number to request next (last stored post number + 1), or
        None when the request failed, the payload has no post stream, or no
        post numbered ``post_number`` or higher was returned.
    """
    url = f"{api_url}{post_number}.json"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # The caller passes back "last stored post number + 1", i.e. the first post
    # that has NOT been stored yet, so the comparison has to be inclusive.
    # A strict ">" silently drops that post at every chunk boundary.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
    with open(output_path, "a", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # In append mode the position is 0 only while the file is still empty
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" holds the rendered HTML of the post; keep only its text
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            # Drop the "T", the fractional seconds and the trailing "Z"
            created_at = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": created_at.strftime("%Y-%m-%d %H:%M:%S"),
                "Post Description": cleaned_text,
                "Post Number": post["post_number"],
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Store the posts of every topic listed in gr_topics_data.csv.

    Reads the "Topic ID" column of gr_topics_data.csv and, for each topic,
    calls fetch_and_store_data repeatedly, starting at post number 0, until
    it returns None.
    """
    # Prefix shared by all topic URLs
    api_url = "https://forum.arbitrum.foundation/t/"

    # newline="" lets the csv module handle line breaks inside quoted fields
    with open("gr_topics_data.csv", "r", newline="", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_url = f"{api_url}{row['Topic ID']}/"

            # fetch_and_store_data returns the post number to request next,
            # or None once there is nothing left to fetch for this topic
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    print("All data successfully fetched and stored in gr_posts_data.csv.")

if __name__ == "__main__":
    main()


### Security Council Election User Data

In [None]:
import requests
import csv

# Category listing endpoint (JSON); fetch_users appends "?page=N" to it
user_api_url = "https://forum.arbitrum.foundation/c/proposals/12.json"
# Output CSV that receives one row per unique user found in the listing
user_csv_file_path = "sce_users_data.csv"

def fetch_users(api_url, page):
    """Return the "users" list found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under "users" in the decoded response, or an empty
        list when the response has no such entry.
    """
    payload = requests.get(f"{api_url}?page={page}").json()

    # A page without a "users" entry counts as having no users
    if "users" in payload:
        return payload["users"]
    return []

# IDs already written, so a user who shows up on several pages is stored once
processed_user_ids = set()

# Dump the de-duplicated users of every listing page into the CSV file
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles go first
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # Skip anyone already stored from an earlier page
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # Values in header order; only "moderator" may be absent
            user_row = [user_id, *(user_data[key] for key in ("username", "name", "avatar_template"))]
            user_row.append(user_data.get("moderator", False))
            user_row.append(user_data["trust_level"])

            user_csv_writer.writerow(user_row)

        # Advance to the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Security Council Election Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Category listing endpoint (JSON); fetch_topics appends "?page=N" to it
api_url = "https://forum.arbitrum.foundation/c/proposals/12.json"

# Output CSV that receives one row per topic of the listing
csv_file_path = "sce_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under ["topic_list"]["topics"] in the decoded
        response.
    """
    page_url = f"{api_url}?page={page}"
    payload = requests.get(page_url).json()
    return payload["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat an API timestamp as "YYYY-MM-DD HH:MM:SS".

    Args:
        datetime_str: Timestamp shaped like "2023-03-16T12:34:56.789Z",
            or None / "" when no value is available.

    Returns:
        The reformatted timestamp with the fractional seconds dropped, or
        None when ``datetime_str`` is empty or None.

    Raises:
        ValueError: If a non-empty value does not match the expected layout.
    """
    # Pass a missing timestamp through as None instead of failing in strptime
    if not datetime_str:
        return None
    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the listing to the CSV file
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles, in the same order as the values of each row below
    header = [
        "Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count",
        "Highest Post Number", "Image URL", "Created At", "Last Posted At", "Views",
        "Like Count", "Pinned", "Unpinned", "Closed", "Visible", "Tags",
        "Last Poster Username", "Category ID", "posters", "Original Poster ID",
    ]
    csv_writer.writerow(header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    topics = fetch_topics(api_url, page_num)
    while topics:
        for topic in topics:
            # User IDs of every poster other than the first-listed one.
            # NOTE: this list is computed but never written to the CSV row.
            user_ids_posters = [
                poster["user_id"]
                for poster in topic["posters"]
                if poster["user_id"] != topic["posters"][0]["user_id"]
            ]

            # Plain fields that come before the two timestamps
            data = [topic[field] for field in (
                "id", "title", "fancy_title", "slug", "posts_count",
                "reply_count", "highest_post_number", "image_url",
            )]

            # Timestamps are reformatted before being stored
            data.append(format_datetime(topic["created_at"]))
            data.append(format_datetime(topic["last_posted_at"]))

            # Remaining plain fields, copied as returned by the API
            data.extend(topic[field] for field in (
                "views", "like_count", "pinned", "unpinned", "closed", "visible",
                "tags", "last_poster_username", "category_id", "posters",
            ))

            # The first-listed poster is stored as the original poster
            data.append(topic["posters"][0]["user_id"] if topic["posters"] else None)

            csv_writer.writerow(data)

        # Advance to the following page
        page_num += 1
        topics = fetch_topics(api_url, page_num)

print(f"Data has been written to {csv_file_path}")


### Security Council Election Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, output_path="sce_posts_data.csv"):
    """Fetch one chunk of a topic's posts and append them to a CSV file.

    Requests ``{api_url}{post_number}.json``, keeps the posts numbered
    ``post_number`` or higher, strips the HTML from each post body and appends
    one row per post to ``output_path``. The header row is written first when
    the file is still empty.

    Args:
        api_url: Topic URL prefix ending in "/", e.g.
            "https://forum.arbitrum.foundation/t/<topic id>/".
        post_number: Lowest post number wanted from this request.
        output_path: CSV file the rows are appended to.

    Returns:
        The post number to request next (last stored post number + 1), or
        None when the request failed, the payload has no post stream, or no
        post numbered ``post_number`` or higher was returned.
    """
    url = f"{api_url}{post_number}.json"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # The caller passes back "last stored post number + 1", i.e. the first post
    # that has NOT been stored yet, so the comparison has to be inclusive.
    # A strict ">" silently drops that post at every chunk boundary.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
    with open(output_path, "a", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # In append mode the position is 0 only while the file is still empty
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" holds the rendered HTML of the post; keep only its text
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            # Drop the "T", the fractional seconds and the trailing "Z"
            created_at = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": created_at.strftime("%Y-%m-%d %H:%M:%S"),
                "Post Description": cleaned_text,
                "Post Number": post["post_number"],
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Store the posts of every topic listed in sce_topics_data.csv.

    Reads the "Topic ID" column of sce_topics_data.csv and, for each topic,
    calls fetch_and_store_data repeatedly, starting at post number 0, until
    it returns None.
    """
    # Prefix shared by all topic URLs
    api_url = "https://forum.arbitrum.foundation/t/"

    # newline="" lets the csv module handle line breaks inside quoted fields
    with open("sce_topics_data.csv", "r", newline="", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_url = f"{api_url}{row['Topic ID']}/"

            # fetch_and_store_data returns the post number to request next,
            # or None once there is nothing left to fetch for this topic
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    print("All data successfully fetched and stored in sce_posts_data.csv.")

if __name__ == "__main__":
    main()


### Arbitrum Dao Chains User Data

In [None]:
import requests
import csv

# Category listing endpoint (JSON); fetch_users appends "?page=N" to it
user_api_url = "https://forum.arbitrum.foundation/c/proposals/5.json"
# Output CSV that receives one row per unique user found in the listing
user_csv_file_path = "adc_users_data.csv"

def fetch_users(api_url, page):
    """Return the "users" list found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under "users" in the decoded response, or an empty
        list when the response has no such entry.
    """
    payload = requests.get(f"{api_url}?page={page}").json()

    # A page without a "users" entry counts as having no users
    if "users" in payload:
        return payload["users"]
    return []

# IDs already written, so a user who shows up on several pages is stored once
processed_user_ids = set()

# Dump the de-duplicated users of every listing page into the CSV file
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles go first
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # Skip anyone already stored from an earlier page
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # Values in header order; only "moderator" may be absent
            user_row = [user_id, *(user_data[key] for key in ("username", "name", "avatar_template"))]
            user_row.append(user_data.get("moderator", False))
            user_row.append(user_data["trust_level"])

            user_csv_writer.writerow(user_row)

        # Advance to the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Arbitrum Dao Chains Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Category listing endpoint (JSON); fetch_topics appends "?page=N" to it
api_url = "https://forum.arbitrum.foundation/c/proposals/5.json"

# Output CSV that receives one row per topic of the listing
csv_file_path = "adc_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under ["topic_list"]["topics"] in the decoded
        response.
    """
    page_url = f"{api_url}?page={page}"
    payload = requests.get(page_url).json()
    return payload["topic_list"]["topics"]

# Function to format date and time
def format_datetime(datetime_str):
    """Reformat an API timestamp as "YYYY-MM-DD HH:MM:SS".

    Args:
        datetime_str: Timestamp shaped like "2023-03-16T12:34:56.789Z",
            or None / "" when no value is available.

    Returns:
        The reformatted timestamp with the fractional seconds dropped, or
        None when ``datetime_str`` is empty or None.

    Raises:
        ValueError: If a non-empty value does not match the expected layout.
    """
    # Pass a missing timestamp through as None instead of failing in strptime
    if not datetime_str:
        return None
    dt_object = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return dt_object.strftime("%Y-%m-%d %H:%M:%S")

# Export every topic of the listing to the CSV file
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Column titles, in the same order as the values of each row below
    header = [
        "Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count",
        "Highest Post Number", "Image URL", "Created At", "Last Posted At", "Views",
        "Like Count", "Pinned", "Unpinned", "Closed", "Visible", "Tags",
        "Last Poster Username", "Category ID", "posters", "Original Poster ID",
    ]
    csv_writer.writerow(header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    topics = fetch_topics(api_url, page_num)
    while topics:
        for topic in topics:
            # User IDs of every poster other than the first-listed one.
            # NOTE: this list is computed but never written to the CSV row.
            user_ids_posters = [
                poster["user_id"]
                for poster in topic["posters"]
                if poster["user_id"] != topic["posters"][0]["user_id"]
            ]

            # Plain fields that come before the two timestamps
            data = [topic[field] for field in (
                "id", "title", "fancy_title", "slug", "posts_count",
                "reply_count", "highest_post_number", "image_url",
            )]

            # Timestamps are reformatted before being stored
            data.append(format_datetime(topic["created_at"]))
            data.append(format_datetime(topic["last_posted_at"]))

            # Remaining plain fields, copied as returned by the API
            data.extend(topic[field] for field in (
                "views", "like_count", "pinned", "unpinned", "closed", "visible",
                "tags", "last_poster_username", "category_id", "posters",
            ))

            # The first-listed poster is stored as the original poster
            data.append(topic["posters"][0]["user_id"] if topic["posters"] else None)

            csv_writer.writerow(data)

        # Advance to the following page
        page_num += 1
        topics = fetch_topics(api_url, page_num)

print(f"Data has been written to {csv_file_path}")


### Arbitrum Dao Chains Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number, output_path="adc_posts_data.csv"):
    """Fetch one chunk of a topic's posts and append them to a CSV file.

    Requests ``{api_url}{post_number}.json``, keeps the posts numbered
    ``post_number`` or higher, strips the HTML from each post body and appends
    one row per post to ``output_path``. The header row is written first when
    the file is still empty.

    Args:
        api_url: Topic URL prefix ending in "/", e.g.
            "https://forum.arbitrum.foundation/t/<topic id>/".
        post_number: Lowest post number wanted from this request.
        output_path: CSV file the rows are appended to.

    Returns:
        The post number to request next (last stored post number + 1), or
        None when the request failed, the payload has no post stream, or no
        post numbered ``post_number`` or higher was returned.
    """
    url = f"{api_url}{post_number}.json"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # The caller passes back "last stored post number + 1", i.e. the first post
    # that has NOT been stored yet, so the comparison has to be inclusive.
    # A strict ">" silently drops that post at every chunk boundary.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]
    if not posts:
        print("No new posts found.")
        return None

    fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
    with open(output_path, "a", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # In append mode the position is 0 only while the file is still empty
        if csv_file.tell() == 0:
            writer.writeheader()

        for post in posts:
            # "cooked" holds the rendered HTML of the post; keep only its text
            cleaned_text = BeautifulSoup(post["cooked"], "html.parser").get_text()

            # Drop the "T", the fractional seconds and the trailing "Z"
            created_at = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": created_at.strftime("%Y-%m-%d %H:%M:%S"),
                "Post Description": cleaned_text,
                "Post Number": post["post_number"],
            })

    print(f"Data from {url} successfully fetched and stored.")
    return posts[-1]["post_number"] + 1

def main():
    """Store the posts of every topic listed in adc_topics_data.csv.

    Reads the "Topic ID" column of adc_topics_data.csv and, for each topic,
    calls fetch_and_store_data repeatedly, starting at post number 0, until
    it returns None.
    """
    # Prefix shared by all topic URLs
    api_url = "https://forum.arbitrum.foundation/t/"

    # newline="" lets the csv module handle line breaks inside quoted fields
    with open("adc_topics_data.csv", "r", newline="", encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            topic_url = f"{api_url}{row['Topic ID']}/"

            # fetch_and_store_data returns the post number to request next,
            # or None once there is nothing left to fetch for this topic
            post_number = 0
            while post_number is not None:
                post_number = fetch_and_store_data(topic_url, post_number)

    print("All data successfully fetched and stored in adc_posts_data.csv.")

if __name__ == "__main__":
    main()


### Arbitrum GovHack Submissions Users Data

In [None]:
import requests
import csv

# Category listing endpoint (JSON); fetch_users appends "?page=N" to it
user_api_url = "https://forum.arbitrum.foundation/c/proposals/26.json"
# Output CSV that receives one row per unique user found in the listing
user_csv_file_path = "aghs_users_data.csv"

def fetch_users(api_url, page):
    """Return the "users" list found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under "users" in the decoded response, or an empty
        list when the response has no such entry.
    """
    payload = requests.get(f"{api_url}?page={page}").json()

    # A page without a "users" entry counts as having no users
    if "users" in payload:
        return payload["users"]
    return []

# IDs already written, so a user who shows up on several pages is stored once
processed_user_ids = set()

# Dump the de-duplicated users of every listing page into the CSV file
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles go first
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Pages are numbered from 0; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # Skip anyone already stored from an earlier page
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # Values in header order; only "moderator" may be absent
            user_row = [user_id, *(user_data[key] for key in ("username", "name", "avatar_template"))]
            user_row.append(user_data.get("moderator", False))
            user_row.append(user_data["trust_level"])

            user_csv_writer.writerow(user_row)

        # Advance to the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Arbitrum GovHack Submissions Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# Category listing endpoint (JSON); fetch_topics appends "?page=N" to it
api_url = "https://forum.arbitrum.foundation/c/proposals/26.json"

# Output CSV that receives one row per topic of the listing
csv_file_path = "aghs_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics found on one page of a listing.

    Args:
        api_url: JSON endpoint to query; "?page=<page>" is appended to it.
        page: Page number to request.

    Returns:
        The value stored under ["topic_list"]["topics"] in the decoded
        response.
    """
    page_url = f"{api_url}?page={page}"
    payload = requests.get(page_url).json()
    return payload["topic_list"]["topics"]

# Reformat an API timestamp for the CSV output
def format_datetime(datetime_str):
    """Convert ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` text to ``YYYY-MM-DD HH:MM:SS``.

    Returns None when *datetime_str* is empty or None. A non-empty string in
    any other layout makes ``datetime.strptime`` raise ValueError.
    """
    # Nothing to convert for a missing timestamp
    if not datetime_str:
        return None

    # Parse the ISO-like input, then drop the fractional seconds and the "Z"
    parsed = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
        
# Export every topic of the category to the CSV file at csv_file_path.
# Pages are requested from 0 upward until fetch_topics returns an empty page.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    # Create CSV writer
    csv_writer = csv.writer(csv_file)

    # Write header (column order must match the row built for each topic below)
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]

    csv_writer.writerow(header)

    # Initialize page number
    page_num = 0

    # Fetch and write topics until there are no more topics
    while True:
        topics = fetch_topics(api_url, page_num)

        # Break the loop if no topics are returned
        if not topics:
            break

        # Write topic details
        for topic in topics:
            posters = topic["posters"]

            # One row per topic; "tags" and "posters" are handed to the writer
            # as Python objects, so the CSV holds their str() form
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),  # Format Created At
                format_datetime(topic["last_posted_at"]),  # Format Last Posted At
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                posters,
                # The first poster entry is used as the original poster
                posters[0]["user_id"] if posters else None,
            ]

            csv_writer.writerow(data)

        # Move to the next page
        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Arbitrum GovHack Submissions Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number):
    """Fetch one chunk of a topic's posts and append the new ones to aghs_posts_data.csv.

    Args:
        api_url: Topic URL prefix ending in "/"; ``<post_number>.json`` is appended.
        post_number: Lowest post number that has not been stored yet
            (0 on the first call for a topic).

    Returns:
        The number following the last post written (the value to pass as
        ``post_number`` on the next call), or None when the request failed,
        the payload had no post stream, or no unseen post was returned.

    Note:
        The CSV is opened in append mode, so running the export again adds the
        same posts a second time unless the file is removed first.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps a stalled connection from hanging the whole export
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Keep only posts that were not stored by an earlier call. The comparison is
    # inclusive because the caller passes back "last stored + 1": with a strict
    # ">" the post carrying exactly that number would be skipped on every chunk.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]

    if not posts:
        print("No new posts found.")
        return None

    # Write data to CSV file
    with open("aghs_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write headers if the file is empty
        if csv_file.tell() == 0:
            writer.writeheader()

        # Write posts data
        for post in posts:
            # Use BeautifulSoup to extract text from HTML
            soup = BeautifulSoup(post["cooked"], "html.parser")
            cleaned_text = soup.get_text()

            # Format date
            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    # Assumes the API lists posts in ascending post_number order -- TODO confirm
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in aghs_topics_data.csv.

    For each "Topic ID" row, fetch_and_store_data is called repeatedly,
    feeding its return value back in as the next post number, until it
    returns None.
    """
    # Specify API URL
    api_url = "https://forum.arbitrum.foundation/t/"

    # Read Topic IDs from the topics CSV (aghs_topics_data.csv)
    with open("aghs_topics_data.csv", "r", encoding="utf-8") as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            # Fetch and store data for each Topic ID
            topic_id = row["Topic ID"]
            post_number = 0

            # None signals that this topic has nothing more to fetch
            while post_number is not None:
                post_number = fetch_and_store_data(f"{api_url}{topic_id}/", post_number)

    # Name the file fetch_and_store_data actually appends to
    print("All data successfully fetched and stored in aghs_posts_data.csv.")

if __name__ == "__main__":
    main()


### Procurement Committee Users Data

In [None]:
import requests
import csv

# API URL for fetching user data.
# Category 27 is the one the Procurement Committee topics export reads; the
# previous value (26) pointed at the GovHack submissions category instead.
user_api_url = "https://forum.arbitrum.foundation/c/proposals/27.json"
# Specify CSV file path for user data
user_csv_file_path = "pc_users_data.csv"

def fetch_users(api_url, page):
    """Return the ``users`` list from one page of a category's JSON listing.

    Args:
        api_url: URL of the category listing (a ``.json`` endpoint).
        page: Page index sent as the ``page`` query parameter.

    Returns:
        The value stored under ``users`` in the response, or an empty list
        when the response has no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the export forever
    response = requests.get(f"{api_url}?page={page}", timeout=30)

    # Without this check an error body (which has no "users" key) would look
    # like an empty page and silently cut the export short
    response.raise_for_status()

    # Extract user data from the response
    return response.json().get("users", [])

# IDs of users that already have a row in the CSV
processed_user_ids = set()

# Export each distinct user of the paginated listing to the users CSV
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values of every user row
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Walk the pages from 0 upward; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # The same user can appear on several pages; keep the first row only
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # "moderator" falls back to False when absent; other keys are required
            user_row = [
                user_id,
                user_data["username"],
                user_data["name"],
                user_data["avatar_template"],
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        # Request the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### Procurement Committee Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# API URL
api_url = "https://forum.arbitrum.foundation/c/proposals/27.json"

# Specify CSV file path
csv_file_path = "pc_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics listed on one page of a category's JSON listing.

    Args:
        api_url: URL of the category listing (a ``.json`` endpoint).
        page: Page index sent as the ``page`` query parameter.

    Returns:
        The list stored under ``topic_list`` -> ``topics`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
        KeyError: If the JSON body has no ``topic_list``/``topics`` entry.
    """
    # A timeout keeps a stalled connection from hanging the export forever
    response = requests.get(f"{api_url}?page={page}", timeout=30)

    # Fail with the real HTTP error instead of a confusing KeyError further down
    response.raise_for_status()
    data = response.json()

    # Extract topics from the current page
    return data["topic_list"]["topics"]

# Reformat an API timestamp for the CSV output
def format_datetime(datetime_str):
    """Convert ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` text to ``YYYY-MM-DD HH:MM:SS``.

    Returns None when *datetime_str* is empty or None. A non-empty string in
    any other layout makes ``datetime.strptime`` raise ValueError.
    """
    # Nothing to convert for a missing timestamp
    if not datetime_str:
        return None

    # Parse the ISO-like input, then drop the fractional seconds and the "Z"
    parsed = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
        
# Export every topic of the category to the CSV file at csv_file_path.
# Pages are requested from 0 upward until fetch_topics returns an empty page.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    # Create CSV writer
    csv_writer = csv.writer(csv_file)

    # Write header (column order must match the row built for each topic below)
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]

    csv_writer.writerow(header)

    # Initialize page number
    page_num = 0

    # Fetch and write topics until there are no more topics
    while True:
        topics = fetch_topics(api_url, page_num)

        # Break the loop if no topics are returned
        if not topics:
            break

        # Write topic details
        for topic in topics:
            posters = topic["posters"]

            # One row per topic; "tags" and "posters" are handed to the writer
            # as Python objects, so the CSV holds their str() form
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),  # Format Created At
                format_datetime(topic["last_posted_at"]),  # Format Last Posted At
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                posters,
                # The first poster entry is used as the original poster
                posters[0]["user_id"] if posters else None,
            ]

            csv_writer.writerow(data)

        # Move to the next page
        page_num += 1

print(f"Data has been written to {csv_file_path}")


### Procurement Committee Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number):
    """Fetch one chunk of a topic's posts and append the new ones to pc_posts_data.csv.

    Args:
        api_url: Topic URL prefix ending in "/"; ``<post_number>.json`` is appended.
        post_number: Lowest post number that has not been stored yet
            (0 on the first call for a topic).

    Returns:
        The number following the last post written (the value to pass as
        ``post_number`` on the next call), or None when the request failed,
        the payload had no post stream, or no unseen post was returned.

    Note:
        The CSV is opened in append mode, so running the export again adds the
        same posts a second time unless the file is removed first.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps a stalled connection from hanging the whole export
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Keep only posts that were not stored by an earlier call. The comparison is
    # inclusive because the caller passes back "last stored + 1": with a strict
    # ">" the post carrying exactly that number would be skipped on every chunk.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]

    if not posts:
        print("No new posts found.")
        return None

    # Write data to CSV file
    with open("pc_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write headers if the file is empty
        if csv_file.tell() == 0:
            writer.writeheader()

        # Write posts data
        for post in posts:
            # Use BeautifulSoup to extract text from HTML
            soup = BeautifulSoup(post["cooked"], "html.parser")
            cleaned_text = soup.get_text()

            # Format date
            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    # Assumes the API lists posts in ascending post_number order -- TODO confirm
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in pc_topics_data.csv.

    For each "Topic ID" row, fetch_and_store_data is called repeatedly,
    feeding its return value back in as the next post number, until it
    returns None.
    """
    # Specify API URL
    api_url = "https://forum.arbitrum.foundation/t/"

    # Read Topic IDs from the topics CSV (pc_topics_data.csv)
    with open("pc_topics_data.csv", "r", encoding="utf-8") as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            # Fetch and store data for each Topic ID
            topic_id = row["Topic ID"]
            post_number = 0

            # None signals that this topic has nothing more to fetch
            while post_number is not None:
                post_number = fetch_and_store_data(f"{api_url}{topic_id}/", post_number)

    # Name the file fetch_and_store_data actually appends to
    print("All data successfully fetched and stored in pc_posts_data.csv.")

if __name__ == "__main__":
    main()


### General Users Data

In [None]:
import requests
import csv

# API URL for fetching user data
user_api_url = "https://forum.arbitrum.foundation/c/proposals/4.json"
# Specify CSV file path for user data
user_csv_file_path = "general_users_data.csv"

def fetch_users(api_url, page):
    """Return the ``users`` list from one page of a category's JSON listing.

    Args:
        api_url: URL of the category listing (a ``.json`` endpoint).
        page: Page index sent as the ``page`` query parameter.

    Returns:
        The value stored under ``users`` in the response, or an empty list
        when the response has no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the export forever
    response = requests.get(f"{api_url}?page={page}", timeout=30)

    # Without this check an error body (which has no "users" key) would look
    # like an empty page and silently cut the export short
    response.raise_for_status()

    # Extract user data from the response
    return response.json().get("users", [])

# IDs of users that already have a row in the CSV
processed_user_ids = set()

# Export each distinct user of the paginated listing to the users CSV
with open(user_csv_file_path, mode="w", newline="", encoding="utf-8") as user_csv_file:
    user_csv_writer = csv.writer(user_csv_file)

    # Column titles, in the same order as the values of every user row
    user_header = ["User Id", "Username", "Name", "Avtar Template", "Moderator", "Trust Level"]
    user_csv_writer.writerow(user_header)

    # Walk the pages from 0 upward; the first empty page ends the export
    page_num = 0
    users = fetch_users(user_api_url, page_num)
    while users:
        for user_data in users:
            user_id = user_data["id"]

            # The same user can appear on several pages; keep the first row only
            if user_id in processed_user_ids:
                continue
            processed_user_ids.add(user_id)

            # "moderator" falls back to False when absent; other keys are required
            user_row = [
                user_id,
                user_data["username"],
                user_data["name"],
                user_data["avatar_template"],
                user_data.get("moderator", False),
                user_data["trust_level"],
            ]
            user_csv_writer.writerow(user_row)

        # Request the following page
        page_num += 1
        users = fetch_users(user_api_url, page_num)

print(f"User data has been written to {user_csv_file_path}")


### General Topics Data

In [None]:
import requests
import csv
from datetime import datetime

# API URL
api_url = "https://forum.arbitrum.foundation/c/proposals/4.json"

# Specify CSV file path
csv_file_path = "general_topics_data.csv"

def fetch_topics(api_url, page):
    """Return the topics listed on one page of a category's JSON listing.

    Args:
        api_url: URL of the category listing (a ``.json`` endpoint).
        page: Page index sent as the ``page`` query parameter.

    Returns:
        The list stored under ``topic_list`` -> ``topics`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
        KeyError: If the JSON body has no ``topic_list``/``topics`` entry.
    """
    # A timeout keeps a stalled connection from hanging the export forever
    response = requests.get(f"{api_url}?page={page}", timeout=30)

    # Fail with the real HTTP error instead of a confusing KeyError further down
    response.raise_for_status()
    data = response.json()

    # Extract topics from the current page
    return data["topic_list"]["topics"]

# Reformat an API timestamp for the CSV output
def format_datetime(datetime_str):
    """Convert ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` text to ``YYYY-MM-DD HH:MM:SS``.

    Returns None when *datetime_str* is empty or None. A non-empty string in
    any other layout makes ``datetime.strptime`` raise ValueError.
    """
    # Nothing to convert for a missing timestamp
    if not datetime_str:
        return None

    # Parse the ISO-like input, then drop the fractional seconds and the "Z"
    parsed = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
        
# Export every topic of the category to the CSV file at csv_file_path.
# Pages are requested from 0 upward until fetch_topics returns an empty page.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    # Create CSV writer
    csv_writer = csv.writer(csv_file)

    # Write header (column order must match the row built for each topic below)
    header = ["Topic ID", "Title", "Fancy Title", "Slug", "Posts Count", "Reply Count", "Highest Post Number",
              "Image URL", "Created At", "Last Posted At", "Views", "Like Count", "Pinned", "Unpinned", "Closed", "Visible",
              "Tags", "Last Poster Username", "Category ID", "posters", "Original Poster ID"]

    csv_writer.writerow(header)

    # Initialize page number
    page_num = 0

    # Fetch and write topics until there are no more topics
    while True:
        topics = fetch_topics(api_url, page_num)

        # Break the loop if no topics are returned
        if not topics:
            break

        # Write topic details
        for topic in topics:
            posters = topic["posters"]

            # One row per topic; "tags" and "posters" are handed to the writer
            # as Python objects, so the CSV holds their str() form
            data = [
                topic["id"],
                topic["title"],
                topic["fancy_title"],
                topic["slug"],
                topic["posts_count"],
                topic["reply_count"],
                topic["highest_post_number"],
                topic["image_url"],
                format_datetime(topic["created_at"]),  # Format Created At
                format_datetime(topic["last_posted_at"]),  # Format Last Posted At
                topic["views"],
                topic["like_count"],
                topic["pinned"],
                topic["unpinned"],
                topic["closed"],
                topic["visible"],
                topic["tags"],
                topic["last_poster_username"],
                topic["category_id"],
                posters,
                # The first poster entry is used as the original poster
                posters[0]["user_id"] if posters else None,
            ]

            csv_writer.writerow(data)

        # Move to the next page
        page_num += 1

print(f"Data has been written to {csv_file_path}")


### General Posts Data

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from datetime import datetime

# Function to fetch and store data from a specific API endpoint
def fetch_and_store_data(api_url, post_number):
    """Fetch one chunk of a topic's posts and append the new ones to general_posts_data.csv.

    Args:
        api_url: Topic URL prefix ending in "/"; ``<post_number>.json`` is appended.
        post_number: Lowest post number that has not been stored yet
            (0 on the first call for a topic).

    Returns:
        The number following the last post written (the value to pass as
        ``post_number`` on the next call), or None when the request failed,
        the payload had no post stream, or no unseen post was returned.

    Note:
        The CSV is opened in append mode, so running the export again adds the
        same posts a second time unless the file is removed first.
    """
    url = f"{api_url}{post_number}.json"
    # A timeout keeps a stalled connection from hanging the whole export
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error fetching data from {url}. Status code: {response.status_code}")
        return None

    data = response.json()
    if "post_stream" not in data or "posts" not in data["post_stream"]:
        print("Error in response data.")
        return None

    # Keep only posts that were not stored by an earlier call. The comparison is
    # inclusive because the caller passes back "last stored + 1": with a strict
    # ">" the post carrying exactly that number would be skipped on every chunk.
    posts = [post for post in data["post_stream"]["posts"] if post["post_number"] >= post_number]

    if not posts:
        print("No new posts found.")
        return None

    # Write data to CSV file
    with open("general_posts_data.csv", "a", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Topic ID", "Username", "Post Created At", "Post Description", "Post Number"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write headers if the file is empty
        if csv_file.tell() == 0:
            writer.writeheader()

        # Write posts data
        for post in posts:
            # Use BeautifulSoup to extract text from HTML
            soup = BeautifulSoup(post["cooked"], "html.parser")
            cleaned_text = soup.get_text()

            # Format date
            formatted_date = datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

            writer.writerow({
                "Topic ID": post["topic_id"],
                "Username": post["username"],
                "Post Created At": formatted_date,
                "Post Description": cleaned_text,
                "Post Number": post["post_number"]
            })

    print(f"Data from {url} successfully fetched and stored.")
    # Assumes the API lists posts in ascending post_number order -- TODO confirm
    return posts[-1]["post_number"] + 1

def main():
    """Download the posts of every topic listed in general_topics_data.csv.

    For each "Topic ID" row, fetch_and_store_data is called repeatedly,
    feeding its return value back in as the next post number, until it
    returns None.
    """
    # Specify API URL
    api_url = "https://forum.arbitrum.foundation/t/"

    # Read Topic IDs from the topics CSV (general_topics_data.csv)
    with open("general_topics_data.csv", "r", encoding="utf-8") as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            # Fetch and store data for each Topic ID
            topic_id = row["Topic ID"]
            post_number = 0

            # None signals that this topic has nothing more to fetch
            while post_number is not None:
                post_number = fetch_and_store_data(f"{api_url}{topic_id}/", post_number)

    # Name the file fetch_and_store_data actually appends to
    print("All data successfully fetched and stored in general_posts_data.csv.")

if __name__ == "__main__":
    main()


### Forum Users Data

In [5]:
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the directory path from the environment variable
directory = os.getenv('CSV_DIRECTORY')

# Function to combine CSV files in a directory
def combine_csv_files(directory, output_filename):
    """Merge every ``*_users_data.csv`` file in *directory* into one CSV.

    Rows that are exact duplicates across the inputs are written once.

    Args:
        directory: Folder scanned (non-recursively) for the input files.
        output_filename: Path of the combined CSV to write.

    Raises:
        ValueError: From ``pd.concat`` when no input file is found.
    """
    output_path = os.path.abspath(output_filename)

    user_files = []
    # sorted() makes the row order reproducible (os.listdir order is arbitrary)
    for file in sorted(os.listdir(directory)):
        path = os.path.join(directory, file)
        # The combined file's own name can match the suffix; reading it back
        # would carry stale rows from a previous run into the new output
        if file.endswith('_users_data.csv') and os.path.abspath(path) != output_path:
            user_files.append(path)

    combined_df = pd.concat([pd.read_csv(path) for path in user_files])
    combined_df = combined_df.drop_duplicates()  # Remove duplicate rows
    combined_df.to_csv(output_filename, index=False)

# Combine users CSV files
combine_csv_files(directory, 'forum_users_data.csv')

### Forum Topics Data

In [6]:
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the directory path from the environment variable
directory = os.getenv('CSV_DIRECTORY')

# Function to combine CSV files in a directory
def combine_csv_files(directory, output_filename):
    """Concatenate every ``*_topics_data.csv`` file in *directory* into one CSV.

    Args:
        directory: Folder scanned (non-recursively) for the input files.
        output_filename: Path of the combined CSV to write.

    Raises:
        ValueError: From ``pd.concat`` when no input file is found.
    """
    output_path = os.path.abspath(output_filename)

    topic_files = []
    # sorted() makes the row order reproducible (os.listdir order is arbitrary)
    for file in sorted(os.listdir(directory)):
        path = os.path.join(directory, file)
        # The combined file's own name can match the suffix; reading it back
        # would duplicate every row each time the cell is run again
        if file.endswith('_topics_data.csv') and os.path.abspath(path) != output_path:
            topic_files.append(path)

    combined_df = pd.concat([pd.read_csv(path) for path in topic_files])
    combined_df.to_csv(output_filename, index=False)

# Combine topics CSV files
combine_csv_files(directory, 'forum_topics_data.csv')

### Forum Posts Data

In [None]:
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the directory path from the environment variable
directory = os.getenv('CSV_DIRECTORY')

# Function to combine CSV files in a directory
def combine_csv_files(directory, output_filename):
    """Concatenate every ``*_posts_data.csv`` file in *directory* into one CSV.

    Args:
        directory: Folder scanned (non-recursively) for the input files.
        output_filename: Path of the combined CSV to write.

    Raises:
        ValueError: From ``pd.concat`` when no input file is found.
    """
    output_path = os.path.abspath(output_filename)

    post_files = []
    # sorted() makes the row order reproducible (os.listdir order is arbitrary)
    for file in sorted(os.listdir(directory)):
        path = os.path.join(directory, file)
        # The combined file's own name can match the suffix; reading it back
        # would duplicate every row each time the cell is run again
        if file.endswith('_posts_data.csv') and os.path.abspath(path) != output_path:
            post_files.append(path)

    combined_df = pd.concat([pd.read_csv(path) for path in post_files])
    combined_df.to_csv(output_filename, index=False)

# Combine posts CSV files
combine_csv_files(directory, 'forum_posts_data.csv')