In [46]:
import requests
import csv
import os
import pandas as pd
import numpy as np
from datetime import datetime, UTC
import pytz

### Fetch proposals

In [47]:
# GraphQL query: the 100 most recently created proposals from the
# retrofunding.eth and citizenshouse.eth Snapshot spaces, newest first.
graphql_query = """
query {
  proposals (
    first: 100,
    skip: 0,
    where: {
      space_in: ["retrofunding.eth", "citizenshouse.eth"]
    },
    orderBy: "created",
    orderDirection: desc
  ) {
    id
    title
    body
    type
    choices
    created
    start 
    end
    state
    scores
    scores_total
    author
    discussion
    space {
      id
    }
  }
}
"""

# Snapshot hub GraphQL endpoint
graphql_endpoint = 'https://hub.snapshot.org/graphql'

# requests waits indefinitely by default; a timeout keeps the cell from
# hanging forever if the hub stops responding.
response = requests.post(graphql_endpoint, json={'query': graphql_query}, timeout=30)

if response.status_code != 200:
    print(f"Error: {response.status_code} - {response.text}")
else:
    data = response.json()

    # A GraphQL server can answer HTTP 200 and still report a failed query
    # through an "errors" payload with no usable "data".
    if data.get('errors') or not data.get('data'):
        print(f"Error: GraphQL query failed - {data.get('errors')}")
    else:
        proposals = data['data']['proposals'] or []

        # Ensure the output directory exists
        os.makedirs('../Data', exist_ok=True)

        # One CSV row per proposal; column order matches `header`
        with open('../Data/Proposals_Data.csv', 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            header = [
                "proposal_id",
                "proposal_title",
                "proposal_description",
                "proposal_type",
                "proposal_creation_date",
                "proposal_start_date",
                "proposal_end_date",
                "proposal_choices",
                "proposal_scores",
                "proposal_total_votes",
                "proposal_author",
                "proposal_discussion",
                "space"
            ]
            csv_writer.writerow(header)

            for proposal in proposals:
                csv_writer.writerow([
                    proposal['id'],
                    proposal['title'],
                    proposal['body'],
                    proposal['type'],
                    proposal['created'],
                    proposal['start'],
                    proposal['end'],
                    proposal['choices'],
                    proposal['scores'],
                    proposal['scores_total'],
                    proposal['author'],
                    proposal['discussion'],
                    proposal['space']['id'],
                ])

        print("Data has been successfully saved to Proposals_Data.csv")


Data has been successfully saved to Proposals_Data.csv


In [48]:
# Reload the fetched proposals from disk as a DataFrame (this rebinds `proposals`).
# NOTE(review): choices/scores were written through csv.writer, so if the API
# returned them as lists they come back here as plain strings — confirm that is intended.
proposals = pd.read_csv("../Data/Proposals_Data.csv")

In [49]:
# Check column dtypes and non-null counts of the freshly loaded frame
proposals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   proposal_id             29 non-null     object
 1   proposal_title          29 non-null     object
 2   proposal_description    29 non-null     object
 3   proposal_type           29 non-null     object
 4   proposal_creation_date  29 non-null     int64 
 5   proposal_start_date     29 non-null     int64 
 6   proposal_end_date       29 non-null     int64 
 7   proposal_choices        29 non-null     object
 8   proposal_scores         29 non-null     object
 9   proposal_total_votes    29 non-null     int64 
 10  proposal_author         29 non-null     object
 11  proposal_discussion     27 non-null     object
 12  space                   29 non-null     object
dtypes: int64(4), object(9)
memory usage: 3.1+ KB


### Data Formatting

In [50]:
# India Standard Time: the zone every proposal timestamp is rendered in
ist = pytz.timezone('Asia/Kolkata')

timestamp_columns = ['proposal_creation_date', 'proposal_start_date', 'proposal_end_date']

# Seconds-based timestamps -> tz-aware UTC -> IST -> text carrying the numeric UTC offset
for column in timestamp_columns:
    as_ist = pd.to_datetime(proposals[column], unit='s', utc=True).dt.tz_convert(ist)
    proposals[column] = as_ist.dt.strftime('%Y-%m-%d %H:%M:%S%z')


In [51]:
# Label each record by the Snapshot space it came from; any space missing
# from the lookup is labelled 'other'.
space_to_source = {
    'citizenshouse.eth': 'citizen_house_proposal',
    'retrofunding.eth': 'retro_funding_proposal',
}
source_labels = proposals['space'].map(space_to_source)
proposals['source'] = source_labels.fillna('other')

# Every row in this dataset carries the same document role
proposals['document_role'] = 'proposal_record'

In [52]:
# Confirm the columns present after adding `source` and `document_role`
proposals.columns

Index(['proposal_id', 'proposal_title', 'proposal_description',
       'proposal_type', 'proposal_creation_date', 'proposal_start_date',
       'proposal_end_date', 'proposal_choices', 'proposal_scores',
       'proposal_total_votes', 'proposal_author', 'proposal_discussion',
       'space', 'source', 'document_role'],
      dtype='object')

#### Add proposal category

In [53]:
# Category -> reference proposal titles.
# A proposal is assigned the first category (in dict order) that lists a
# reference title found inside the proposal's own title, compared
# case-insensitively; see get_category_fuzzy.
category_map = {
    "Eval Algos": [
        "Retro Funding S7 - Dev Tooling - Eval Algos",
        "Retro Funding S7 - Onchain Builders - Eval Algos"
    ],
    "Intent Ratification": [
        "Season 7: Intent Ratification",
        "Ratification of Round 4 Profit Definition"
    ],
    "Mission Approval": [
        "Retro Funding: Dev Tooling Mission",
        "Retro Funding: Onchain Builders Mission"
    ],
    "Upgrade Veto Proposals": [
        "Voting Cycle #17: Veto #1",
        "Voting Cycle #18: Veto #2",
        "Protocol Upgrade #5: Veto #3",
        "Protocol Upgrade #6: Veto #4",
        "Protocol Upgrade #7: Veto #6",
        "Protocol Upgrade #8: Veto #7",
        "Protocol Upgrade #9: Veto #9",
        "Protocol Upgrade #10: Veto #10",
        "Governor Upgrade #1: Improve advanced delegation voting",
        "Governor Update Proposal #2: Improvements to advanced delegation allowance calculations: Veto #8",
        "Governor Update Proposal #3: Enable onchain treasury execution"
    ],
    "Upgrade Proposals": [
        "Upgrade Proposal #11: Holocene Network Upgrade",
        "Protocol Upgrade: Superchain Registry 2.0",
        "Upgrade Proposal #13: OPCM and Incident Response improvements",
        "Upgrade Proposal #14: Isthmus L1 Contracts + MT-Cannon",
        "Upgrade Proposal #15: Isthmus Hard Fork"
    ],
    "Budget Board Ratification": [
        "Season 8 and 9: Budget Board Member Ratification"
    ],
    "Sequencer Revenue": [
        "Allow the Optimism Foundation to Stake a Portion of Sequencer ETH Through Season 8"
    ],
    "Maintenance Upgrade": [
        "Maintenance Upgrade: L1 Pectra Readiness",
        "Maintenance Upgrade: Absolute Prestate Updates for Isthmus Activation & Blob Preimage Fix"
    ],
    "Test Proposals": [
        "Test Vote (Season 6)",
        "Test Veto Vote"
    ],
    "Retro Funding Round 6": [
        "Retro Funding Round 6 - weighing the votes of Guest Voters"
    ]
}

# Normalise titles before matching: trim surrounding whitespace, then
# swap the "–" dash character for a plain ASCII hyphen
trimmed_titles = proposals["proposal_title"].str.strip()
proposals["proposal_title"] = trimmed_titles.str.replace("–", "-", regex=False)

# Function to assign category by partial (fuzzy) match
def get_category_fuzzy(title, categories=None):
    """Return the category whose reference title occurs inside ``title``.

    Matching is a case-insensitive substring test. Categories are tried in
    dict order and the first hit wins.

    Parameters
    ----------
    title : str or missing value
        Proposal title to classify. Non-string values (``None``, ``NaN``)
        are reported as ``"Uncategorized"`` instead of raising.
    categories : dict[str, list[str]], optional
        Mapping of category name to reference titles. Defaults to the
        notebook-level ``category_map``.

    Returns
    -------
    str
        The matching category name, or ``"Uncategorized"`` when nothing matches.
    """
    if categories is None:
        categories = category_map

    # A title that is missing in the CSV is loaded by pandas as NaN (a float),
    # which has no .lower(); treat any non-string as unmatched.
    if not isinstance(title, str):
        return "Uncategorized"

    title_lower = title.lower()
    for category, titles in categories.items():
        if any(ref_title.lower() in title_lower for ref_title in titles):
            return category
    return "Uncategorized"

# Classify every (already normalised) title and store the label alongside it
normalized_titles = proposals["proposal_title"]
proposals["proposal_category"] = normalized_titles.map(get_category_fuzzy)


#### Add the Retro Round number in which the proposal was created

In [54]:
# Parse the formatted creation strings back into datetimes; the UTC offset
# embedded in each string is picked up by the parser.
creation_strings = proposals['proposal_creation_date']
proposals['creation_datetime'] = pd.to_datetime(creation_strings)

# Round windows are expressed in India Standard Time
ist = pytz.timezone('Asia/Kolkata')

# (round number, start date, end date), dates as (year, month, day);
# each bound becomes IST-localized midnight of that date.
round_calendar = [
    (4, (2024, 1, 12), (2024, 7, 16)),
    (5, (2024, 7, 17), (2024, 10, 21)),
    (6, (2024, 10, 22), (2025, 7, 31)),
]

rounds = {
    number: {
        'start': ist.localize(datetime(*start_ymd)),
        'end': ist.localize(datetime(*end_ymd)),
    }
    for number, start_ymd, end_ymd in round_calendar
}

# Function to assign round_number
def get_round_number(dt, round_windows=None):
    """Return the round whose date window contains ``dt``, or ``None``.

    Each window's ``end`` is treated as an inclusive calendar day. The
    window bounds are midnight datetimes, so comparing ``dt <= end``
    directly would exclude everything after 00:00 on the closing day and
    leave a one-day gap before the next round starts.

    Parameters
    ----------
    dt : datetime-like
        Timestamp to classify. It must be comparable with the window
        bounds (the notebook's bounds are timezone-aware).
    round_windows : dict, optional
        Mapping ``round_number -> {'start': datetime, 'end': datetime}``.
        Defaults to the notebook-level ``rounds`` table.

    Returns
    -------
    The key of the first window containing ``dt``, or ``None`` when no
    window matches.
    """
    # Local import: the notebook's import cell only brings in `datetime` and `UTC`
    from datetime import timedelta

    if round_windows is None:
        round_windows = rounds

    for round_num, dates in round_windows.items():
        # Half-open interval [start, end + 1 day) covers the whole closing day
        if dates['start'] <= dt < dates['end'] + timedelta(days=1):
            return round_num
    return None

# Look up the round for every proposal from its parsed creation time
creation_times = proposals['creation_datetime']
proposals['round_number'] = creation_times.apply(get_round_number)

# The parsed datetime column was only a helper for the lookup; remove it
proposals = proposals.drop(columns=['creation_datetime'])

In [55]:
# Re-check dtypes and null counts now that the derived columns have been added
proposals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   proposal_id             29 non-null     object
 1   proposal_title          29 non-null     object
 2   proposal_description    29 non-null     object
 3   proposal_type           29 non-null     object
 4   proposal_creation_date  29 non-null     object
 5   proposal_start_date     29 non-null     object
 6   proposal_end_date       29 non-null     object
 7   proposal_choices        29 non-null     object
 8   proposal_scores         29 non-null     object
 9   proposal_total_votes    29 non-null     int64 
 10  proposal_author         29 non-null     object
 11  proposal_discussion     27 non-null     object
 12  space                   29 non-null     object
 13  source                  29 non-null     object
 14  document_role           29 non-null     object
 15  proposal

In [56]:
# Final column layout for the exported records. Selecting by this list
# also drops any column that is not named in it.
desired_order = [
    "source", "document_role",
    "proposal_id", "proposal_title", "proposal_description",
    "proposal_type", "proposal_category",
    "proposal_creation_date", "proposal_start_date", "proposal_end_date",
    "proposal_choices", "proposal_scores", "proposal_total_votes",
    "round_number",
    "proposal_author", "proposal_discussion",
]

# Keep every row, take the columns in the order listed above
proposals = proposals.loc[:, desired_order]

In [57]:
# Free-text columns that get whitespace / punctuation normalisation
text_columns = ['proposal_title', 'proposal_description', 'proposal_discussion']

for col in text_columns:
    if col in proposals.columns:
        raw_text = proposals[col]
        # Flatten \n and \r to spaces, replace typographic apostrophes (’)
        # with plain ones, and strip surrounding whitespace.
        cleaned_text = (
            raw_text
            .astype(str)
            .str.replace('\n', ' ', regex=False)
            .str.replace('\r', ' ', regex=False)
            .str.replace('’', "'", regex=False)
            .str.strip()
        )
        # astype(str) turns missing values into the literal text "nan";
        # mask them back to missing so they are exported as JSON null.
        proposals[col] = cleaned_text.where(raw_text.notna())

# Serialise as a list of records; force_ascii=False keeps non-ASCII text readable
json_str = proposals.to_json(orient="records", indent=2, force_ascii=False)

# to_json escapes "/" as "\/"; undo that so URLs stay readable
json_str = json_str.replace('\\/', '/')

# Save to file
output_path = "../Data/Proposals_Data.json"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(json_str)

print(f"Clean JSON saved to: {output_path}")


Clean JSON saved to: ../Data/Proposals_Data.json


### Create the proposal categories file

In [58]:
# Title -> category lookup table, taken from the cleaned proposals frame
category_columns = ['proposal_title', 'proposal_category']
categories = proposals.loc[:, category_columns]

In [59]:
# Eyeball the title/category pairs before writing them out
categories

Unnamed: 0,proposal_title,proposal_category
0,Maintenance Upgrade: Absolute Prestate Updates...,Maintenance Upgrade
1,Season 8 and 9: Budget Board Member Ratification,Budget Board Ratification
2,Upgrade Proposal #15: Isthmus Hard Fork,Upgrade Proposals
3,Upgrade Proposal #14: Isthmus L1 Contracts + M...,Upgrade Proposals
4,Upgrade Proposal #13: OPCM and Incident Respon...,Upgrade Proposals
5,Allow the Optimism Foundation to Stake a Porti...,Sequencer Revenue
6,Maintenance Upgrade: L1 Pectra Readiness,Maintenance Upgrade
7,Retro Funding S7 - Dev Tooling - Eval Algos,Eval Algos
8,Retro Funding S7 - Onchain Builders - Eval Algos,Eval Algos
9,Protocol Upgrade: Superchain Registry 2.0,Upgrade Proposals


In [60]:
# Write the title/category pairs as a JSON array of records
categories_path = "../Data/Proposal_Categories.json"
categories.to_json(categories_path, orient="records", indent=2, force_ascii=False)