## Fetch EAS attestations from HyperSync

### Setup

In [1]:
import hypersync
from hypersync import (
    LogSelection,
    LogField,
    FieldSelection,
    HexOutput
)
import os, glob
import pandas as pd
import json

In [2]:
def get_envio_key():
    return os.getenv("HYPERSYNC_API_KEY")

In [3]:
def get_data_root_dir():
    return "../../data/eas_attestations"

In [4]:
# Define a dictionary of key blockchain name and value a dictionary of contract address, eascan url and hypersync url
blockchain_networks = {
    "arbitrum": {
        "contract_address": "0xbD75f629A22Dc1ceD33dDA0b68c546A1c035c458",
        "easscan_url": "https://arbitrum.easscan.org",
        "hypersync_url": "https://arbitrum.hypersync.xyz"
    },
    "base": {
        "contract_address": "0x4200000000000000000000000000000000000021",
        "easscan_url": "https://base.easscan.org",
        "hypersync_url": "https://base.hypersync.xyz"
    },
    "optimism": {
        "contract_address": "0x4200000000000000000000000000000000000021",
        "easscan_url": "https://optimism.easscan.org",
        "hypersync_url": "https://optimism.hypersync.xyz"
    },
    "polygon": {
        "contract_address": "0x5E634ef5355f45A855d02D66eCD687b1502AF790",
        "easscan_url": "https://polygon.easscan.org",
        "hypersync_url": "https://polygon.hypersync.xyz"
    },
    "mainnet": {
        "contract_address": "0xA1207F3BBa224E2c9c3c6D5aF63D0eb1582Ce587",
        "easscan_url": "https://easscan.org",
        "hypersync_url": "https://eth.hypersync.xyz"
    },
    "sepolia": {
        "contract_address": "0xC2679fBD37d54388Ce493F1DB75320D236e1815e",
        "easscan_url": "https://sepolia.easscan.org",
        "hypersync_url": "https://sepolia.hypersync.xyz"
    }
}

In [5]:
def get_contract_address(blockchain_name):
    return blockchain_networks[blockchain_name]["contract_address"]

In [6]:
def get_eas_graph_ql_url(blockchain_name):
    return blockchain_networks[blockchain_name]["easscan_url"] + "/graphql"

In [7]:
def get_hypersync_url(blockchain_name):
    return blockchain_networks[blockchain_name]["hypersync_url"]


### Collect events with Hypersync

In [8]:
from hypersync import JoinMode


def read_attestation_events(root_dir):
    # Read the attestations from the parquet file
    log_parts = glob.glob(f"{root_dir}/**/logs.parquet", recursive=True)
    df = pd.concat([pd.read_parquet(p) for p in log_parts], ignore_index=True)

    print(f"Found {len(df)} total attestations")

    return df

async def collect_attestation_events(start_block, end_block,blockchain_name="arbitrum"):
    hypersync_api_key = get_envio_key()
    # The event signature string
    event_signature = "Attested(address indexed recipient, address indexed attester, bytes32 uid, bytes32 indexed schema)"

    # Initialize client
    client = hypersync.HypersyncClient(
        hypersync.ClientConfig(
            url=get_hypersync_url(blockchain_name),  # Arbitrum network for EAS
            bearer_token=hypersync_api_key,
        )
    )

    # Define field selection
    field_selection = FieldSelection(
        log=[
            LogField.ADDRESS,
            LogField.TOPIC0,
            LogField.TOPIC1,
            LogField.TOPIC2,
            LogField.TOPIC3,
            LogField.DATA,
            LogField.BLOCK_NUMBER
        ]
    )

    # Define query for EAS Attested events
    query = hypersync.Query(
        from_block=0,  # Start from genesis
        to_block=100000000,  # Large number to get recent blocks
        field_selection=field_selection,
        join_mode= JoinMode.JOIN_ALL,
        logs=[
            LogSelection(
                address=[get_contract_address(blockchain_name)],  # EAS contract on Arbitrum
                topics=[
                    ["0x8bf46bf4cfd674fa735a3d63ec1c9ad4153f033c290341f3a588b75685141b35"]  # Attested event signature
                ]
            )
        ]
    )

    # Configure output
    config = hypersync.StreamConfig(
        hex_output=HexOutput.PREFIXED,
        event_signature=event_signature
    )

    root_dir = f"{get_data_root_dir()}/{blockchain_name}/logs_dataset"
    os.makedirs(root_dir, exist_ok=True)

    # Collect data to a Parquet file
    current_block = start_block
    while current_block < end_block:
        query.from_block = current_block
        query.to_block = min(current_block + 1_000_000, end_block)
        out_dir = f"{root_dir}/chunk_{query.from_block}_{query.to_block}"
        print(f"Processing blocks {query.from_block}→{query.to_block} out of {end_block} → {out_dir}")
        await client.collect_parquet(out_dir, query, config)  # writes logs.parquet, blocks.parquet, etc. into out_dir
        current_block = query.to_block + 1
        
    print(f"Processed blocks {query.from_block} to {query.to_block}")
    df = read_attestation_events(root_dir)

    return len(df)

    

In [9]:
# Run the function
# await collect_attestation_events(386757242-10000000,386757242+1)

### Process events

In [10]:
def last_40_addr(topic):
    if not isinstance(topic, str):
        return None
    h = topic[2:] if topic.startswith('0x') else topic
    if len(h) < 40:
        return None
    return '0x' + h[-40:]
    
def process_attestation_events(blockchain_name="arbitrum"):
    root_dir = f"{get_data_root_dir()}/{blockchain_name}/logs_dataset"

    # Read the attestations from the parquet file
    df = read_attestation_events(root_dir)
    
    # Decode the attestation data
    decoded_attestations = []
    
    for index, row in df.iterrows():
        # Decode the topics to get the indexed parameters
        recipient = last_40_addr(row['topic1'])
        attester = last_40_addr(row['topic2'])
        schema = row['topic3']                  # Full topic3 is the schema
        
        # Decode the data field to get the uid (first 32 bytes)
        data_hex = row['data'][2:]  # Remove 0x prefix
        uid = "0x" + data_hex[:64]  # First 32 bytes (64 hex chars)
        
        attestation = {
            "recipient": recipient,
            "attester": attester,
            "uid": uid,
            "schema": schema,
            "block_number": row.get('block_number', 'N/A'),
            "raw_data": row['data'],
            "raw_topics": {
                "topic0": row['topic0'],
                "topic1": row['topic1'],
                "topic2": row['topic2'],
                "topic3": row['topic3']
            }
        }
        
        decoded_attestations.append(attestation)
    
    # Save as JSON
    with open(f'{get_data_root_dir()}/{blockchain_name}/processed_attestation_events.json', 'w') as f:
        json.dump(decoded_attestations, f, indent=2)
    
    print(f"Processed {len(decoded_attestations)} attestations and saved to {get_data_root_dir()}/{blockchain_name}/processed_attestation_events.json")
    
    # Print first few for inspection
    print("\nFirst few processed attestations:")
    for i, att in enumerate(decoded_attestations[:3]):
        print(f"\nAttestation {i+1}:")
        print(f"  Recipient: {att['recipient']}")
        print(f"  Attester: {att['attester']}")
        print(f"  UID: {att['uid']}")
        print(f"  Schema: {att['schema']}")
    
    return decoded_attestations


In [11]:
# process_attestation_events("arbitrum")

## Enrich Attestation Data with data from EAS GraphQL

In [12]:
import requests

### fetch_attestation_eas_graph_ql_by_uid (unused)

In [13]:
async def fetch_attestation_eas_graph_ql_by_uid(uid,blockchain_name="arbitrum"):
    """
    Fetch attestation data from EAS API by UID
    
    Args:
        uid (str): The attestation UID to fetch
        
    Returns:
        dict: The attestation data or None if not found
    """
    
    # EAS GraphQL API endpoint
    url = get_eas_graph_ql_url(blockchain_name)
    
    # GraphQL query to fetch attestation by UID (correct structure)
    query = """
        query Attestation($id: String!) {
            attestation(where: { id: $id }) {
                id
                attester
                recipient
                refUID
                revocable
                revocationTime
                expirationTime
                data
            }
        }
    """
    
    # Variables for the query
    variables = {
        "id": uid
    }
    
    # Request payload
    payload = {
        "query": query,
        "variables": variables
    }
    
    try:
        # Make the request
        response = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"}
        )
        
        # Check if request was successful
        response.raise_for_status()
        
        # Parse the response
        data = response.json()
        
        # Check for GraphQL errors
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return None
            
        # Return the attestation data
        attestation = data.get("data", {}).get("attestation")
        
        if attestation is None:
            print(f"No attestation found with UID: {uid}")
            return None
            
        print(f"Successfully fetched attestation for UID: {uid}")
        return attestation
        
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON response: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

In [14]:
# Test function with one of the UIDs from your data
test_uid = "0x3ffcecf3fd79254919ddba80370729327f44472a296e0e8f54e3996652b38d51"  # Replace with actual UID
result = await fetch_attestation_eas_graph_ql_by_uid(test_uid)

if result:
    print("Attestation data:")
    print(json.dumps(result, indent=2))
else:
    print("Failed to fetch attestation data")

Successfully fetched attestation for UID: 0x3ffcecf3fd79254919ddba80370729327f44472a296e0e8f54e3996652b38d51
Attestation data:
{
  "id": "0x3ffcecf3fd79254919ddba80370729327f44472a296e0e8f54e3996652b38d51",
  "attester": "0xB2331EAad45730CD8CC3A553E3eC43257829DC03",
  "recipient": "0xB2331EAad45730CD8CC3A553E3eC43257829DC03",
  "refUID": "0x5632ba1bd850239423138587d3862ff89f67c982d2e311132954a877541b3383",
  "revocable": true,
  "revocationTime": 0,
  "expirationTime": 0,
  "data": "0x0000000000000000000000000000000000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000005d7b2274797065223a22636f6d706c65746564222c22726561736f6e223a22222c2270726f6f664f66576f726b223a22222c22636f6d706c6574696f6e50657263656e74616765223a22222c2264656c6976657261626c6573223a5b5d7d000000"
}


### fetch_attestation_eas_graph_ql_for_multiple_uids

In [15]:
# This function should take a list of attestations and return a list of enriched attestations with schema, block_number and blockchain_name
async def fetch_attestation_eas_graph_ql_for_multiple_uids(attestation_events,blockchain_name="arbitrum"):
    """
    Fetch attestation data from EAS API by UID
    
    Args:
        uid (str): The attestation UID to fetch
        
    Returns:
        dict: The attestation data or None if not found
    """
    
    # EAS GraphQL API endpoint
    url = get_eas_graph_ql_url(blockchain_name)
    
    # GraphQL query to fetch attestation by UID (correct structure)
    query = """
        query Attestations($ids: [String!]!) {
            attestations(where: { id:{in: $ids }}) {
                id
                attester
                recipient
                refUID
                revocable
                revocationTime
                expirationTime
                data
            }
        }
    """
    
    # Variables for the query
    variables = {
        "ids": attestation_events.uid.tolist()
    }
    
    # Request payload
    payload = {
        "query": query,
        "variables": variables
    }
    
    try:
        # Make the request
        response = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"}
        )
        
        # Check if request was successful
        response.raise_for_status()
        
        # Parse the response
        data = response.json()
        
        # Check for GraphQL errors
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return None
            
        # Return the attestation data
        attestations = data.get("data", {}).get("attestations")
        
        if attestations is None:
            print(f"No attestation found with UIDs: {attestation_events.uid.tolist()}")
            return None
            
        print(f"Successfully fetched attestation for UIDs: {attestation_events.uid.tolist()}")
        # Enrich attestations with additional metadata
        enriched_attestations = []
        
        # dedupe latest per uid
        aed = (
            attestation_events.sort_values('block_number')
            .groupby('uid', as_index=False)
            .last()[['uid', 'schema', 'block_number']]
        )

        lookup = aed.set_index('uid')[['schema', 'block_number']].to_dict('index')

        enriched_attestations = []
        for att in attestations:
            meta = lookup.get(att['id'])
            if not meta:
                # skip or set None defaults
                # enriched_attestations.append({**att, 'schema': None, 'block_number': None})
                continue
            enriched_attestations.append({
                **att,
                'schema': meta['schema'],
                'block_number': meta['block_number'],
                'blockchain_name': blockchain_name,
            })
        return enriched_attestations
        
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON response: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

async def fetch_attestation_eas_graph_ql_for_multiple_uids_in_chunks(uids,blockchain_name="arbitrum",chunk_size=100):
    """
    Fetch attestation data from EAS API in chunks of a specified size
    
    Args:
        uids (list): List of attestation UIDs to fetch
        chunk_size (int): Number of UIDs per chunk
        
    Returns:
        list: List of attestation data or None if no data is found
    """

    all_attestations = []
    nb_of_fetched_attestations = 0
    for i in range(0, len(uids), chunk_size):
        chunk_uids = uids[i:i+chunk_size]
        print(f"Fetching attestations {nb_of_fetched_attestations} to {nb_of_fetched_attestations+len(chunk_uids)} out of {len(uids)} for {blockchain_name}")
        nb_of_fetched_attestations += len(chunk_uids)
        attestations = await fetch_attestation_eas_graph_ql_for_multiple_uids(chunk_uids,blockchain_name)
        if attestations:
            all_attestations.extend(attestations)
    
    return all_attestations


In [16]:
# # Test function with one of the UIDs from your data
# test_uids = ["0x7530011d80d3b2e66efe436ee57b4ecd33bd54f36480d992d5e5ac63fea85c1a", "0x9fa9c36cd6952fe2ef0663a2023f0d97525c0bacd14fca06d568b8a190007976"]
# blockchain_name = "arbitrum"

# # Fetch the attestation events for the test UIDs
# df_test = pd.read_json(f"{get_data_root_dir()}/{blockchain_name}/processed_attestation_events.json")
# df_test = df_test[df_test['uid'].isin(test_uids)]
# df_test= df_test.groupby('uid', as_index=False).last()
# print("Input contents:")
# display(df_test.style.set_properties(**{'text-align': 'left'})
#        .set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}]))

# result = await fetch_attestation_eas_graph_ql_for_multiple_uids_in_chunks(df_test,blockchain_name)

# if result:
#     print("Attestation data:")
#     print(json.dumps(result, indent=2, default=str))
# else:
#     print("Failed to fetch attestation data")


### fetch schema from schema id using eas

In [17]:
async def fetch_schema_from_schema_id(schema_id,blockchain_name="arbitrum"):
    """
    Fetch schema data from EAS API by schema id
    
    Args:
        uid (str): The attestation UID to fetch
        
    Returns:
        dict: The attestation data or None if not found
    """
    
    # EAS GraphQL API endpoint
    url = get_eas_graph_ql_url(blockchain_name)
    
    # GraphQL query to fetch attestation by UID (correct structure)
    query = """
        query GetSchema($where: SchemaWhereUniqueInput!) {
            getSchema(where: $where) {
                schema
            }
        }
    """
    
    # Variables for the query
    variables = {
        "where": {
            "id": schema_id
        }
    }
    
    # Request payload
    payload = {
        "query": query,
        "variables": variables
    }
    
    try:
        # Make the request
        response = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"}
        )
        
        # Check if request was successful
        response.raise_for_status()
        
        # Parse the response
        data = response.json()
        
        # Check for GraphQL errors
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return None
            
        # Return the attestation data
        schema = data.get("data", {}).get("getSchema")
        
        if schema is None:
            print(f"No attestation found with schema_id: {schema_id}")
            return None
            
        print(f"Successfully fetched attestation for schema_id: {schema_id}")
        return schema.get("schema")
        
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON response: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

In [18]:
result=await fetch_schema_from_schema_id("0xbcc545b516ee9f84d2f549215c9b016e07c6bd7b22751de0a80a632f7c540f28","mainnet")
if result:
    print("Schema data:")
    print(json.dumps(result, indent=2))
else:
    print("Failed to fetch schema data")

Successfully fetched attestation for schema_id: 0xbcc545b516ee9f84d2f549215c9b016e07c6bd7b22751de0a80a632f7c540f28
Schema data:
"string programName,string participantName,uint16 completionYear,uint256 completionTimestamp,string[] attendedCourses,string annotation"


### decode_attestation_data

In [19]:
import re
from typing import List, Tuple, Optional, Dict, Any
from eth_abi import decode as abi_decode
from eth_utils import remove_0x_prefix, is_hex
from eth_abi.codec import ABICodec
from eth_abi.registry import registry

class ParsedField:
    def __init__(self, abi_type: str, name: Optional[str], tuple_member_names: Optional[List[Optional[str]]]):
        self.abi_type = abi_type      # uses paren-form for tuples: "(string,address)[]"
        self.name = name
        self.tuple_member_names = tuple_member_names

def _strip_schema_prefix(s: str) -> str:
    return re.sub(r'^\s*schema\s*:?\s*', '', s, flags=re.IGNORECASE).strip()

def _strip_wrapping_parens(s: str) -> str:
    s = s.strip()
    if not (s.startswith('(') and s.endswith(')')):
        return s
    depth = 0
    for i, ch in enumerate(s):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0 and i != len(s) - 1:
                return s
    return s[1:-1].strip()

def _split_top_level_commas(s: str) -> List[str]:
    parts, buf, depth = [], [], 0
    for ch in s:
        if ch == '(':
            depth += 1
            buf.append(ch)
        elif ch == ')':
            depth = max(0, depth - 1)
            buf.append(ch)
        elif ch == ',' and depth == 0:
            parts.append(''.join(buf).strip())
            buf = []
        else:
            buf.append(ch)
    if buf:
        parts.append(''.join(buf).strip())
    return [p for p in parts if p]

def _parse_tuple_components(body: str) -> Tuple[List[str], List[Optional[str]]]:
    comps = _split_top_level_commas(body)
    inner_types: List[str] = []
    inner_names: List[Optional[str]] = []
    for c in comps:
        c_norm = re.sub(r'\s+', ' ', c.strip())
        if c_norm.startswith('(') or c_norm.startswith('tuple('):
            # nested tuple
            depth, i = 0, 0
            while i < len(c_norm):
                if c_norm[i] == '(':
                    depth += 1
                elif c_norm[i] == ')':
                    depth -= 1
                    if depth == 0:
                        i += 1
                        break
                i += 1
            nested_body = c_norm[c_norm.find('(')+1:i-1]
            suffix_match = re.match(r'(?:\[\d*\])*', c_norm[i:])
            suffix = suffix_match.group(0) if suffix_match else ''
            rest = c_norm[i + len(suffix):].strip()
            nested_types, nested_names = _parse_tuple_components(nested_body)
            inner_types.append(f"({','.join(nested_types)}){suffix}")  # paren-form
            inner_names.append(rest if rest else None)
        else:
            parts = c_norm.split(' ', 1)
            typ = parts[0]
            nm = parts[1].strip() if len(parts) == 2 and parts[1].strip() else None
            inner_types.append(typ)
            inner_names.append(nm)
    return inner_types, inner_names

def _parse_field(token: str) -> ParsedField:
    token = re.sub(r'\s+', ' ', token.strip())
    if token.startswith('(') or token.startswith('tuple('):
        # tuple shorthand or explicit tuple(...)
        depth, i = 0, 0
        while i < len(token):
            if token[i] == '(':
                depth += 1
            elif token[i] == ')':
                depth -= 1
                if depth == 0:
                    i += 1
                    break
            i += 1
        body = token[token.find('(')+1:i-1]
        suffix_match = re.match(r'(?:\[\d*\])*', token[i:])
        suffix = suffix_match.group(0) if suffix_match else ''
        rest = token[i + len(suffix):].strip()
        name = rest if rest else None
        inner_types, inner_names = _parse_tuple_components(body)
        abi_type = f"({','.join(inner_types)}){suffix}"  # paren-form, NOT "tuple(...)"
        return ParsedField(abi_type, name, inner_names)
    else:
        parts = token.split(' ', 1)
        abi_type = parts[0]
        name = parts[1].strip() if len(parts) == 2 and parts[1].strip() else None
        return ParsedField(abi_type, name, None)

def _extract_fields(schema: str) -> List[ParsedField]:
    schema = _strip_schema_prefix(schema)
    schema = _strip_wrapping_parens(schema)
    if schema.startswith('[') or schema.startswith('{'):
        raise ValueError("Pass a comma-separated Solidity schema string.")
    tokens = _split_top_level_commas(schema)
    return [_parse_field(tok) for tok in tokens]

def _unique_names(fields: List[ParsedField]) -> List[str]:
    names: List[str] = []
    seen: Dict[str, int] = {}
    for idx, f in enumerate(fields):
        name = f.name
        if not name:
            base = f.abi_type.replace('[', '_arr').replace(']', '')
            base = re.sub(r'[^a-zA-Z0-9_]', '_', base) or 'field'
            name = f"{base}_{idx}"
        if name in seen:
            seen[name] += 1
            name = f"{name}_{seen[name]}"
        else:
            seen[name] = 0
        names.append(name)
    return names

def _to_bytes_hex_strict(hex_data: str, skip_first_4_bytes: bool = False) -> bytes:
    if not isinstance(hex_data, str):
        raise TypeError("hex_data must be a 0x-prefixed hex string.")
    if not is_hex(hex_data) or remove_0x_prefix(hex_data) == "":
        raise ValueError("hex_data must be a non-empty 0x-prefixed hex string.")
    b = bytes.fromhex(remove_0x_prefix(hex_data))
    return b[4:] if skip_first_4_bytes else b

def _is_array_type(abi_type: str) -> bool:
    return bool(re.search(r'(?:\[\d*\])+$', abi_type))

def _is_tuple_type(abi_type: str) -> bool:
    return abi_type.startswith('(') or abi_type.startswith('tuple(')

def _map_tuple_value(names: List[Optional[str]], value: Any) -> Any:
    if isinstance(value, (list, tuple)):
        keys = [nm if nm else f"field_{i}" for i, nm in enumerate(names)]
        return {k: v for k, v in zip(keys, value)}
    return value

def _postprocess_value(field: ParsedField, value: Any) -> Any:
    if not _is_tuple_type(field.abi_type):
        return value
    if _is_array_type(field.abi_type):
        return [_map_tuple_value(field.tuple_member_names or [], elem) for elem in value]
    return _map_tuple_value(field.tuple_member_names or [], value)

# Replace your single abi_decode(...) call with this helper:
def _abi_decode_lenient(types, payload, *, strict=False):
    codec = ABICodec(registry)
    return codec.decode(types, payload, strict=strict)

# Optional: upcast small uints to uint256 (common on-chain)
import re as _re
def _normalize_uints_to_256(types):
    return [_re.sub(r'^uint(8|16|32|64|128|160|192)$', 'uint256', t) for t in types]

def decode_with_schema_dict_hex(schema: str, hex_data: str, *, has_function_selector: bool = False) -> Dict[str, Any]:
    try:
        fields = _extract_fields(schema)
        types = [f.abi_type for f in fields]
        types = _normalize_uints_to_256(types) if ' _normalize_uints_to_256' in globals() else types
        names = _unique_names(fields)
        payload = _to_bytes_hex_strict(hex_data, skip_first_4_bytes=has_function_selector)

        # decode
        values = _abi_decode_lenient(types, payload, strict=False)

        # build dict
        out: Dict[str, Any] = {}
        for name, field, value in zip(names, fields, values):
            out[name] = _postprocess_value(field, value)
        return out
    except Exception:
        try:
            payload2 = _to_bytes_hex_strict(hex_data, skip_first_4_bytes=True)
            values = _abi_decode_lenient(types, payload2, strict=False)
            return {name: _postprocess_value(field, value) for name, field, value in zip(names, fields, values)}
        except Exception:
            return {"rawHex": hex_data}

print('test1')
test_data = "0x00000000000000000000000000000000000000000000000000000000000000c0000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000007e90000000000000000000000000000000000000000000000000000000068db02cf00000000000000000000000000000000000000000000000000000000000001400000000000000000000000000000000000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000000f50726f746f636f6c205363686f6f6c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c416e64726520436f6d6561750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c000000000000000000000000000000000000000000000000000000000000018000000000000000000000000000000000000000000000000000000000000001c000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000240000000000000000000000000000000000000000000000000000000000000028000000000000000000000000000000000000000000000000000000000000002c000000000000000000000000000000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000340000000000000000000000000000000000000000000000000000000000000038000000000000000000000000000000000000000000000000000000000000003c00000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000044000000000000000000000000000000000000000000000000000000000000000154379626572706879736963616c2053797374656d73000000000000000000000000000000000000000000000000000000000000000000000000000000000000104265796f6e6420436f6e73656e73757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a476f7665726e616e636500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e50726f746f636f6c204172742049000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d5374726174656779206173204f72672e20436f6f7264696e6174696f6e000000000000000000000000000000000000000000000000000000000000000000000f50726f746f636f6c204172742049490000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001550726f746f636f6c2053746f727974656c6c696e6700000000000000000000000000000000000000000000000000000000000000000000000000000000000019536872657961205368616e6b61722047756573742074616c6b0000000000000000000000000000000000000000000000000000000000000000000000000000194d75736963616c697a6174696f6e2c206e6f74204d7573696300000000000000000000000000000000000000000000000000000000000000000000000000000f44657369676e696e672054727573740000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001844657369676e696e67204469676974616c20576f726c647300000000000000000000000000000000000000000000000000000000000000000000000000000029546f77617264732061204e657720536f6369616c20536369656e6365206f662050726f746f636f6c730000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e50696f6e65657220436f686f7274000000000000000000000000000000000000"
test_schemaAbiString = "string programName,string participantName,uint16 completionYear,uint256 completionTimestamp,string[] attendedCourses,string annotation"
decoded = decode_with_schema_dict_hex(test_schemaAbiString, test_data)  # test_data must be 0x...print("Decoded attestation data:")
print(json.dumps(decoded, indent=2))

print("test2")
test_data="0x00000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000140000000000000000000000000000000000000000000000000000000000000018000000000000000000000000000000000000000000000000000000000000001c00000000000000000000000000000000000000000000000000000000000000320000000000000000000000000000000000000000000000000000000000000034000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e35fa931a0000000000000000000000000000000000000000000000000000000000000000020636339616134366630616165346363383936366366376436643365376132633900000000000000000000000000000000000000000000000000000000000000203934626237336537316132373433343039393862666531383666613633363836000000000000000000000000000000000000000000000000000000000000002031656431653233396161396234656661393233666132353435323533343639660000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000000000000000000000000c00000000000000000000000000000000000000000000000000000000000000040000000000000000000000000297be55d38bf3de94db2dfe5756a8e01852adda8000000000000000000000000000000000000000000000000000000000000002038346164643564666631326534323365383633316633346635613239303032320000000000000000000000000000000000000000000000000000000000000040000000000000000000000000700e2c7ed93d8f7aebc174eaa6be1d8a2d87e42a000000000000000000000000000000000000000000000000000000000000002037656631636666653739303034333666613666373636393666663362353861610000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000034c46470000000000000000000000000000000000000000000000000000000000"
test_schemaAbiString="string project_uuid, string user_uuid, string project_contribution_uuid, (string user_uuid, address wallet_address)[] builders, string message, (string name, uint8 quantity)[] stickers, address token_address, uint256 value"
decoded = decode_with_schema_dict_hex(test_schemaAbiString,test_data)
print("Decoded attestation data:")
print(json.dumps(decoded, indent=2))

print("test3")
test_data="0x00000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000140000000000000000000000000000000000000000000000000000000000000018000000000000000000000000000000000000000000000000000000000000001c06148c6d550cab3cb168000206f64a5276633a6dfe6504125563c8c516b0845680000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000001951a1edd48926b83a8d7546cf946402b1442e29d072754d9aca2569b3af00ddee5af6092f800000000000000000000000000000000000000000000000000000000000000084964656e74697479000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000762696e616e63650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000094b5943204c6576656c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000033e3d32000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000045452554500000000000000000000000000000000000000000000000000000000"
test_schemaAbiString="string ProofType,string Source,string Content,string Condition,bytes32 SourceUserIdHash,bool Result,uint64 Timestamp,bytes32 UserIdHash"
decoded = decode_with_schema_dict_hex(test_schemaAbiString,test_data)
print("Decoded attestation data:")
print(json.dumps(decoded, indent=2))

test1
{
  "programName": "Protocol School",
  "participantName": "Andre Comeau",
  "completionYear": 2025,
  "completionTimestamp": 1759183567,
  "attendedCourses": [
    "Cyberphysical Systems",
    "Beyond Consensus",
    "Governance",
    "Protocol Art I",
    "Strategy as Org. Coordination",
    "Protocol Art II",
    "Protocol Storytelling",
    "Shreya Shankar Guest talk",
    "Musicalization, not Music",
    "Designing Trust",
    "Designing Digital Worlds",
    "Towards a New Social Science of Protocols"
  ],
  "annotation": "Pioneer Cohort"
}
test2
Decoded attestation data:
{
  "project_uuid": "cc9aa46f0aae4cc8966cf7d6d3e7a2c9",
  "user_uuid": "94bb73e71a274340998bfe186fa63686",
  "project_contribution_uuid": "1ed1e239aa9b4efa923fa2545253469f",
  "builders": [
    {
      "user_uuid": "84add5dff12e423e8631f34f5a290022",
      "wallet_address": "0x297be55d38bf3de94db2dfe5756a8e01852adda8"
    },
    {
      "user_uuid": "7ef1cffe7900436fa6f76696ff3b58aa",
      "wallet_address"

#### deprecated

In [20]:
# import json
# from eth_abi import decode as abi_decode
# from eth_utils import remove_0x_prefix

# # deprecated
# def decode_attestation_data(hex_data):
#     """
#     Decode the data field from an EAS attestation
    
#     Args:
#         hex_data (str): The hex-encoded data field from the attestation
        
#     Returns:
#         dict: Decoded data or None if decoding fails
#     """
#     try:
#         # Remove 0x prefix if present
#         if hex_data.startswith('0x'):
#             hex_data = hex_data[2:]
        
#         # Convert hex to bytes
#         data_bytes = bytes.fromhex(hex_data)
        
#         # The data is typically ABI-encoded
#         # First let's use the ABI to decode the data
#         # schemaAbiString looks like this: "bool isWitnessed,string leafIndex"

#         # Then, let's try to decode it as a string (common for EAS attestations)
        
#         # Skip the first 32 bytes (offset) and next 32 bytes (length)
#         # Then read the actual string data
#         if len(data_bytes) > 64:  # At least 64 bytes (32 + 32)
#             # Get the length of the string (second 32 bytes)
#             length_hex = data_bytes[32:64]
#             length = int.from_bytes(bytes.fromhex(length_hex.hex()), 'big')
            
#             # Extract the string data
#             if length > 0 and len(data_bytes) >= 64 + length:
#                 string_data = data_bytes[64:64+length]
                
#                 try:
#                     # Try to decode as UTF-8 string
#                     decoded_string = string_data.decode('utf-8')
                    
#                     # Try to parse as JSON if it looks like JSON
#                     if decoded_string.strip().startswith('{'):
#                         return json.loads(decoded_string)
#                     else:
#                         return {"decoded_string": decoded_string}
                        
#                 except (UnicodeDecodeError, json.JSONDecodeError):
#                     # If it's not a string or JSON, return the raw hex
#                     return {"raw_hex": hex_data, "raw_bytes": data_bytes.hex()}
        
#         # If we can't decode as string, return raw data
#         return {"raw_hex": hex_data, "raw_bytes": data_bytes.hex()}
        
#     except Exception as e:
#         print(f"Error decoding data: {e}")
#         return {"error": str(e), "raw_hex": hex_data}

# # Test with your attestation data
# test_data = "0x00000000000000000000000000000000000000000000000000000000000000c0000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000007e90000000000000000000000000000000000000000000000000000000068db02cf00000000000000000000000000000000000000000000000000000000000001400000000000000000000000000000000000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000000f50726f746f636f6c205363686f6f6c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c416e64726520436f6d6561750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c000000000000000000000000000000000000000000000000000000000000018000000000000000000000000000000000000000000000000000000000000001c000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000240000000000000000000000000000000000000000000000000000000000000028000000000000000000000000000000000000000000000000000000000000002c000000000000000000000000000000000000000000000000000000000000003000000000000000000000000000000000000000000000000000000000000000340000000000000000000000000000000000000000000000000000000000000038000000000000000000000000000000000000000000000000000000000000003c00000000000000000000000000000000000000000000000000000000000000400000000000000000000000000000000000000000000000000000000000000044000000000000000000000000000000000000000000000000000000000000000154379626572706879736963616c2053797374656d73000000000000000000000000000000000000000000000000000000000000000000000000000000000000104265796f6e6420436f6e73656e73757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a476f7665726e616e636500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e50726f746f636f6c204172742049000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d5374726174656779206173204f72672e20436f6f7264696e6174696f6e000000000000000000000000000000000000000000000000000000000000000000000f50726f746f636f6c204172742049490000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001550726f746f636f6c2053746f727974656c6c696e6700000000000000000000000000000000000000000000000000000000000000000000000000000000000019536872657961205368616e6b61722047756573742074616c6b0000000000000000000000000000000000000000000000000000000000000000000000000000194d75736963616c697a6174696f6e2c206e6f74204d7573696300000000000000000000000000000000000000000000000000000000000000000000000000000f44657369676e696e672054727573740000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001844657369676e696e67204469676974616c20576f726c647300000000000000000000000000000000000000000000000000000000000000000000000000000029546f77617264732061204e657720536f6369616c20536369656e6365206f662050726f746f636f6c730000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e50696f6e65657220436f686f7274000000000000000000000000000000000000"
# test_schemaAbiString = "string programName,string participantName,uint16 completionYear,uint256 completionTimestamp,string[] attendedCourses,string annotation"
# decoded = decode_attestation_data(test_data)
# print("Decoded attestation data:")
# print(json.dumps(decoded, indent=2))


### enrich_attestation_data

In [21]:
import json
import numpy as np
import pandas as pd
from datetime import datetime
from decimal import Decimal

def to_jsonable(o):
    if isinstance(o, (np.integer,)):
        return int(o)
    if isinstance(o, (np.floating,)):
        return float(o)
    if isinstance(o, (np.bool_,)):
        return bool(o)
    if isinstance(o, (pd.Timestamp, datetime)):
        return o.isoformat()
    if isinstance(o, Decimal):
        return float(o)
    if isinstance(o, (np.ndarray,)):
        return o.tolist()
    return str(o)

async def enrich_attestation_data(blockchain_name="arbitrum"):
    # Read the processed attestation events
    df = pd.read_json(f"{get_data_root_dir()}/{blockchain_name}/processed_attestation_events.json").groupby('uid', as_index=False).last()
    print(len(df))
    enriched_attestations = await fetch_attestation_eas_graph_ql_for_multiple_uids_in_chunks(df,blockchain_name,10000)
    print(len(enriched_attestations))
    # Decode the attestation data
    enriched_attestations_decoded = []
    schema_cache = {}
    if enriched_attestations is not None:
        for attestation in enriched_attestations:
            if attestation is not None:
                schema_id = attestation['schema']
                if schema_id not in schema_cache:
                    schema_cache[schema_id] = await fetch_schema_from_schema_id(schema_id, blockchain_name)
                schema = schema_cache[schema_id]
                #decode data
                attestation['decoded_data'] = decode_with_schema_dict_hex(schema, attestation['data'])
                #remove data field
                del attestation['data']
                # Check if the decoded data, converted to string, contains "\u0000"
                if "\u0000" not in str(attestation['decoded_data']):
                    enriched_attestations_decoded.append(attestation)

    # log the number of lines in the file before appending, or none if the file does not exist
    if os.path.exists(f"{get_data_root_dir()}/enriched_attestation_events.jsonl"):
        nb_of_lines = sum(1 for line in open(f"{get_data_root_dir()}/enriched_attestation_events.jsonl"))
        print(f"Number of lines in the file before appending: {nb_of_lines}")
    else:
        print("File does not exist yet")
        
    # save the enriched_attestations_decoded to a json file
    with open(f"{get_data_root_dir()}/enriched_attestation_events.jsonl", 'a') as f:
        for attestation_decoded in enriched_attestations_decoded:
            f.write(json.dumps(attestation_decoded, default=to_jsonable) + "\n")

    # log the number of lines in the file after appending, or none if the file does not exist
    print(f"Processed {len(enriched_attestations_decoded)} for {blockchain_name} attestations and saved to {get_data_root_dir()}/enriched_attestation_events.json")

    nb_of_lines = sum(1 for line in open(f"{get_data_root_dir()}/enriched_attestation_events.jsonl"))
    print(f"Number of lines in the file after appending: {nb_of_lines}")
    
    
    return enriched_attestations_decoded
    

In [22]:
# await enrich_attestation_data()

## Filter attestations containing "github"

In [23]:
import json

def filter_attestations_containing_github():

    in_path = f"{get_data_root_dir()}/enriched_attestation_events.jsonl"
    out_path = f"{get_data_root_dir()}/filtered_attestation_with_github.jsonl"

    total = 0
    with open(in_path, "r") as fin, open(out_path, "w") as fout:
        for line in fin:
            if not line.strip():
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if "decoded_data" in obj and "github" in str(obj["decoded_data"]).lower():
                fout.write(json.dumps(obj) + "\n")
                total += 1

    print(f"Found {total} attestations containing 'github'")


In [24]:
# filter_attestations_containing_github()

## Full Data process

### full_data_process

In [25]:
async def full_data_process(start_block, end_block, blockchain_name="arbitrum"):
    # Collect EAS attestations
    await collect_attestation_events(start_block, end_block,blockchain_name)

    # Process EAS attestations
    process_attestation_events(blockchain_name)

    # Enrich EAS attestations
    await enrich_attestation_data(blockchain_name)

    # Filter attestations containing "github"
    filter_attestations_containing_github()
    
    return

### Run

#### mainnet

In [26]:
# mainnet
# lastblock=23570552
# await full_data_process(0,lastblock,"mainnet")

#### arbitrum

In [27]:
# arbitrum
lastblock=308918372
await full_data_process(0,lastblock,"arbitrum")

Processing blocks 0→1000000 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_0_1000000
Processing blocks 1000001→2000001 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_1000001_2000001
Processing blocks 2000002→3000002 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_2000002_3000002
Processing blocks 3000003→4000003 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_3000003_4000003
Processing blocks 4000004→5000004 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_4000004_5000004
Processing blocks 5000005→6000005 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_5000005_6000005
Processing blocks 6000006→7000006 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_6000006_7000006
Processing blocks 7000007→8000007 out of 308918372 → ../../data/eas_attestations/arbitrum/logs_dataset/chunk_7000007_8000007
Processing b

#### optimism

In [28]:
# optimism
# lastblock=142391425
# await full_data_process(0,lastblock,"optimism")

#### base

In [29]:
# base
# lastblock=36820269
# await full_data_process(0,lastblock,"base")