## DEVFOLIO ONCHAIN CREDENTIALS ATTESTATIONS

### Setup

In [1]:
import pandas as pd
import matplotlib

In [2]:
def get_data_root_dir():
    return "../../data/eas_attestations"

### Refine data

#### Visualize

In [None]:
devfolio_o_c_a_schema_id="0x364a59df1d48d4b6c0f8f0c1176504b252bce5ce57e0d1ca75b1bf70c2f0ec14"

In [4]:
import json

def filter_attestations_by_schema_id(schemaId,fileName="devfolio_o_c_a_schema"):

    in_path = f"{get_data_root_dir()}/enriched_attestation_events.jsonl"
    out_path = f"{get_data_root_dir()}/filtered_attestation_with_{fileName}.jsonl"

    total = 0
    with open(in_path, "r") as fin, open(out_path, "w") as fout:
        for line in fin:
            if not line.strip():
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if obj["schema"]==schemaId:
                fout.write(json.dumps(obj) + "\n")
                total += 1

    print(f"Found {total} attestations for schema '{schemaId}'")


In [5]:
filter_attestations_by_schema_id(devfolio_o_c_a_schema_id)

Found 1524 attestations for schema '0x364a59df1d48d4b6c0f8f0c1176504b252bce5ce57e0d1ca75b1bf70c2f0ec14'


In [11]:
# get filtered_attestation_with_devfolio_o_c_a_schema.jsonl
df = pd.read_json("../../data/eas_attestations/filtered_attestation_with_devfolio_o_c_a_schema.jsonl", lines=True)

# Inspect decoded_data structure on the first row
first_decoded = df.iloc[0]["decoded_data"] if isinstance(df.iloc[0], pd.Series) else df.iloc[0].decoded_data
print("decoded_data type:", type(first_decoded))
if isinstance(first_decoded, dict):
    print("decoded_data keys (first 20):", list(first_decoded.keys())[:20])
    print("decoded_data sample:", {k: first_decoded[k] for k in list(first_decoded.keys())[:5]})
else:
    print("decoded_data value:", first_decoded)

# Safely extract nft_metadata_ipfs_url if present in dict
def get_first_nft_metadata_url(df):
    """Print the nft_metadata_ipfs_url from the first row of the dataframe if available"""
    decoded = df.iloc[0]["decoded_data"]
    first_url = decoded.get("nft_metadata_ipfs_url") if isinstance(decoded, dict) else None
    print(f"First NFT metadata URL: {first_url}")
    return first_url

get_first_nft_metadata_url(df)

decoded_data type: <class 'dict'>
decoded_data keys (first 20): ['nft_metadata_ipfs_url', 'user_uuid', 'credential_uuid', 'hackathon_uuid', 'user_hackathon_credential_uuid', 'nft_contract_address']
decoded_data sample: {'nft_metadata_ipfs_url': 'https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW', 'user_uuid': 'dd743bbd44274a8a9b65759f287d216d', 'credential_uuid': 'b1426e6a8b7d4867924f588e95eb70b1', 'hackathon_uuid': 'becdb269b9ea4e708c7d96329563e478', 'user_hackathon_credential_uuid': '1821718837164bd184737133bbcdf0a8'}
First NFT metadata URL: https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW

First Element Details:
----------------------------------------

id:
0x153909c95a54503a1f3d08dde5a4a4700bdf7e8d6e3360af1df6ce7ad23e8fb6

attester:
0x3Ce7b2b2a9F3C27aFa6EC511679f606412fb497b

recipient:
0x3c1763006FcdEa4b467cC8FE9c28Fab664d0F6ED

refUID:
0x0000000000000000000000000000000000000000000000000000000000000000

revocable:
True

revocationTime:
0

expir

In [13]:
# pretty print the first entry:
def pretty_print_first_element(df):
    """Pretty prints the first element of the dataframe"""
    import json
    first_element = df.iloc[0]
    print("\nFirst Element Details:")
    print("-" * 40)
    for key, value in first_element.items():
        print(f"\n{key}:")
        if key == "decoded_data":
            print(json.dumps(value, indent=2))
        else:
            print(value)

pretty_print_first_element(df)


First Element Details:
----------------------------------------

id:
0x153909c95a54503a1f3d08dde5a4a4700bdf7e8d6e3360af1df6ce7ad23e8fb6

attester:
0x3Ce7b2b2a9F3C27aFa6EC511679f606412fb497b

recipient:
0x3c1763006FcdEa4b467cC8FE9c28Fab664d0F6ED

refUID:
0x0000000000000000000000000000000000000000000000000000000000000000

revocable:
True

revocationTime:
0

expirationTime:
0

schema:
0x364a59df1d48d4b6c0f8f0c1176504b252bce5ce57e0d1ca75b1bf70c2f0ec14

block_number:
296748689

blockchain_name:
arbitrum

decoded_data:
{
  "nft_metadata_ipfs_url": "https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW",
  "user_uuid": "dd743bbd44274a8a9b65759f287d216d",
  "credential_uuid": "b1426e6a8b7d4867924f588e95eb70b1",
  "hackathon_uuid": "becdb269b9ea4e708c7d96329563e478",
  "user_hackathon_credential_uuid": "1821718837164bd184737133bbcdf0a8",
  "nft_contract_address": "0xe34494de41383fbad7d1cdba6730d0e943425701"
}


#### Trim

In [14]:
# remove refUID, revocable, revocationTime, expirationTime fields
def remove_useless_fields(df):
    return df[["id","attester","recipient","schema","block_number","blockchain_name","decoded_data"]]

In [16]:
df_trimmed=remove_useless_fields(df)
pretty_print_first_element(df_trimmed)


First Element Details:
----------------------------------------

id:
0x153909c95a54503a1f3d08dde5a4a4700bdf7e8d6e3360af1df6ce7ad23e8fb6

attester:
0x3Ce7b2b2a9F3C27aFa6EC511679f606412fb497b

recipient:
0x3c1763006FcdEa4b467cC8FE9c28Fab664d0F6ED

schema:
0x364a59df1d48d4b6c0f8f0c1176504b252bce5ce57e0d1ca75b1bf70c2f0ec14

block_number:
296748689

blockchain_name:
arbitrum

decoded_data:
{
  "nft_metadata_ipfs_url": "https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW",
  "user_uuid": "dd743bbd44274a8a9b65759f287d216d",
  "credential_uuid": "b1426e6a8b7d4867924f588e95eb70b1",
  "hackathon_uuid": "becdb269b9ea4e708c7d96329563e478",
  "user_hackathon_credential_uuid": "1821718837164bd184737133bbcdf0a8",
  "nft_contract_address": "0xe34494de41383fbad7d1cdba6730d0e943425701"
}


#### Get ipfs hosted json

In [27]:
import requests
import json
import re

# ipfs link like https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW contains a json, lets get it
def get_ipfs_json(ipfs_link):
    """Gets JSON data from an IPFS link
    
    Args:
        ipfs_link (str): IPFS link to JSON data
        
    Returns:
        dict: JSON data from IPFS link
    """

    
    try:
        response = requests.get(ipfs_link)
        response.raise_for_status()  # Raise exception for bad status codes
        return response.json()
        
    except requests.exceptions.RequestException as e:
        print(f"Error fetching IPFS data: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON data: {e}")
        return None
    
res=get_ipfs_json("https://ipfs.io/ipfs/Qmd7FukB2LASSTLLtZw6Lpz2GWfNDKg21ABSpVivG8F5GW")
# print(json.dumps(res, indent=2))

def process_ipfs_json(ipfs_json):
    # search https://devfolio.co/projects/empoweria-e0ee regexp pattern in the description field
    match = re.search(r'(https:\/\/devfolio\.co\/projects\/[^\)\s]+)', ipfs_json["description"])
    project_url = match.group(1) if match else None
    return {
        "project_url": project_url,
        "attributes": ipfs_json["attributes"]
    }

res=process_ipfs_json(res)
print(json.dumps(res, indent=2))

{
  "project_url": "https://devfolio.co/projects/empoweria-e0ee",
  "attributes": [
    {
      "trait_type": "hackathon_name",
      "value": "ETHIndia 2023"
    },
    {
      "trait_type": "nft_type",
      "value": "BUILDER"
    },
    {
      "trait_type": "team_name",
      "value": "DEFY"
    },
    {
      "trait_type": "project_name",
      "value": "Empoweria"
    },
    {
      "display_type": "date",
      "trait_type": "project_submission_date",
      "value": 1702179294391
    }
  ]
}


In [48]:
import re
import json
from typing import Dict, Any, List, Optional
import requests
from bs4 import BeautifulSoup, Tag

_PROBLEM_HEADING_RE = re.compile(r'^\s*the\s+problem\b.*\bsolves\b', re.I)
_CHALLENGE_RE = re.compile(r'^\s*Challenges', re.I)

def extract_problem_section(soup: BeautifulSoup, regex) -> Optional[str]:
    # Find the problem header by regex, independent of project name
    header = soup.find(
        lambda t: isinstance(t, Tag)
        and t.name in ("h2", "h3")
        and regex.search(t.get_text(strip=True) or "")
    )
    if not header:
        return None

    # Find the next div whose class contains 'ProjectListingContent'
    plc_div = None
    for el in header.next_elements:
        if isinstance(el, Tag) and el.name == "div":
            classes = " ".join(el.get("class", []))
            if "ProjectListingContent" in classes:
                plc_div = el
                break
        # stop if a new major section header is encountered
        if isinstance(el, Tag) and el.name in ("h2", "h3") and el is not header:
            break

    if not plc_div:
        return None

    # Extract readable text from the ProjectListingContent block
    # Prefer paragraphs and list items; fall back to full text if needed
    lines: List[str] = []
    for br in plc_div.find_all(["br"]):
        br.replace_with("\n")
    for el in plc_div.descendants:
        if isinstance(el, Tag):
            if el.name == "p":
                txt = el.get_text(" ", strip=True)
                if txt:
                    lines.append(txt)
            elif el.name == "li":
                li_txt = el.get_text(" ", strip=True)
                if li_txt:
                    lines.append(li_txt)

    text = "\n\n".join([l for l in lines if l]).strip()
    return text or plc_div.get_text(" ", strip=True) or None


def extract_technologies(soup: BeautifulSoup) -> List[str]:
    """Return a list of technology names from Devfolio project pages.
    Targets divs whose class contains 'ProjectTechCard__ProjectTechChip' and
    collects their <p> text content. Deduplicates while preserving order.
    """
    technologies: List[str] = []
    seen = set()

    def has_chip_class(tag: Tag) -> bool:
        classes = tag.get("class", [])
        if not classes:
            return False
        joined = " ".join(classes)
        return "ProjectTechCard__ProjectTechChip" in joined or "ProjectTech" in joined

    for chip_div in soup.find_all(lambda t: isinstance(t, Tag) and t.name == "div" and has_chip_class(t)):
        p = chip_div.find("p")
        if p:
            name = p.get_text(" ", strip=True)
            if name and name not in seen:
                seen.add(name)
                technologies.append(name)

    # Fallback: search any <p> whose parent div class includes 'ProjectTech'
    if not technologies:
        for p in soup.find_all("p"):
            parent = p.parent
            if isinstance(parent, Tag):
                classes = " ".join(parent.get("class", []))
                if "ProjectTech" in classes:
                    name = p.get_text(" ", strip=True)
                    if name and name not in seen:
                        seen.add(name)
                        technologies.append(name)

    return technologies

def scrape_devfolio_project(url: str, timeout: int = 20) -> Dict[str, Any]:
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")

    def text_or_none(el) -> Optional[str]:
        return el.get_text(strip=True) if el else None

    # Basic fields
    title = text_or_none(soup.find(["h1"]))
    subtitle = text_or_none(soup.find(["h2"]))
    

    # Quadratic voting / stats
    page_text = soup.get_text(" ", strip=True)
    matching_amount = None
    votes = None
    quadratic_votes = None

    m = re.search(r"\$([0-9][0-9,]*)\s*Matching Amount", page_text, flags=re.I)
    if m:
        matching_amount = m.group(1).replace(",", "")
        try:
            matching_amount = int(matching_amount)
        except ValueError:
            pass

    m = re.search(r"(\d+)\s*Votes", page_text, flags=re.I)
    if m:
        votes = int(m.group(1))

    m = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*Quadratic Votes", page_text, flags=re.I)
    if m:
        try:
            quadratic_votes = float(m.group(1))
        except ValueError:
            pass

    # Built at / dates
    built_at = None
    m = re.search(r"Built at\s+([A-Za-z0-9 ]+)", page_text, flags=re.I)
    if m:
        built_at = m.group(1).strip()

    created_on = None
    m = re.search(r"Created on\s+([0-9]{1,2}(?:st|nd|rd|th)?\s+[A-Za-z]+\s+[0-9]{4})", page_text, flags=re.I)
    if m:
        created_on = m.group(1)

    last_edited = None
    m = re.search(r"Last Edited\s+([0-9]{1,2}(?:st|nd|rd|th)?\s+[A-Za-z]+\s+[0-9]{4})", page_text, flags=re.I)
    if m:
        last_edited = m.group(1)
    github_link = None
    m = re.search(r'github\.com/[^/\s]+/[^/\s]+', str(soup), flags=re.I)
    if m:
        github_link = "https://" + m.group(0)

    problem_statement = extract_problem_section(soup,_PROBLEM_HEADING_RE)
    challenges=extract_problem_section(soup,_CHALLENGE_RE)
    technologies=extract_technologies(soup)

    return {
        "url": url,
        "title": title,
        "subtitle": subtitle,
        "technologies": technologies,
        "matching_amount_usd": matching_amount,
        "votes": votes,
        "quadratic_votes": quadratic_votes,
        "built_at": built_at,
        "created_on": created_on,
        "last_edited": last_edited,
        "problem_statement": problem_statement,
        "challenges":challenges,
        "github_link":github_link
    }

# Example
data = scrape_devfolio_project("https://devfolio.co/projects/empoweria-e0ee")
print(json.dumps(data, indent=2, ensure_ascii=False))

{
  "url": "https://devfolio.co/projects/empoweria-e0ee",
  "title": "Empoweria",
  "subtitle": "A platform that lets the citizens speak without fear and take the community towards development together",
  "technologies": [
    "Solidity",
    "Node.js",
    "Next.js",
    "TypeScript",
    "MongoDB",
    "mongoose",
    "Hardhat"
  ],
  "matching_amount_usd": 109,
  "votes": 531,
  "quadratic_votes": 88.183,
  "built_at": "ETHIndia 2023 Created on 10th December 2023",
  "created_on": "10th December 2023",
  "last_edited": "10th December 2023",
  "problem_statement": "In the fabric of our communities, a pervasive silence has taken root, stifling the voices of those yearning to speak out due to the paralyzing fear of judgment and reprisal. In response to this, we embark on a transformative mission, introducing a groundbreaking platform designed to empower the common individual to articulate their societal challenges and grievances anonymously. Utilizing anon aadhar, our platform ensures