In [None]:
## setup

## Setup

#### Pipeline steps
1. Install libraries
2. Establish connection for proposals
3. Create Lido Proposals Table
4. Populate Proposal Table
5. Establish connection for votes
6. Create Lido Votes Table
7. Populate Votes Table
8. Add embedding id column
9. Generate Lido Proposal embeddings
10. Insert Lido Proposal embeddings

11. Summarize Latest Proposal among all Lido Proposals
12. Generate prompt to summarize latest proposal

13. Semantic Search
- Validate Lido Proposal embeddings 
- Generate Prompt and Summarize

In [1]:
!pip install requests
!pip install openai
!pip install tiktoken
!pip install pandas
!pip install matplotlib
!pip install plotly
!pip install scipy
!pip install scikit-learn


[0mCollecting matplotlib
  Using cached matplotlib-3.8.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.8 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.44.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (153 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.4 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.5 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.1.1-py3-none-any.whl.metadata (5.1 kB)
Using cached matplotlib-3.8.1-cp310-cp310-macosx_11_0_arm64.whl (7.5 MB)
Using cached contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl (242 kB)
Using cache

#### Establish Connection - Lido Proposals

In [3]:
import requests
import pandas as pd
# Initialize variables
url = "https://hub.snapshot.org/graphql"
skip = 0
first = 6
space_in = ["lido-snapshot.eth"]
state = "all"

# Infinite loop for pagination
while True:
    # GraphQL query parameters
    payload = {
        "operationName": "Proposals",
        "variables": {
            "first": first,
            "skip": skip,
            "space_in": space_in,
            "state": state,
            "author_in": [],
            "title_contains": "",
            "flagged": False,
        },
        "query": """query Proposals($first: Int!, $skip: Int!, $state: String!, $space: String, $space_in: [String], $author_in: [String], $title_contains: String, $space_verified: Boolean, $flagged: Boolean) {
                      proposals(
                        first: $first,
                        skip: $skip,
                        where: {space: $space, state: $state, space_in: $space_in, author_in: $author_in, title_contains: $title_contains, space_verified: $space_verified, flagged: $flagged}
                      ) {
                        id
                        ipfs
                        title
                        body
                        start
                        end
                        state
                        author
                        created
                        choices
                        space {
                          id
                          name
                          members
                          avatar
                          symbol
                          verified
                          plugins
                        }
                        scores_state
                        scores_total
                        scores
                        votes
                        quorum
                        symbol
                        flagged
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract proposals
    proposals = data.get("data", {}).get("proposals", [])

    # Break loop if no more proposals
    if not proposals:
        break

    # Transform the data into a DataFrame
    df = pd.DataFrame(proposals)

    # Print the DataFrame
    print(df["start"], df["end"], df["created"])
    print("\n")
    print(df.dtypes)

    # Increment skip for pagination
    skip += first

print("----Lido ETL complete ----")

0    1698333638
1    1698332400
2    1698332400
3    1698332400
4    1695909797
5    1694777722
Name: start, dtype: int64 0    1698944400
1    1698944400
2    1698944400
3    1698944400
4    1696528800
5    1695405600
Name: end, dtype: int64 0    1698333638
1    1698331833
2    1698331321
3    1698329711
4    1695909797
5    1694777723
Name: created, dtype: int64


id               object
ipfs             object
title            object
body             object
start             int64
end               int64
state            object
author           object
created           int64
choices          object
space            object
scores_state     object
scores_total    float64
scores           object
votes             int64
quorum            int64
symbol           object
flagged            bool
dtype: object
0    1694776789
1    1694419286
2    1694098812
3    1694097894
4    1691071200
5    1689876000
Name: start, dtype: int64 0    1695405600
1    1695045600
2    1694700000
3    1694700000


#### Create Lido Proposal Table in SQLite db

In [5]:
import sqlite3

# Define the path to the SQLite database
db_path = "lido_space.db"

# Create a SQLite database connection
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Define the SQL command to create the 'lido_proposals' table with appropriate data types
create_table_sql = """
CREATE TABLE IF NOT EXISTS lido_proposals (
    id TEXT PRIMARY KEY,
    ipfs TEXT,
    title TEXT,
    body TEXT,
    start DATETIME,
    end DATETIME,
    state TEXT,
    author TEXT,
    created DATETIME,
    choices BLOB,
    scores BLOB,
    space TEXT,
    scores_state TEXT,
    scores_total REAL,
    votes INTEGER,
    quorum INTEGER,
    symbol TEXT,
    flagged BOOLEAN
);
"""

# Execute the SQL command to create the table
cursor.execute(create_table_sql)

# Commit the changes and close the database connection
conn.commit()
conn.close()

print("lido_proposals table created successfully.")

lido_proposals table created successfully.


#### Populate Lido Proposals Table

In [6]:
import requests
import pandas as pd
import sqlite3
import json

# Define the SQLite database path
db_path = "lido_space.db"

# Initialize variables
url = "https://hub.snapshot.org/graphql"
first = 6
space_in = ["lido-snapshot.eth"]
state = "all"
skip = 0

# Create a SQLite database connection
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Define the SQL command to insert data into the 'lido_proposals' table
insert_data_sql = """
INSERT OR REPLACE INTO lido_proposals (
    id, ipfs, title, body, start, end, state, author, created, choices, scores, space, 
    scores_state, scores_total, votes, quorum, symbol, flagged
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

# Infinite loop for pagination
while True:
    # GraphQL query parameters
    payload = {
        "operationName": "Proposals",
        "variables": {
            "first": first,
            "skip": skip,
            "space_in": space_in,
            "state": state,
            "author_in": [],
            "title_contains": "",
            "flagged": False,
        },
        "query": """query Proposals($first: Int!, $skip: Int!, $state: String!, $space: String, $space_in: [String], $author_in: [String], $title_contains: String, $space_verified: Boolean, $flagged: Boolean) {
                      proposals(
                        first: $first,
                        skip: $skip,
                        where: {space: $space, state: $state, space_in: $space_in, author_in: $author_in, title_contains: $title_contains, space_verified: $space_verified, flagged: $flagged}
                      ) {
                        id
                        ipfs
                        title
                        body
                        start
                        end
                        state
                        author
                        created
                        choices
                        space {
                          id
                          name
                          members
                          avatar
                          symbol
                          verified
                          plugins
                        }
                        scores_state
                        scores_total
                        scores
                        votes
                        quorum
                        symbol
                        flagged
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract proposals
    proposals = data.get("data", {}).get("proposals", [])

    # Break loop if no more proposals
    if not proposals:
        break

    # Transform the data into a DataFrame
    df = pd.DataFrame(proposals)

    # Iterate through DataFrame and insert data into SQLite table
    for _, row in df.iterrows():
        # Convert the 'choices' column to a JSON string
        choices_json = json.dumps(row["choices"])
        scores_json = json.dumps(row["scores"])

        proposal_data = (
            row["id"],
            row["ipfs"],
            row["title"],
            row["body"],
            row["start"],
            row["end"],
            row["state"],
            row["author"],
            row["created"],
            choices_json,  # Insert the JSON string
            # row["choices"],
            scores_json,  # Insert the JSON string for 'scores'
            # row["scores"],
            row["space"]["name"],
            row["scores_state"],
            row["scores_total"],
            row["votes"],
            row["quorum"],
            row["symbol"],
            row["flagged"],
        )
        cursor.execute(insert_data_sql, proposal_data)

    # Increment skip for pagination
    skip += first

# Commit the changes and close the database connection
conn.commit()
conn.close()

print("---- Lido ETL complete ----")

---- Lido ETL complete ----


#### Establish Connection for (Proposal) Votes

note: proposal-to-votes is a one-to-many relationship


In [7]:
import requests
import pandas as pd

# Initialize variables
url = "https://hub.snapshot.org/graphql"
first = 6
space_in = ["lido-snapshot.eth"]
state = "all"


# Function to fetch proposals
def fetch_proposals(skip):
    payload = {
        "operationName": "Proposals",
        "variables": {
            "first": first,
            "skip": skip,
            "space_in": space_in,
            "state": state,
            "author_in": [],
            "title_contains": "",
            "flagged": False,
        },
        "query": """query Proposals($first: Int!, $skip: Int!, $state: String!, $space: String, $space_in: [String], $author_in: [String], $title_contains: String, $space_verified: Boolean, $flagged: Boolean) {
                      proposals(
                        first: $first,
                        skip: $skip,
                        where: {space: $space, state: $state, space_in: $space_in, author_in: $author_in, title_contains: $title_contains, space_verified: $space_verified, flagged: $flagged}
                      ) {
                        id
                        ipfs
                        title
                        body
                        start
                        end
                        state
                        author
                        created
                        choices
                        space {
                          id
                          name
                          members
                          avatar
                          symbol
                          verified
                          plugins
                        }
                        scores_state
                        scores_total
                        scores
                        votes
                        quorum
                        symbol
                        flagged
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract proposals
    proposals = data.get("data", {}).get("proposals", [])

    return proposals


# Function to fetch and print votes for each proposal
def fetch_and_print_votes(proposal_id):
    payload = {
        "operationName": "Votes",
        "variables": {"proposal": proposal_id},
        "query": """query Votes($proposal: String!) {
                      votes(
                        first: 1000,
                        where: {proposal: $proposal}
                      ) {
                        id
                        voter
                        created
                        choice
                        space {
                          id
                        }
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract and print votes
    votes = data.get("data", {}).get("votes", [])
    df = pd.DataFrame(votes)
    df["proposal_id"] = proposal_id
    print("Votes for Proposal ID:", proposal_id)
    print(df)
    print("\n")
    print(df.dtypes)


# Main loop to fetch and process proposals
skip = 0
while True:
    proposals = fetch_proposals(skip)

    # Break loop if no more proposals
    if not proposals:
        break

    # Process each proposal and fetch votes
    for proposal in proposals:
        proposal_id = proposal["id"]
        fetch_and_print_votes(proposal_id)

    # Increment skip for pagination
    skip += first

print("----Established connection to Votes GraphQL endpoint ----")

print("----Lido Votes nested in Proposals ----")

Votes for Proposal ID: 0xf3ac657484444f0b54eba2c251135c47f875e3d1821496247d11bdd7fab0f291
                                                    id  \
0    0x89166a7edc6eca4ed28f00beb63eda5c21df79795845...   
1    0x66b04d9c1e118be853de7a1ce3dfa212b0341a666c5f...   
2    0xf716daa7852614842edc703dd49f98b4e0ea4609e2b5...   
3    0xce2f9c439def2a5c82eb486b231ce11ac29e84564748...   
4    0x32d342a30183ecadf15cb84ed3669886233e8e7a7f5a...   
..                                                 ...   
428  0x50259e617c1145a81607f4b24c603354021a27510ba5...   
429  0x0c4887053e801dd299d0d26a507f4428580308cccf77...   
430  0xa8f4f9dec75ec12dbdda7c5890027351686597c9e1b4...   
431  0x654a06000b5335f2942c724fd782168e74c3536ad467...   
432  0x50618d4735d31b8152f20ffe6c279a2fcc1be4752679...   

                                          voter     created        choice  \
0    0x19929432f5811BA967b78fdB053cFcFd1518FEAD  1698937392           [2]   
1    0x256e0c8490C693068A7D7923c2535f1AA761F35c  1698937049

#### Create Lido Votes Table

In [8]:
import sqlite3

# Define the path to the SQLite database
db_path = "lido_space.db"

# Create a SQLite database connection
conn = sqlite3.connect(db_path)
cursor = conn.cursor()


# Define the SQL command to create the 'lido_votes' table with appropriate data types
create_votes_table_sql = """
CREATE TABLE IF NOT EXISTS lido_votes (
    id TEXT PRIMARY KEY,
    voter TEXT,
    created DATETIME,
    choice INTEGER,
    space_id TEXT,
    proposal_id TEXT,
    FOREIGN KEY (proposal_id) REFERENCES lido_proposals (id)
);
"""

# Execute the SQL commands to create the tables
cursor.execute(create_votes_table_sql)

# Commit the changes and close the database connection
conn.commit()
conn.close()

print("SQLite table: lido_votes created successfully.")

SQLite table: lido_votes created successfully.


#### Populate Lido Votes Table

In [9]:
import requests
import pandas as pd
import json
import sqlite3

# Initialize variables
url = "https://hub.snapshot.org/graphql"
first = 6
space_in = ["lido-snapshot.eth"]
state = "all"
db_path = "lido_space.db"  # SQLite database path


# Function to fetch proposals
def fetch_proposals(skip):
    payload = {
        "operationName": "Proposals",
        "variables": {
            "first": first,
            "skip": skip,
            "space_in": space_in,
            "state": state,
            "author_in": [],
            "title_contains": "",
            "flagged": False,
        },
        "query": """query Proposals($first: Int!, $skip: Int!, $state: String!, $space: String, $space_in: [String], $author_in: [String], $title_contains: String, $space_verified: Boolean, $flagged: Boolean) {
                      proposals(
                        first: $first,
                        skip: $skip,
                        where: {space: $space, state: $state, space_in: $space_in, author_in: $author_in, title_contains: $title_contains, space_verified: $space_verified, flagged: $flagged}
                      ) {
                        id
                        ipfs
                        title
                        body
                        start
                        end
                        state
                        author
                        created
                        choices
                        space {
                          id
                          name
                          members
                          avatar
                          symbol
                          verified
                          plugins
                        }
                        scores_state
                        scores_total
                        scores
                        votes
                        quorum
                        symbol
                        flagged
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract proposals
    proposals = data.get("data", {}).get("proposals", [])

    return proposals


# Function to fetch and insert votes for a proposal into SQLite
def fetch_and_insert_votes(cursor, proposal_id):
    payload = {
        "operationName": "Votes",
        "variables": {"proposal": proposal_id},
        "query": """query Votes($proposal: String!) {
                      votes(
                        first: 1000,
                        where: {proposal: $proposal}
                      ) {
                        id
                        voter
                        created
                        choice
                        space {
                          id
                        }
                      }
                    }""",
    }

    # Make the request
    response = requests.post(url, json=payload)
    data = response.json()

    # Extract and insert votes
    votes = data.get("data", {}).get("votes", [])
    for vote in votes:
        # Check if 'choice' is an integer
        if isinstance(vote["choice"], int):
            # Handle the integer value as needed
            choice = vote["choice"]
        elif "choice" in vote and str(vote["choice"]).isdigit():
            # Convert 'choice' to an integer if it's a string containing digits
            choice = int(vote["choice"])
        else:
            # Handle other cases, e.g., set it to None
            choice = None

        # Insert vote data into 'lido_votes' table
        cursor.execute(
            """
            INSERT OR REPLACE INTO lido_votes (id, voter, created, choice, space_id, proposal_id)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                vote["id"],
                vote["voter"],
                vote["created"],
                choice,  # Insert the integer value or None based on the handling
                vote["space"]["id"] if "space" in vote else None,
                proposal_id,
            ),
        )


# Create a SQLite database connection
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Main loop to fetch and process proposals and their votes
skip = 0
while True:
    proposals = fetch_proposals(skip)

    # Break loop if no more proposals
    if not proposals:
        break

    # Process each proposal and fetch/insert votes
    for proposal in proposals:
        proposal_id = proposal["id"]
        fetch_and_insert_votes(cursor, proposal_id)

    # Increment skip for pagination
    skip += first

# Commit the changes and close the database connection
conn.commit()
conn.close()

print("----Lido ETL complete ----")
print("Data inserted into lido_votes table successfully.")

----Lido ETL complete ----
Data inserted into lido_votes table successfully.


#### Add embedding_id to Lido Proposal Table

In [13]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect("lido_space.db")
cursor = conn.cursor()

# Add an "embedding_id" column to the "lido_proposals" table
cursor.execute("ALTER TABLE lido_proposals ADD COLUMN embedding_id INTEGER")

# Commit the changes and close the connection
conn.commit()
conn.close()

print("Added 'embedding_id' column to 'lido_proposals' table.")

Added 'embedding_id' column to 'lido_proposals' table.


#### Generate Proposal Embeddings

In [14]:
import sqlite3
import json
import openai
import os
from tiktoken import get_encoding
from openai.embeddings_utils import get_embedding
from dotenv import load_dotenv


# Load environment variables from .env file
load_dotenv()

# Set your OpenAI API key
openai.api_key = os.getenv("openai.api_key")

# Initialize the dictionary for embeddings
embeddings_kv_store = {}

# Initialize a list to store updated proposals
proposals_with_embeddings_keys = []

# Database connection
db_path = "lido_space.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Get the encoding method
# Embedding model parameters
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # Encoding for text-embedding-ada-002
max_tokens = 8000  # Maximum for text-embedding-ada-002 is 8191

encoding = get_encoding(embedding_encoding)

# Query proposals from the 'lido_proposals' table
query = "SELECT id, title, body FROM lido_proposals"
cursor.execute(query)
proposals = cursor.fetchall()


# Define a function to store embeddings and update proposals
def store_embeddings_and_update_proposals(proposal_id, combined_text):
    # Count the number of tokens using the specific encoding
    n_tokens = len(encoding.encode(combined_text))

    # Check if the text is within the token limit
    if n_tokens <= max_tokens:
        # Generate the embedding
        embedding = get_embedding(combined_text, engine=embedding_model)

        # Determine the embedding_id (you can use any suitable logic)
        embedding_id = len(embeddings_kv_store) + 1

        # Store the embedding in the dictionary
        embeddings_kv_store[(proposal_id, embedding_id)] = embedding

        return embedding_id
    else:
        return None


# Loop through each proposal
for proposal in proposals:
    proposal_id, title, body = proposal

    # Combine title and body
    combined_text = f"title: {title}, content: {body}"

    # Store the embedding and get the embedding_id
    embedding_id = store_embeddings_and_update_proposals(proposal_id, combined_text)

    if embedding_id is not None:
        # Add the embedding_id to the proposal
        cursor.execute(
            "UPDATE lido_proposals SET embedding_id = ? WHERE id = ?",
            (embedding_id, proposal_id),
        )

        # Commit the changes to the database
        conn.commit()

# Close the database connection
conn.close()

# Write the embeddings dictionary to a pickle file
import pickle

with open("embeddings.pickle", "wb") as f:
    pickle.dump(embeddings_kv_store, f)

print("Embeddings stored and proposals updated successfully.")

Embeddings stored and proposals updated successfully.


#### Insert Embeddings in Lido Embeddings Table

In [15]:
import sqlite3
import pickle

# Load the embeddings dictionary from the pickle file
with open("embeddings.pickle", "rb") as f:
    embeddings_kv_store = pickle.load(f)

# Database connection
db_path = "lido_space.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Create the "lido_embeddings" table if it doesn't exist
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS lido_embeddings (
        proposal_id TEXT,
        embedding_id INTEGER,
        embedding BLOB,
        PRIMARY KEY (proposal_id, embedding_id),
        FOREIGN KEY (proposal_id) REFERENCES lido_proposals (id)
    )
    """
)

# Insert data from the embeddings dictionary into the "lido_embeddings" table
for (proposal_id, embedding_id), embedding in embeddings_kv_store.items():
    # Convert the embedding to bytes using pickle serialization
    embedding_bytes = pickle.dumps(embedding)

    cursor.execute(
        "INSERT INTO lido_embeddings (proposal_id, embedding_id, embedding) VALUES (?, ?, ?)",
        (proposal_id, embedding_id, sqlite3.Binary(embedding_bytes)),
    )

# Commit the changes and close the database connection
conn.commit()
conn.close()

print("Embeddings stored in the 'lido_embeddings' table successfully.")

Embeddings stored in the 'lido_embeddings' table successfully.


#### Summarize Latest Proposal among all Lido Proposals

In [18]:
import sqlite3

# Define the SQLite database path
db_path = "lido_space.db"

# Connect to the SQLite database
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Query the table to find the most recently created row
query_latest_row = """
SELECT *
FROM lido_proposals
ORDER BY created DESC
LIMIT 1
"""

cursor.execute(query_latest_row)
latest_row = cursor.fetchone()

# Check if a row was found
if latest_row:
    # Get the column names from the cursor description
    column_names = [description[0] for description in cursor.description]

    # Create a dictionary with column names as keys and row values as values
    latest_proposal = dict(zip(column_names, latest_row))

    # Print all columns and values for the latest row
    for column, value in latest_proposal.items():
        print(f"{column}: {value}")
else:
    print("No data found in the table.")

# Close the database connection
conn.close()



id: 0xf3ac657484444f0b54eba2c251135c47f875e3d1821496247d11bdd7fab0f291
ipfs: bafkreifmdas54sitalpv4lqwoafspuk45lbsyenyc5csk5l7sngr3tfoge
title: Staking Router Module Proposal: Simple DVT
body: Following a [discussion of the draft proposal on the Lido Research forum](https://research.lido.fi/t/staking-router-module-proposal-simple-dvt/5625) for the Simple DVT Module, the advancement of the proposed module is now up for Snapshot vote by the Lido DAO.

**The vote will consider two main components:**

**1. Should the Lido DAO add the Simple DVT module as described within the proposal?**
**2. Should potential module slashing penalties be covered by the DAO cover fund or explicitly purchased & scoped by third-party cover?**

The vote will cover these questions in a multiple choice format:
  1a. Deploy Simple DVT
  1b. Do not deploy Simple DVT
  2a. Use Lido cover fund
  2b. Use 3rd party cover provider

*Voters should first select between options 1a and 1b to determine if the module should b

#### Generate Prompt to Summarize Latest Proposal

In [21]:
from datetime import datetime
import textwrap

# maximum width for text wrapping
max_width = 80

# Corrected function to generate the report with winning choice
about = """
the following is a Snapshot vote (a DAO proposal with voting quorums) for Lido DAO.
Lido is a liquid staking token derivatives protocol (LST) that stake tokens on users behalf
in Proof of Stake blockchain and issue IOU in the form of bearer tokens, allowing for
the bearer to passively earn yield on their assets without having to run their own validators nodes
users can thus hold these LST in their wallets or use them in DeFi protocol.

Lido is governed in a DAO via LDO token holders. The following proposal occurs in the Snapshot space
where token holders and delegates can vote for their interests on the next action of the DAO transparently
"""

goal = """
your goal is to create a summary of the Lido DAO proposal on snapshot
1. highlight the most important aspects of the proposal
2. highlight the winning and losing vote choices
3. if the proposal is closed and result is final, say so and speak to which choice is winning
4. explain, in brief, what the implication of this proposal might be, and if winning choice is picked, what it meant for the DAO
5. mention when the proposal was created
"""

def generate_prompt(preamble, proposal, goal):
    title = proposal['title']
    body = proposal['body']
    scores_state = proposal['scores_state']
    vote_choices = proposal['choices']
    state = proposal['state']
    human_readable_date = datetime.utcfromtimestamp(proposal['created']).strftime('%Y-%m-%d %H:%M:%S UTC')

    # Generate the votes section
    votes_section = "votes:\n"
    for i, choice in enumerate(vote_choices, 1):
        votes_section += f"vote choice {i}: {choice}\n"

    # Determine the winning choice, if applicable
    winning_choice = ""
    if scores_state == 'final':  # Removed check for 'state'
        max_index = proposal['scores'].index(max(proposal['scores']))
        winning_choice = f"Winning Choice: {vote_choices[max_index]}"

    # Construct the final text report
    text_str = f"""
    {preamble}
    {title}
    {body}
    Vote Status: {scores_state}
    {votes_section}
    {winning_choice}
    Date time when the proposal was created: {human_readable_date}
    """

    return text_str

# Generate and print the corrected report
prompt_final = generate_prompt(about, latest_proposal, goal)
# Wrap the text using textwrap
wrapped_prompt = textwrap.fill(prompt_final, width=max_width)
print(wrapped_prompt)

      the following is a Snapshot vote (a DAO proposal with voting quorums) for
Lido DAO. Lido is a liquid staking token derivatives protocol (LST) that stake
tokens on users behalf in Proof of Stake blockchain and issue IOU in the form of
bearer tokens, allowing for the bearer to passively earn yield on their assets
without having to run their own validators nodes users can thus hold these LST
in their wallets or use them in DeFi protocol.  Lido is governed in a DAO via
LDO token holders. The following proposal occurs in the Snapshot space where
token holders and delegates can vote for their interests on the next action of
the DAO transparently      Staking Router Module Proposal: Simple DVT
Following a [discussion of the draft proposal on the Lido Research
forum](https://research.lido.fi/t/staking-router-module-proposal-simple-
dvt/5625) for the Simple DVT Module, the advancement of the proposed module is
now up for Snapshot vote by the Lido DAO.  **The vote will consider two main
co

#### Validate Lido Proposal Embeddings

In [30]:
import sqlite3
import pickle

# Database connection
db_path = "lido_space.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Query to retrieve the first row of the "lido_embeddings" table
cursor.execute("SELECT embedding FROM lido_embeddings LIMIT 1")

# Fetch the result
result = cursor.fetchone()

if result is not None:
    # Extract the BLOB data
    embedding_blob = result[0]

    # Deserialize the BLOB data using pickle
    embedding = pickle.loads(embedding_blob)

    # Now, 'embedding' contains the deserialized data that you can work with
    print("Deserialized Embedding:")
    print(embedding)
else:
    print("No data found in the 'lido_embeddings' table.")

# Close the database connection
conn.close()

Deserialized Embedding:
[0.003062581643462181, -0.006340459454804659, -0.016850478947162628, -0.02309405617415905, -0.040762659162282944, 0.03892546892166138, -0.03602615371346474, -0.007815235294401646, -0.021630045026540756, -0.010771962814033031, 0.022792641073465347, 0.0022139577195048332, -0.015788353979587555, 0.019405322149395943, 0.012609153054654598, 0.005368040408939123, 0.0028957275208085775, -0.012243150733411312, 0.03981535881757736, -0.0022085753735154867, -0.011259966529905796, -0.0012128326343372464, -0.014510932378470898, 0.0019322792068123817, 0.0014980994164943695, 0.01939096860587597, 0.0421118438243866, -0.023739943280816078, -0.005658689420670271, 0.01399422250688076, 0.02566325105726719, -0.004693447146564722, -0.017855193465948105, -0.027687031775712967, 0.010563843883574009, -0.01217856165021658, 0.008209943771362305, -0.0075425272807478905, 0.031232234090566635, 0.003593644592911005, 0.027213379740715027, 0.00591704435646534, -0.004661152604967356, -0.02391217

#### Implement Semantic Search with OpenAI Embeddings Utility Functions

In [38]:
import sqlite3
import pickle
from openai.embeddings_utils import get_embedding, cosine_similarity

# Connect to the SQLite database
db_path = "lido_space.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Function to search through the proposals
def search_proposals(query_text, n=3):
    # Generate the embedding for the query text using the specified engine
    query_embedding = get_embedding(query_text, engine="text-embedding-ada-002")

    # Query all embeddings from the database
    cursor.execute("SELECT proposal_id, embedding FROM lido_embeddings")
    rows = cursor.fetchall()

    # Calculate cosine similarity with each proposal
    similarities = {}
    for proposal_id, stored_embedding in rows:
        # Load the stored embedding from binary
        stored_embedding = pickle.loads(stored_embedding)

        # Calculate cosine similarity (no reshaping needed)
        similarity = cosine_similarity(stored_embedding, list(query_embedding))
        similarities[proposal_id] = similarity

    # Sort by similarity and get top n matches
    sorted_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n]

    # Return the top matching proposals with both ID and title
    top_matches = [(proposal_id, get_title_from_id(proposal_id)) for proposal_id, _ in sorted_similarities if get_title_from_id(proposal_id) is not None]
    return top_matches

# Function to retrieve the title of a proposal based on its ID
def get_title_from_id(proposal_id):
    # Query the "lido_proposals" table in the database to get the title
    cursor.execute("SELECT title FROM lido_proposals WHERE id=?", (proposal_id,))
    result = cursor.fetchone()
    if result:
        return result[0]  # Assuming the title is in the first column of the result
    else:
        return None  # Return None if the proposal ID is not found

# Example query
query_text = "Treasury Management"
top_matches = search_proposals(query_text)

# Close the database connection
conn.close()

# Print or use the top matching proposals with both ID and title
print("Top matching proposals:")
for proposal_id, proposal_title in top_matches:
    print(f"Proposal ID: {proposal_id}, Title: {proposal_title}")

Top matching proposals:
Proposal ID: 0xac31f800288c68e32d1eb3cea7a525022faae3eb3bf805d1b3d248cda5375a13, Title: Proposal to approve Lido DAO Treasury Management Principles and authorize the formation of a Treasury Management Committee
Proposal ID: QmcwXpGstkwGVNDXctxwYYRTrGJxd3aedZUMRAQPqBjuSN, Title: Proposal: LDO Treasury Diversification - Part 2
Proposal ID: QmTR46STEjdRwmCKJjDThXXekKJHbhuiUHaXqDJqry3KqK, Title: Proposal: LDO Treasury Diversification 


#### Generate Prompt to Summarize results of Semantic Search

In [51]:
find_sum_goal = f"""
your goal is to create a summary of the Lido DAO proposal on snapshot.
The user has performed a search query for search term: {query_text}.
1. firstly, you will need to decide whether the proposal information given to you above corresponded to the search term
if it doesn't, then simply say that the search result is irrelevant to user, and briefly summarize the content- namely title and short 1 sentence description of the content
If the proposal is relevant, then be perform the following:
1.1. describe what the proposal is in a short description
2. highlight the most important aspects of the proposal
3. highlight the winning and losing vote choices
4. if the proposal is closed and result is final, say so and speak to which choice is winning
5. explain, in brief, what the implication of this proposal might be, and if winning choice is picked, what it meant for the DAO
6. tailor the summary in the way that tailor to user intent given on the search term as best as you can
7. if the date that the proposal was created is available, you should also mention which month, day that the proposal was created

your total output should be around 3-5 sentences, it needs to be descriptive but short form
your output structure should be a single line
"""
import sqlite3
import openai
import textwrap
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key from the environment variable
api_key = os.getenv("openai.api_key")
db_path = "lido_space.db"

# Define your find_sum_goal prompt here

# Initialize OpenAI
openai.api_key = api_key

# Function to generate a summary for a given proposal title
def generate_summary(proposal_title, find_sum_goal):
    # Use the proposal title in your prompt
    sum_prompt = find_sum_goal.replace("{proposal_title}", proposal_title)
    
    # Generate a summary using ChatCompletion
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-16k',
        messages=[
            {"role": "system", "content": sum_prompt},
        ],
        max_tokens=4000,
        temperature=1
    )
    
    return response.choices[0].message.content.strip()

# Connect to the SQLite database
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Loop through top_matches
for proposal_id, _ in top_matches:
    # Fetch the proposal title from the database using the proposal_id
    cursor.execute("SELECT title FROM lido_proposals WHERE id=?", (proposal_id,))
    result = cursor.fetchone()
    
    if result:
        proposal_title = result[0]
        # Generate a summary for the proposal_title
        summary = generate_summary(proposal_title, find_sum_goal)
        print(f"Proposal ID: {proposal_id}, Title: {proposal_title}")
        print(f"Summary: {summary}")
    else:
        print(f"Proposal ID: {proposal_id}, Title: No title available")

# Close the database connection
cursor.close()
conn.close()

Proposal ID: 0xac31f800288c68e32d1eb3cea7a525022faae3eb3bf805d1b3d248cda5375a13, Title: Proposal to approve Lido DAO Treasury Management Principles and authorize the formation of a Treasury Management Committee
Summary: The search result is irrelevant to the user as the proposal is not related to Treasury Management. The proposal is titled "Lido DAO" and aims to create a decentralized autonomous organization (DAO) for the Lido project. The winning and losing vote choices are not mentioned. The outcome of the proposal is not specified. The implication of this proposal would be the establishment of a DAO for Lido, potentially enhancing its governance and decision-making processes.
Proposal ID: QmcwXpGstkwGVNDXctxwYYRTrGJxd3aedZUMRAQPqBjuSN, Title: Proposal: LDO Treasury Diversification - Part 2
Summary: The search result is not relevant to the user's query for "Treasury Management". The proposal on snapshot is titled "Lido DAO Treasury Diversification" and aims to address the diversifica

In [50]:
import sqlite3
import openai
import textwrap
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key from the environment variable
api_key = os.getenv("openai.api_key")
db_path = "lido_space.db"

# Define your find_sum_goal prompt here

# Initialize OpenAI
openai.api_key = api_key

# Function to generate a summary for a given proposal title
def generate_summary(proposal_title, find_sum_goal):
    # Use the proposal title in your prompt
    sum_prompt = find_sum_goal.replace("{proposal_title}", proposal_title)
    
    # Generate a summary using ChatCompletion
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-16k',
        messages=[
            {"role": "system", "content": sum_prompt},
        ],
        max_tokens=4000,
        temperature=1
    )
    
    return response.choices[0].message.content.strip()

# Connect to the SQLite database
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Loop through top_matches
for proposal_id, _ in top_matches:
    # Fetch the proposal title from the database using the proposal_id
    cursor.execute("SELECT title FROM lido_proposals WHERE id=?", (proposal_id,))
    result = cursor.fetchone()
    
    if result:
        proposal_title = result[0]
        # Generate a summary for the proposal_title
        summary = generate_summary(proposal_title, find_sum_goal)
        
        # Wrap the summary text into paragraphs
        paragraphs = textwrap.wrap(summary, width=80)  # Adjust width as needed
        
        print(f"Proposal ID: {proposal_id}, Title: {proposal_title}")
        
        # Print each paragraph
        for paragraph in paragraphs:
            print(paragraph)
        print()  # Add a newline between summaries
    else:
        print(f"Proposal ID: {proposal_id}, Title: No title available")

# Close the database connection
cursor.close()
conn.close()

Proposal ID: 0xac31f800288c68e32d1eb3cea7a525022faae3eb3bf805d1b3d248cda5375a13, Title: Proposal to approve Lido DAO Treasury Management Principles and authorize the formation of a Treasury Management Committee
The search term "Treasury Management" is not relevant to the Lido DAO proposal
on snapshot, which is titled "Lido DAO Treasury proposal submission." Therefore,
the proposal content is summarized briefly as the title and short description:
"Lido DAO Treasury proposal submission - This proposal focuses on the management
of Lido DAO's treasury."

Proposal ID: QmcwXpGstkwGVNDXctxwYYRTrGJxd3aedZUMRAQPqBjuSN, Title: Proposal: LDO Treasury Diversification - Part 2
The search result is irrelevant to the user's search term "Treasury Management."
This proposal discusses the creation of the Lido DAO and its governance process
to manage the Lido protocol. (Proposal created in X month)

Proposal ID: QmTR46STEjdRwmCKJjDThXXekKJHbhuiUHaXqDJqry3KqK, Title: Proposal: LDO Treasury Diversification