# DAO - Snapshot votings

This script downloads all spaces, proposals, votes from the snapshot API (https://docs.snapshot.org/graphql-api) and stores the data in a local SQLite data base. 



Notes:
- To speed up the process (and get as close as possible to the API limit), I use multithreading whenever possible. However, as the API gets more popular, the rate limit has decreased causing 429 errors. Turning off multithreading mitigates this problem. 



## Table of content

1. Import libraries 
2. Define function to get spaces 
3. Define function to get proposal information
4. Define function to get votes information
5. Define function to get follower information
6. Define function to get user information
7. Create a sqlite database 
8. Call all functions and store the results in the sqlite database
9. Show descriptives of available DAO data


## 1. Import libraries

In [1]:
import requests
import json
import pandas as pd
import numpy as np

## 2. Get spaces
The following function allows to to retrieve a predefined number of spaces from the Snapshot API and returns a list of dictionaries. 
If you want to query all spaces, then set space_limit to 10,000. Currently there are ~9,000 spaces. Unfortunately, there is not way to determine the number of spaces in advance. 

It is also important to note that only 20 spaces can be retrieved at once. 

In [1]:
def get_spaces(space_limit):
    """
    this function queries the snapshot graphql API and retrieves all spaces together with some meta data
    pram space_limit: integer that indicates up to which limit the skip counter runs
    return: list of unique spaces on snapshot
    """

    # import libraries
    import requests
    import json
    import time

    # initiate skip counter
    skip = 0 

    # initiate list to store all spaces
    result_list = []


    #define a API request function 

    def API_call(url, query):
        # send query to API
        r = requests.post(url, json={'query': query})

        # initiate list to be filled later on
        dict_list = []

        # check if status code is valid
        if r.status_code == 200:
            
            # load query result as json
            r_dict = json.loads(r.text)

            # progress notification
            print("status_code: " + str(r.status_code))

            # access space data from API response
            space_list = r_dict["data"]["spaces"]

            # loop through all spaces 
            for space in space_list:
                    
                space_dict = {}
                space_dict["space_id"] = space["id"]
                space_dict["space_name"] = space["name"]
                space_dict["about"] = space["about"]
                space_dict["network"] = space["network"]
                space_dict["symbol"] = space["symbol"]
                space_dict["number_of_strategies"] = len(space["strategies"])
                space_dict["strategies_list"] = space["strategies"]
                space_dict["number_of_admins"] = len(space["admins"])
                space_dict["admins"] = space["admins"]
                space_dict["number_of_members"] = len(space["members"])
                space_dict["members_list"] = space["members"]
                space_dict["filters_minScore"] = space["filters"]["minScore"]
                space_dict["filters_onlyMembers"] = space["filters"]["onlyMembers"]
                space_dict["plugins"] = space["plugins"]
                dict_list.append(space_dict)
            return dict_list
                
        else:
            # error message
            print("Invalide status code: "+ str(r.status_code))
            return r.status_code



    # snapshot does not tell how many spaces there are. Skipping restarts the loop after all spaces have been queried. Solution is to filter duplicates at the end.
    while skip < space_limit: 
        # progress notification
        print("skip: "+str(skip))


        # define graphQL query string
        query = """{
                    spaces(
                        first: 20,
                        skip: """+str(skip)+""",
                        orderBy: "created",
                        orderDirection: asc
                    ) {
                        id
                        name
                        about
                        network
                        symbol
                        strategies {
                        name
                        params
                        }
                        admins
                        members
                        filters {
                        minScore
                        onlyMembers
                        }
                        plugins
                    }
                    } """

        url = 'https://hub.snapshot.org/graphql'

        # call the API
        result = API_call(url, query)
        
        if type(result) == list:
            # add list of new spaces to final list
            result_list = result_list+result
        else:
            
            if result == 429:
                print("Error code 429: waiting 10s before retry")
                time.sleep(10)

                result2 = API_call(url, query)
                if type(result2) == list:
                    # add list of new spaces to final list
                    result_list = result_list+result2
                else:
                    print("retry has failed")
                    break
            else: 
                print("Stopped due to error code: "+ str(result))
                break

        # incriment skip by 20 (API does not allow to skip more spaces at every iteration)
        skip += 20


    # filter duplicates
    seen = list()
    uniq_spaces = [x for x in result_list if x not in seen and not seen.append(x)]    
    
    # return final list of unique spaces
    return uniq_spaces


#### Call the function to get all spaces

In [3]:
# call the function and store the output as a csv file (as a backup)
spaces = get_spaces(12000)

df_spaces = pd.DataFrame(spaces)

# save data as csv
df_spaces.to_csv('snapshot_spaces_nov2022.csv', index=False, sep=";")

# read csv data backup 
#df_spaces = pd.read_csv('snapshot_spaces.csv',  sep=";")  

skip: 0
status_code: 200
skip: 20
status_code: 200
skip: 40
status_code: 200
skip: 60
status_code: 200
skip: 80
status_code: 200
skip: 100
status_code: 200
skip: 120
status_code: 200
skip: 140
status_code: 200
skip: 160
status_code: 200
skip: 180
status_code: 200
skip: 200
status_code: 200
skip: 220
status_code: 200
skip: 240
status_code: 200
skip: 260
status_code: 200
skip: 280
status_code: 200
skip: 300
status_code: 200
skip: 320
status_code: 200
skip: 340
status_code: 200
skip: 360
status_code: 200
skip: 380
status_code: 200
skip: 400
status_code: 200
skip: 420
status_code: 200
skip: 440
status_code: 200
skip: 460
status_code: 200
skip: 480
status_code: 200
skip: 500
status_code: 200
skip: 520
status_code: 200
skip: 540
status_code: 200
skip: 560
status_code: 200
skip: 580
status_code: 200
skip: 600
status_code: 200
skip: 620
status_code: 200
skip: 640
status_code: 200
skip: 660
status_code: 200
skip: 680
status_code: 200
skip: 700
status_code: 200
skip: 720
status_code: 200
skip: 7

## 3. Get all proposals for every space
The following function allows to retrieve all proposals for a space and returns a list of dictionaries. 

In [10]:
# define a function that retrieves all proposals of a space

def get_proposals(space_id):
  """
    this function queries the snapshot graphql API and retrieves all proposals together with some meta data for a given space
    pram space_id: id of a space (can be retrieved by calling the space endpoint)
    return: list of all proposals for a given space
  """

  # import libraries
  import requests
  import json
  import time

  # init list to store all proposal dicts
  proposal_list = []

  # define query string
  query = """{
    proposals (
      first: 10000,
      skip: 0,
      where: {
        space_in: ["%s"],
        state: "all"
      },
      orderBy: "created",
      orderDirection: desc
    ) {
      id
      title
      body
      choices
      start
      end
      snapshot
      state
      scores
      scores_by_strategy
      scores_total
      scores_updated
      author
      space {
        id
        name
      }
    }
  } """ % (space_id)

  # progress notification
  print("query proposal for: " + space_id)

  # API query routine
  url = 'https://hub.snapshot.org/graphql'
  r = requests.post(url, json={'query': query})
  if r.status_code == 200:
    r_dict = json.loads(r.text)

    # loop through list of returned proposals and add space variables
    for proposal in r_dict["data"]["proposals"]:

        proposal_dict = {}
        
        # extract space id and name from the space entry of the original dict
        proposal_dict["space_id"] =  proposal["space"]["id"]
        proposal_dict["space_name"] = proposal["space"]["name"]

        # add space id and name dict to the original dict
        proposal_dict = {**proposal_dict, **proposal}

        # delete space from original dict
        del proposal_dict['space']
        proposal_list.append(proposal_dict)
  else:
    print("Error! status code: "+str(r.status_code))
    if r.status_code == 429:
      print("retry after waiting 3s")
      time.sleep(3)
    get_proposals(space_id)

  # return final list of proposal dicts
  return proposal_list
  


In [11]:
# creat multithread function for proposal API call

def multithread_get_proposals(space_list):
    """
    this function splits up the task to loop through a list of space IDs and retrievs a list of proposals for all spaces.
    :param: space_list: list of space IDs
    return: list of all proposals (for all spaces)
    """

    # import libraries
    import requests
    from concurrent.futures import ThreadPoolExecutor, as_completed



    # initiate proposal list to be filled 
    nested_proposal_dict_list = []
    proposal_dict_list = []

    # define a runner that splits up the list of spaces among a number of workers 
    def runner():
        threads= []
        with ThreadPoolExecutor(max_workers=9) as executor: #more than 10 workers results in significant more 429 errors
            for space_id in space_list:
                threads.append(executor.submit(get_proposals, space_id))

            for task in as_completed(threads):
                nested_proposal_dict_list.append(task.result())    
    # call the runner  
    runner()

    # unpack the lists of proposals for every space into one list of proposals for all spaces
    for dict_list in nested_proposal_dict_list:
        for proposal_dict in dict_list:
            proposal_dict_list.append(proposal_dict)

    # return list of all proposal dicts
    return proposal_dict_list


#### Get all proposals (multithread)

In [12]:
# get list of all space ids
space_list = df_spaces["space_id"]

# shorter list for tests
# space_list = list(df_spaces["space_id"][0:100])

# query all spaces 
prop_list = multithread_get_proposals(space_list)

# save the resulting list of dicts as csv file for backup 
#df_proposals = pd.DataFrame(prop_list)
#df_proposals.to_csv('snapshot_proposals_nov2022.csv', index=False, sep=";")

# read file from backup 
#df_proposals = pd.read_csv('snapshot_proposals_nov2022.csv',  sep=";")  

query proposal for: bonustrack.eth
query proposal for: fabien.eth
query proposal for: ichi.eth
query proposal for: loyalfinance.eth
query proposal for: damflux.eth
query proposal for: ufode.eth
query proposal for: boringvote.eth
query proposal for: relayernetwork.eth
query proposal for: prophetfinance.eth
query proposal for: liddao.eth
query proposal for: idlefinance.ethquery proposal for: scradclub.eth

query proposal for: pub-finance.eth
query proposal for: saffronfinance.eth
query proposal for: sovi.eth
query proposal for: monportefeuille.eth
query proposal for: flowcommunity.eth
query proposal for: tradersquare.eth
query proposal for: xionetwork.eth
query proposal for: minkfinance.eth
query proposal for: mutualdao.eth
query proposal for: esd.eth
query proposal for: vote.southchain.eth
query proposal for: flushlol.eth
query proposal for: xethfinance.eth
query proposal for: dodobird.eth
query proposal for: archerdao.eth
query proposal for: bbra.eth
query proposal for: truegov.eth
que

In [None]:
# get number of proposals per space
df_proposals_grouped = df_proposals.groupby(by=["space_id"]).count().sort_values(['id'],ascending=False)
df_proposals_grouped[df_proposals_grouped['id']>=10]

# 4. Get votes information 


The following function retrieves all votes for a give proposal. 


In [43]:
# definition of get_votes function
# (with exception handling)

def get_votes(proposal_id):
    """ this function retrieves all votes for a given proposal
    :param: proposal_id: ID of a snapshot proposal
    returns: list of dictionaries 
    """

    # init list that stores all votes dicts and is returned later
    votes_dict_list = []

    # import libraries
    import requests
    import json
    import time

    #define query string
    query = """ {
                votes (
                    first: 1000000
                    skip: 0
                    where: {
                    proposal: "%s"
                    }
                    orderBy: "created",
                    orderDirection: desc
                ) {
                    id
                    voter
                    vp
                    vp_by_strategy
                    vp_state
                    created
                    proposal {
                    id
                    }
                    choice
                    space {
                    id
                    }
                }
                } """ % (proposal_id)


    # API query routine
    url = 'https://hub.snapshot.org/graphql'


    # try loop
    try: 
        r = requests.post(url, json={'query': query})

        r.raise_for_status()

        if r.status_code == 200: 
            r_dict = json.loads(r.text)

            # loop through votes and store variables in a dict
            for vote in r_dict["data"]["votes"]:

                vote_dict = {}
                vote_dict["voter_proposal_id"] = vote["voter"]+"&&"+vote["proposal"]["id"]
                vote_dict["voter_id"] = vote["voter"]
                vote_dict["proposal_id"] = vote["proposal"]["id"]
                vote_dict["vp"] = vote["vp"]
                vote_dict["vp_by_strategy"] = vote["vp_by_strategy"]
                vote_dict["vp_state"] = vote["vp_state"]
                vote_dict["created"] = vote["created"]
                vote_dict["choice"] = vote["choice"]
                vote_dict["space_id"] = vote["space"]["id"]
                
                # append dict to the list of dicts
                votes_dict_list.append(vote_dict)
        else:
            print("Error message: " + str(r.status_code) + " @proposal: "+ str(proposal_id))

            if r.status_code == 429:
                # wait 5s and retry
                print("Retry after 5s")
                time.sleep(5)
                get_votes(proposal_id)


    except requests.exceptions.HTTPError as e:
        # Maybe set up for a retry, or continue in a retry loop
        print("HTTPError")
        print(e)
        print("Error at proposal: "+ proposal_id)
        print("TooManyRedirects: waiting some seconds before retry")
        time.sleep(5)
        get_votes(proposal_id)

    except requests.exceptions.Timeout as e:
    # Maybe set up for a retry, or continue in a retry loop
        print("Timeout")
        print(e)
        print("Error at proposal: "+ proposal_id)

    except requests.exceptions.ConnectionError as e:
    # Maybe set up for a retry, or continue in a retry loop
        print("ConnectionError")
        print(e)
        print("Error at proposal: "+ proposal_id)


    except requests.exceptions.TooManyRedirects as e:
        # Tell the user their URL was bad and try a different one
        print("TooManyRedirects: wait some seconds and restart")
        print(e)
        print("Error at proposal: "+ proposal_id)
        time.sleep(5)
        get_votes(proposal_id)


    except requests.exceptions.RequestException as e:
        # catastrophic error. bail.
        print(e)
        print("Error at proposal: "+ proposal_id)

    # return the final list of dicts
    return votes_dict_list

In [46]:
# creat multithread function for votes API call

def multithread_get_votes(prop_list):
    """
    this function splits to task to query the votes for all proposals among a number of workers
    :param: prop_list: list of snapshot proposal IDs
    return: list of votes dicts for all proposals
    """

    # import libraries
    import requests
    import json
    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed


    # initiate vote list to be filled 
    nested_votes_dict_list = []
    votes_dict_list = []

    # define a runner that splits up the task among a number of works
    def runner():
        threads= []
        with ThreadPoolExecutor(max_workers=2) as executor: # before the rate limit you could use up to 10 workers w/o 429 errors. 
            for proposal_id in prop_list:

                threads.append(executor.submit(get_votes, proposal_id))
                
                
            for task in as_completed(threads):
                nested_votes_dict_list.append(task.result())   

    # call the runner
    runner()

    # loop through nested list and flatten it
    for dict_list in nested_votes_dict_list:

        for votes_dict in dict_list:

            votes_dict_list.append(votes_dict)

    # return the final list of all votes dicts
    return votes_dict_list


#### Get all votes (multithread)

In [32]:
# load proposals from disk if necessary
#df_proposals = pd.read_csv('snapshot_proposals.csv',  sep=";") 

votes_dict_list = []

# create chunks to prevent API limit
proposal_chunks = [df_proposals["id"][x:x+6000] for x in range(0, len(df_proposals["id"]), 6000)]

In [None]:

# check number of chunks 

#votes_dict_list_chunk1 = multithread_get_votes(proposal_chunks[0])
votes_dict_list_chunk2 = multithread_get_votes(proposal_chunks[1])
votes_dict_list_chunk3 = multithread_get_votes(proposal_chunks[2])
votes_dict_list_chunk4 = multithread_get_votes(proposal_chunks[3])
votes_dict_list_chunk5 = multithread_get_votes(proposal_chunks[4])
votes_dict_list_chunk6 = multithread_get_votes(proposal_chunks[5])
votes_dict_list_chunk7 = multithread_get_votes(proposal_chunks[6])
votes_dict_list_chunk8 = multithread_get_votes(proposal_chunks[7])
votes_dict_list_chunk9 = multithread_get_votes(proposal_chunks[8])
votes_dict_list_chunk10 = multithread_get_votes(proposal_chunks[9])
votes_dict_list_chunk11 = multithread_get_votes(proposal_chunks[10])


In [44]:
votes_dict_list_chunk1 = multithread_get_votes(proposal_chunks[0])

df_votes_dict_list_chunk1 = pd.DataFrame(votes_dict_list_chunk1)
df_votes_dict_list_chunk1.to_csv('votes_dict_list_chunk1.csv', index=False, sep=";")

HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: Qma7L1cNfLdLZJG6UoVowsCrQ39dvQ8sZMkEd7vfNdBpkH
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTBoSv6LeVUBJuXZx38u85DWbSDw3jXNFmvf1LjL5K4DZ
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmZCYGXcmamCajfr6BVxPfUgCgbJrBucDVUfVFoLfpoASP
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: Qma7L1cNfLdLZJG6UoVowsCrQ39dvQ8sZMkEd7vfNdBpkH
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTBoSv6LeVUBJuXZx38u85DWbSDw3jXNFmvf1LjL5K4DZ
TooManyRedirects: wait some seconds an

In [47]:
votes_dict_list_chunk2 = multithread_get_votes(proposal_chunks[1])

df_votes_dict_list_chunk2 = pd.DataFrame(votes_dict_list_chunk2)
df_votes_dict_list_chunk2.to_csv('votes_dict_list_chunk2.csv', index=False, sep=";")

HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTsfMhnFLaCyaroQ1tE3GuuFFFNtVLy7aZ8SSBsES51AY
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: Qmcq42jXtXxFSDo7rmNgo5nFU1a4LYaEpn3u68KvmUKj7z
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTsfMhnFLaCyaroQ1tE3GuuFFFNtVLy7aZ8SSBsES51AY
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: Qmcq42jXtXxFSDo7rmNgo5nFU1a4LYaEpn3u68KvmUKj7z
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTsfMhnFLaCyaroQ1tE3GuuFFFNtVLy7aZ8SSBsES51AY
TooManyRedirects: wait some seconds an

In [51]:
votes_dict_list_chunk3 = multithread_get_votes(proposal_chunks[2])

df_votes_dict_list_chunk3 = pd.DataFrame(votes_dict_list_chunk3)
df_votes_dict_list_chunk3.to_csv('votes_dict_list_chunk3.csv', index=False, sep=";")

HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmNzkRJdQ4DqbiNGyGWXaN3fBd31Y3M9ZDgoCi6D72Vf6U
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmZk5HWASJF4QecLBLSwW15bhXRvZTCkHoPzDvJ4iirCWj
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmaYA2bt9b8VmqU4x5vuEgxppnQinRFoT13bdseCYxoZY4
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTcAQdBmLqZLvmoUnDtMVRkTVesGkEcamnaJLKWScbysQ
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: QmTcAQdBmLqZLvmoUnDtMVRkTVesGkEcamnaJLKWScbysQ
TooManyRedirects: wait some seconds an

In [53]:
votes_dict_list_chunk4 = multithread_get_votes(proposal_chunks[3])

df_votes_dict_list_chunk4 = pd.DataFrame(votes_dict_list_chunk4)
df_votes_dict_list_chunk4.to_csv('votes_dict_list_chunk4.csv', index=False, sep=";")

HTTPError
504 Server Error: Gateway Time-out for url: https://hub.snapshot.org/graphql
Error at proposal: 0x37d8613e98f33393712ca2ac4dd018eafdecd127acd32888a316c49a9b3d34a8
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xcf37e6794e6ad3bf8016a0f494d58970270dad277db9d43f82e9754fa833fb75
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xca69eec17dec58661a11f193ae5f04aec13a73ccc846008f2a592b64c9da654e
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xcf37e6794e6ad3bf8016a0f494d58970270dad277db9d43f82e9754fa833fb75
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xca69

In [56]:
votes_dict_list_chunk5 = multithread_get_votes(proposal_chunks[4])

df_votes_dict_list_chunk5 = pd.DataFrame(votes_dict_list_chunk5)
df_votes_dict_list_chunk5.to_csv('votes_dict_list_chunk5.csv', index=False, sep=";")

HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0x0b5a359f298fdb55193418fb5450f45153b288795f9b85bf1565d3afc145bc6e
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xa549c02b323789f37f856fb33be072bcd23ae3e50b5d01850b50d3d578a1a23d
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0x0b5a359f298fdb55193418fb5450f45153b288795f9b85bf1565d3afc145bc6e
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xa549c02b323789f37f856fb33be072bcd23ae3e50b5d01850b50d3d578a1a23d
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xe8c

In [58]:
votes_dict_list_chunk6 = multithread_get_votes(proposal_chunks[5])

df_votes_dict_list_chunk6 = pd.DataFrame(votes_dict_list_chunk6)
df_votes_dict_list_chunk6.to_csv('votes_dict_list_chunk6.csv', index=False, sep=";")

HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xf1794fc7d6d0c0210f67678f1e41cf15f639f604392a84f80d4e41cb4f355c49
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0x63936f00a8e75bf6439edb39fab8b613beb76bacb2ea55376aeb2a4b8a10a70c
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xf1794fc7d6d0c0210f67678f1e41cf15f639f604392a84f80d4e41cb4f355c49
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0x63936f00a8e75bf6439edb39fab8b613beb76bacb2ea55376aeb2a4b8a10a70c
TooManyRedirects: wait some seconds and restart
HTTPError
429 Client Error: Too Many Requests for url: https://hub.snapshot.org/graphql
Error at proposal: 0xf17

In [None]:
votes_dict_list_chunk7 = multithread_get_votes(proposal_chunks[6])

df_votes_dict_list_chunk7 = pd.DataFrame(votes_dict_list_chunk7)
df_votes_dict_list_chunk7.to_csv('votes_dict_list_chunk7.csv', index=False, sep=";")

In [None]:
votes_dict_list_chunk8 = multithread_get_votes(proposal_chunks[7])

df_votes_dict_list_chunk8 = pd.DataFrame(votes_dict_list_chunk8)
df_votes_dict_list_chunk8.to_csv('votes_dict_list_chunk8.csv', index=False, sep=";")

In [None]:
votes_dict_list_chunk9 = multithread_get_votes(proposal_chunks[8])

df_votes_dict_list_chunk9 = pd.DataFrame(votes_dict_list_chunk9)
df_votes_dict_list_chunk9.to_csv('votes_dict_list_chunk9.csv', index=False, sep=";")

In [None]:
votes_dict_list_chunk10 = multithread_get_votes(proposal_chunks[9])

df_votes_dict_list_chunk10 = pd.DataFrame(votes_dict_list_chunk10)
df_votes_dict_list_chunk10.to_csv('votes_dict_list_chunk10.csv', index=False, sep=";")

In [None]:
votes_dict_list_chunk11 = multithread_get_votes(proposal_chunks[10])

df_votes_dict_list_chunk11 = pd.DataFrame(votes_dict_list_chunk11)
df_votes_dict_list_chunk11.to_csv('votes_dict_list_chunk11.csv', index=False, sep=";")

In [71]:
chunk_lists = [votes_dict_list_chunk1, votes_dict_list_chunk2, votes_dict_list_chunk3, votes_dict_list_chunk4, votes_dict_list_chunk5, votes_dict_list_chunk6, votes_dict_list_chunk7, votes_dict_list_chunk8, votes_dict_list_chunk9, votes_dict_list_chunk10, votes_dict_list_chunk11]

votes_dict_list = []

for c_list in chunk_lists:
    for vote in c_list:
        votes_dict_list.append(vote)

len(votes_dict_list)

5774011

In [74]:
df_votes = pd.DataFrame(votes_dict_list)

# save data as csv
df_votes.to_csv('snapshot_votes_nov2022.csv', index=False, sep=";")

# 5. Get follower information 

The following function retrieves all followers for a give space. 


In [3]:
# definition of get_votes function 

def get_followers(init_skip, follower_query_limit):
    """
    this function loops through range of following relationship and return the data
    :param: init_skip: integer - the start value of the range
    :param: follower_query_limit: integer - the end of the range to be queried
    returns: list of dicts containing information about the follower relationship
    """

    # import libraries
    import requests
    import json
    import time

    # init list to store all dicts
    follow_dict_list = []

    # loop through range of following relationships
    while init_skip <= follower_query_limit:
        
        # define query string
        query = """ {
                    follows(
                        first: 1000,
                        skip: %s
                    ) {
                        follower
                        space {
                        id
                        }
                        created
                    }
                    } """ % (str(init_skip))

        # API request routine
        url = 'https://hub.snapshot.org/graphql'
        r = requests.post(url, json={'query': query})
        if r.status_code == 200: 
            r_dict = json.loads(r.text)
            r_dict

            # loop through results and store the data in a dict
            for follow in r_dict["data"]["follows"]:
                follow_dict = {}
                follow_dict["follow_id"] = follow["follower"]+"&&"+follow["space"]["id"]
                follow_dict["follower_id"] = follow["follower"]
                follow_dict["space_id"] = follow["space"]["id"]
                follow_dict["created"] = follow["created"]
                
                # append follow list with follow dict
                follow_dict_list.append(follow_dict)

        else: 
            print("Error message: " + str(r.status_code))
            if r.status_code == 429:
                print("Waiting 5s before retry")
                time.sleep(5)
                get_followers(init_skip, follower_query_limit)
            else:
                print("No retry possible")


        # increment skip value
        init_skip += 1000

    # return final list of follow dicts
    return follow_dict_list



In [4]:
# creat multithread function for followers API call

def multithread_get_followers(number_of_follows_to_query, workers):
    """
    this function splits up the task to retrieve all following relationships among a number of workers
    :param number_of_follows_to_query: integer indicating the number of following relationships to be queried
    :param workers: integer that indicates the number of works that split up the task
    :return: list of dicts of all following relationships
    """
    
    import requests
    import numpy as np
    from concurrent.futures import ThreadPoolExecutor, as_completed


    # split the query space into multithread tasks
    task_size = number_of_follows_to_query/workers

    skip_start = np.arange(0, number_of_follows_to_query, task_size).tolist()
    skip_end = np.arange(task_size, number_of_follows_to_query+task_size, task_size).tolist()

    skip_list = list(zip(skip_start, skip_end))


    # initiate vote list to be filled 
    nested_followers_dict_list = []
    followers_dict_list = []

    
    def runner():
        threads= []
        with ThreadPoolExecutor(max_workers=workers) as executor: #more than 10 workers results in significant more 429 errors
            for skip in skip_list:
                threads.append(executor.submit(get_followers, int(skip[0]), int(skip[1])))

            for task in as_completed(threads):
                nested_followers_dict_list.append(task.result())    
        
    runner()

    # loop through nested list and flatten it
    for dict_list in nested_followers_dict_list:

        for follower_dict in dict_list:

            followers_dict_list.append(follower_dict)


    # filter unique follows ?

    return followers_dict_list

#### Get all follows

In [17]:
follower_dict_list = multithread_get_followers(2000000, 1)

In [18]:
df_follows  = pd.DataFrame(follower_dict_list)
df_follows.to_csv('snapshot_follows.csv', index=False, sep=";")

In [91]:
follower_dict_list = multithread_get_followers(1100000, 10)

# save the follows as csv file
#df_follows  = pd.DataFrame(follower_dict_list)
#df_follows.to_csv('snapshot_follows.csv', index=False, sep=";")


# read file from backup 
#df_follows = pd.read_csv('snapshot_followers.csv',  sep=";")  

In [95]:
# size of follows df
len(df_followers)

# unique follower addresses
len(df_followers["follower_id"].unique())

df_follows_grouped = df_followers.groupby(by=["space_id"]).count().sort_values(['follower_id'],ascending=False)

df_follows_grouped_mostused = df_follows_grouped[df_follows_grouped['follower_id']>=50]

1085224

# 6. Get user information (not relevant)


The following function retrieves all user information for a given user address.


In [128]:
# definition of get_votes function

def get_user_info(user_id):

    query = """ {
                    users(first: 10, where: { id: "%s" }) {
                        id
                        name
                        about
                        avatar
                    }
                    } """ % (user_id)



    url = 'https://hub.snapshot.org/graphql'
    r = requests.post(url, json={'query': query})
    print(r.status_code)
    r_dict = json.loads(r.text)

    return r_dict["data"]["users"][0]




In [129]:
get_user_info("0xeF8305E140ac520225DAf050e2f71d5fBcC543e7")

200


{'id': '0xeF8305E140ac520225DAf050e2f71d5fBcC543e7',
 'name': 'Fabien!',
 'about': 'The less code you use, the less there is to break',
 'avatar': 'ipfs://QmdR1QWmDxYTwoDo2hp9N5iGEMHKxPH2LyhE4ACkTndn84'}

# 7. Create a SQLite database to store the data 

The database has 4 different tables:

- Spaces (PK: space_id FK: follower_id)
- Proposals (PK: proposal_id FK: space_id)
- Votes (PK: vote_id  FK: space_id, proposal_id)
- Followers (PK: follower_id FK:space_id)
- User (PK/FK: follower_id )


In [12]:
# function that allows to create a new database

def create_data_base(db_file):
    """ create a database connection to a SQLite database 
    :param db_file: path to data base file"""

    # import libraries
    import sqlite3
    from sqlite3 import Error

    # create a new connection
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print("db successfully create; sqlite version: "+sqlite3.version)
    except Error as e:
        print(e)
    finally:
        if conn:
            conn.close()



In [None]:
# call the function
#create_data_base(r"snapshot_sqlite.db")

In [13]:
# function that creates a connection to the database

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return conn: Connection object or None
    """

    # import libraries
    import sqlite3
    from sqlite3 import Error

    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print("successfully connected to the db")
        return conn
    except Error as e:
        print(e)

    return conn


In [14]:
# function that closes the connection to the database

def close_conn(conn):
    """ this function takes a connection to a db as argument and closes it
    :param conn: Connection object
    """

    # import libraries
    import sqlite3
    from sqlite3 import Error

    # close conncetion
    conn.close()
    print("Connection has been closed")
    

    

## 7.1 Create tables

In [15]:
# function that creates a new table 

def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    """

    # import libraries
    import sqlite3
    from sqlite3 import Error

    try:
        c = conn.cursor()
        c.execute(create_table_sql)

        
    except Error as e:
        print(e)

In [16]:
# define a function that creates all tables

def create_all_tables(db_file, create_table_queries):
    """ this function creates all tables from a list of create table queries 
    :param db_file: path of the data base file
    :param create_table_queries: a list of CREATE TABLE statement
    :return:
    """
    # import libraries
    import sqlite3
    from sqlite3 import Error

    # create a connection the the data base file
    conn = create_connection(db_file)

    # create tables
    if conn is not None:

        for table in create_table_queries:

        # create projects table
            create_table(conn, table)
            print("table created")

        # commit changes 
        conn.commit()

        # close the connection to the data base
        close_conn(conn)
    else:
        print("Error! cannot create the database connection.")

In [17]:
# define create table queries

# create spaces table
create_spaces_table = """CREATE TABLE IF NOT EXISTS spaces (
	                     space_id text PRIMARY KEY,
						 space_name text,
						 about text,
						 network integer,
						 symbol text,
						 number_of_strategies integer,
						 strategies_list text,
						 number_of_admins integer,
						 admins text,
						 number_of_members integer,
						 member_list text,
						 filters_minScore text,
						 filters_onlyMembers text,
						 plugins text
                        );
                      """

# create proposals table
create_proposals_table = """CREATE TABLE IF NOT EXISTS proposals (
						 space_id text, 
						 space_name text,
						 proposal_id text PRIMARY KEY,
						 title text,
						 body text,
						 choices text,
						 start integer,
						 end integer,
						 snapshot text,
						 state text,
						 scores text,
						 scores_by_strategy text,
						 scores_total  text,
						 scores_updated text,
						 author text
                        );
                      """

# create votes table
create_votes_table = """CREATE TABLE IF NOT EXISTS votes (
	                     voter_proposal_id text PRIMARY KEY,
						 voter text,
						 proposal_id text,
						 vp text,
						 vp_by_strategy text,
						 vp_state text,
						 created text,
						 choice text,
						 space_id text
                        );
                      """

# create follows table
create_follows_table = """CREATE TABLE IF NOT EXISTS follows (
	                     follow_id text text PRIMARY KEY,
	                     follower_id text NOT NULL,
	                     space_id text,
	                     created integer
                        );
                      """

# create users table
create_users_table = """CREATE TABLE IF NOT EXISTS users (
	                     user_id text PRIMARY KEY,
	                     about text,
	                     avatar text
                        );
                      """


create_table_queries = [create_spaces_table, create_proposals_table, create_votes_table, create_follows_table, create_users_table]




drop_spaces_table = """DROP TABLE spaces"""

drop_proposals_table = """DROP TABLE proposals"""

drop_votes_table = """DROP TABLE votes"""

drop_follows_table = """DROP TABLE follows"""

drop_users_table = """DROP TABLE users"""


drop_table_queries = [drop_spaces_table, drop_proposals_table, drop_votes_table, drop_follows_table, drop_users_table]

In [81]:
# create all tables by calling the create table function

create_all_tables(r"snapshot_sqlite_nov2022.db", create_table_queries)

successfully connected to the db
table created
table created
table created
table created
table created
Connection has been closed


In [18]:
# function that drops a table 

def drop_table(conn, drop_table_sql):
    """ drop a table from the drop_table_sql statement
    :param conn: Connection object
    :param drop_table_sql: a DROP TABLE statement
    """

    # import libraries
    import sqlite3
    from sqlite3 import Error

    try:
        c = conn.cursor()
        c.execute(drop_table_sql)
        conn.commit()

        print("Table has been dropped")

        # close the connection to the data base
        close_conn(conn)

        
        
    except Error as e:
        print(e)

In [17]:
# drop_table(create_connection(r"snapshot_sqlite.db"), drop_follows_table)

successfully connected to the db
Connection has been closed
Table has been dropped


## 7.2 Insert data into the tables

#### 7.2.1 Insert into spaces table

In [19]:
# insert all spaces into spaces table

def insert_spaces(spaces_dict_list):
    """
    This function connects to the snapshot_sqlite database and inserts all space into the spaces table
    :param spaces_dict_list: list of dicts
    """

    # creat connection to the data base
    conn = create_connection( r"snapshot_sqlite_nov2022.db")
    cur = conn.cursor()

    # loop through dicts in dict list and insert them into the data base
    for space_dict in spaces_dict_list:
        space_values = list(space_dict.values())

        query = """ INSERT OR REPLACE INTO spaces(space_id, space_name, about, network, symbol, number_of_strategies, strategies_list, number_of_admins, admins, number_of_members, member_list, filters_minScore, filters_onlyMembers, plugins)
                VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) 
                """
        
        cur.execute(query, space_values)

    # commit changes to the database
    conn.commit()

    # close connection
    close_conn(conn)

    

In [20]:
# call the insert function

# load spaces from backup csv
df_spaces = pd.read_csv('snapshot_spaces_nov2022.csv',  sep=";")  

# turn data frame into list of dictionaries
spaces_dict_list = df_spaces.to_dict('records')

# insert spaces into table
insert_spaces(spaces_dict_list)

successfully connected to the db
Connection has been closed


#### 7.2.2 Insert into proposals table

In [22]:
# insert all proposals into proposals table

def insert_proposals(proposals_dict_list):
    """
    This function connects to the snapshot_sqlite database and inserts all proposals into the proposals table
    :param proposals_dict_list: list of dicts
    """

    # creat connection to the data base
    conn = create_connection( r"snapshot_sqlite_nov2022.db")
    cur = conn.cursor()

    # loop through dicts in dict list and insert them into the data base
    for proposal_dict in proposals_dict_list:
        proposal_values = list(proposal_dict.values())

        query = """ INSERT OR REPLACE INTO proposals(space_id, space_name, proposal_id, title, body, choices, start, end, snapshot, state, scores, scores_by_strategy, scores_total, scores_updated, author)
                VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) 
                """
        
        cur.execute(query, proposal_values)

    # commit changes to the database
    conn.commit()

    # close connection
    close_conn(conn)

    

In [94]:
proposals_dict_list[2]

{'space_id': 'ufode.eth',
 'space_name': 'UFO Finance',
 'id': '0x00b88e72b4d422fbd775d5ba36f8d25431226b5a00113019ddfb2dff12b8f70c',
 'title': '  Can our project recruit more volunteers to join the promotion?',
 'body': '',
 'choices': ['YES', 'NO'],
 'start': 1642305600,
 'end': 1642392000,
 'snapshot': '14014693',
 'state': 'closed',
 'scores': [0, 0],
 'scores_by_strategy': [[0], [0]],
 'scores_total': 0,
 'scores_updated': 1642392018,
 'author': '0xa7c30BC54E3ce11dB9de9D74e5ea7C1a08770163'}

In [23]:
# call the insert function

# load proposals from backup csv
df_proposals = pd.read_csv('snapshot_proposals_nov2022.csv',  sep=";")  

# turn data frame into list of dictionaries
proposals_dict_list = df_proposals.to_dict('records')

# insert proposals into table
insert_proposals(proposals_dict_list)

successfully connected to the db
Connection has been closed


#### 7.2.3 Insert into votes table

In [25]:
# insert all votes into votes table

def insert_votes(votes_dict_list):
    """
    This function connects to the snapshot_sqlite database and inserts all votes into the votes table
    :param votes_dict_list: list of dicts
    """

    # creat connection to the data base
    conn = create_connection( r"snapshot_sqlite_nov2022.db")
    cur = conn.cursor()

    # loop through dicts in dict list and insert them into the data base
    for vote_dict in votes_dict_list:
        vote_values = list(vote_dict.values())
        
        # ensure data types are correct
        vote_values[0] = str(vote_values[0])
        vote_values[1] = str(vote_values[1])
        vote_values[2] = str(vote_values[2])
        vote_values[3] = str(vote_values[3])
        vote_values[4] = str(vote_values[4])
        vote_values[5] = str(vote_values[5])
        vote_values[6] = str(vote_values[6])
        vote_values[7] = str(vote_values[7])
        vote_values[8] = str(vote_values[8])

        query = """ INSERT OR REPLACE INTO votes(voter_proposal_id, voter, proposal_id, vp, vp_by_strategy, vp_state, created, choice, space_id)
                VALUES(?,?,?,?,?,?,?,?,?) 
                """
        
        cur.execute(query, vote_values)

    # commit changes to the database
    conn.commit()

    # close connection
    close_conn(conn)

    

In [26]:
# call the insert function

# load votes from backup csv
df_votes = pd.read_csv('snapshot_votes_nov2022.csv',  sep=";")  

# turn data frame into list of dictionaries
votes_dict_list = df_votes.to_dict('records')

# insert proposals into table
insert_votes(votes_dict_list)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


successfully connected to the db
Connection has been closed


#### 7.2.4 Insert into follows table

In [30]:
# insert all following relationships into follows table

def insert_follows(follows_dict_list):
    """
    This function connects to the snapshot_sqlite database and inserts all following relationships into the follows table
    :param follows_dict_list: list of dicts
    """

    # creat connection to the data base
    conn = create_connection( r"snapshot_sqlite.db")
    cur = conn.cursor()

    # loop through dicts in dict list and insert them into the data base
    try:
        for follow_dict in follows_dict_list:
            follow_values = list(follow_dict.values())

            query = """ INSERT OR REPLACE INTO follows(follow_id, follower_id, space_id, created)
                        VALUES(?,?,?,?) 
                        """
                
            cur.execute(query, follow_values)

    except:
        print("could not insert data")

    # commit changes to the database
    conn.commit()

    # close connection
    close_conn(conn)
    

In [28]:
# call the insert function

# load follows from backup csv
df_follows = pd.read_csv('snapshot_follows.csv',  sep=";")  

# turn data frame into list of dictionaries
follows_dict_list = df_follows.to_dict('records')

# insert proposals into table
insert_follows(follows_dict_list)

successfully connected to the db


# 8. Query the tables (check data)

In [None]:
# load sql extension and connect to the data base with magic commands
#%%capture
%load_ext sql
%sql sqlite:///snapshot_sqlite_nov2022.db

In [None]:
# 
%sql select * from spaces limit 10
%sql select * from proposals limit 10
%sql select * from votes limit 10

In [4]:
%sql select * from users limit 10

 * sqlite:///snapshot_sqlite_nov2022.db
Done.


user_id,about,avatar


# 9. Show descriptives of available DAO data