# Dapp Token Scraper
The purpose of this file is to collect the daily price history of the tokens associated with dApps listed on State of the DApps.

### Table of contents

1. Get all contract addresses from the dapp_contract_link table
2. Use addresses to query Coin Gecko API
3. Store the results in Postgres DB (first the table has to be created)

# 1. Get contract addresses from contract_link table

### 1.1 Connect to the DB 

In [1]:
# import libraries
import ast
import os

import psycopg2


# Connection settings for the Postgres instance used by this notebook.
host = "localhost"  # default
dbname = "postgres"  # default
user = "postgres"  # default
# Load the password from the environment so it never has to be written into
# the notebook. PGPASSWORD is the variable name libpq itself recognises.
# Falls back to the previous empty-string value when the variable is unset.
password = os.environ.get("PGPASSWORD", "")

# function to connect to db


def connect_database(host, dbname, user, password):
    """
    Connects to an existing Postgres database and switches the session to autocommit.

    Arguments:
        - host (String): location of db; localhost or IP address
        - dbname (String): name of the database; it has to exist already
        - user (String): default is postgres -> or create new user in pg admin
        - password (String): password of user

    Returns two objects a cursor and a connection to the db

    """
    # Pass the settings as keyword arguments instead of concatenating a DSN
    # string: psycopg2 then quotes each value itself, so a password or
    # database name containing spaces or quotes no longer corrupts the DSN.
    conn = psycopg2.connect(host=host, dbname=dbname, user=user, password=password)
    # every statement is committed immediately; no explicit commit is needed
    conn.set_session(autocommit=True)
    cur = conn.cursor()

    print("has successfully connceted to db")

    return cur, conn


def disconnect_database(conn):
    """
    Close the given database connection and report it on stdout.

    Argument:
    conn: connection object exposing a close() method
    """
    conn.close()  # release the connection to the database
    print("has successfully disconnceted from db")


### 1.2 Create call query command

In [2]:
# send queries to DB
def call_query(query):
    """
    This function can be used to send queries to the db.
    It automatically connects to the db, sends the query, and returns the result.
    The connection is closed again even when the query fails.

    Argument:
    query (string): SQL command with 3x quotes

    Returns the result from the db as a list with fetchall()
    """

    # connects to the db using the module-level connection settings
    cur, conn = connect_database(host, dbname, user, password)

    try:
        # sends query to db
        cur.execute(query)

        # stores results
        results = cur.fetchall()
        print("has successfully received results")

        # returns results
        return results
    finally:
        # closes connection to the db; runs on success and on error so a
        # failing query does not leave the connection open
        disconnect_database(conn)


### 1.3 Query the DB

In [3]:
# get contract address and dapp names in a list
# every (dApp name, contract address) pair stored in the link table
dapp_contracts = call_query("""SELECT d_name, c_address FROM dapp_contract_link """)
dapp_contracts


has successfully connceted to db
has successfully received results
has successfully disconnceted from db


[('OpenSea', '0x1f52b87c3503e537853e160adbf7e330ea0be7c4'),
 ('OpenSea', '0x23b45c658737b12f1748ce56e9b6784b5e9f3ff8'),
 ('OpenSea', '0x78997e9e939daffe7eb9ed114fbf7128d0cfcd39'),
 ('OpenSea', '0x7be8076f4ea4a4ad08075c2508e481d6c946d12b'),
 ('Tether', '0xdac17f958d2ee523a2206206994597c13d831ec7'),
 ('Oasis', '0x89d24a6b4ccb1b6faa2625fe562bdd9a23260359'),
 ('Oasis', '0x9b0ccf7c8994e19f39b2b4cf708e0a7df65fa8a3'),
 ('Oasis', '0x448a5065aebb8e423f0896e6c5d525c040f59af3'),
 ('Oasis', '0xbda109309f9fafa6dd6a9cb9f1df4085b27ee8ef'),
 ('Oasis', '0x9b0f70df76165442ca6092939132bbaea77f2d7a'),
 ('Oasis', '0xf53ad2c6851052a81b42133467480961b2321c09'),
 ('Oasis', '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2'),
 ('Oasis', '0x9f8f72aa9304c8b593d555f12ef6589cc3a579a2'),
 ('Oasis', '0xdDb108893104dE4E1C6d0E47c42237dB4E617ACc'),
 ('Oasis', '0x5e227ad1969ea493b43f840cff78d08a6fc17796'),
 ('Oasis', '0xbaa65281c2fa2baacb2cb550ba051525a480d3f4'),
 ('Oasis', '0x9eF05f7F6deB616fd37aC3c959a2dDD25A54E4F5'),
 ('Oa

# 2. Query Coin Gecko API

In [4]:
# get the list of all tokens on Coin Gecko as a json
import requests
import json

gecko_api = "https://api.coingecko.com/api/v3/coins/list?include_platform=true"

# Bound the wait so the notebook cannot hang on a stalled connection.
response = requests.get(gecko_api, timeout=30)
# Fail here on an HTTP error instead of storing an error payload as the
# token list, which would only surface later as a confusing lookup failure.
response.raise_for_status()
token_json = response.json()

In [28]:
# define a function that loops through the token list from coin gecko and extracts the gecko ID for token contract addresses
# of dapps from the db
def get_gecko_id(contract_address, token_json):
    """
    Look up the Coin Gecko id and name of the token deployed at a contract address.

    Arguments:
    - contract_address (string): contract address of a dApp; compared case-insensitively
    - token_json (list): token entries of the Coin Gecko coin list; each entry is
      read for its "id", "name" and "platforms" keys

    Returns a tuple (gecko_id, gecko_name). Both are empty strings when no
    entry lists the address under its "ethereum" platform, including when
    token_json is empty.
    """
    wanted_address = contract_address.lower()

    for token in token_json:
        # tolerate entries whose platforms mapping is missing or null
        platforms = token.get("platforms") or {}
        ethereum_address = platforms.get("ethereum")

        # not deployed on ethereum, or the address is null -> nothing to compare
        if ethereum_address is None:
            continue

        # the first matching entry wins
        if ethereum_address.lower() == wanted_address:
            return token["id"], token["name"]

    return "", ""


In [31]:
# create a new list of dictionaries that contains the token id, name, and contract address that is listed in coin gecko

# dApp contracts that Coin Gecko lists as a token, one dict per contract
listed_dapp_tokens_list = []

for dapp_name, contract_address in dapp_contracts:
    gecko_id, gecko_name = get_gecko_id(contract_address, token_json)

    # an empty id means the contract is not listed on Coin Gecko
    if gecko_id == "":
        continue

    listed_dapp_tokens_list.append({
        "dapp_name": dapp_name,
        "token_address": contract_address,
        "gecko_id": gecko_id,
        "gecko_name": gecko_name,
    })
    
# len(listed_dapp_tokens_list)

In [45]:
# number of dApp contracts that were matched to a Coin Gecko token
len(listed_dapp_tokens_list)

234

In [34]:
# get token information of tokens that are not associated with dapps from stateofthedapps but listed on coingecko

# initiate empty list to store tokens that are not associated with dapps from stateofthedapps
listed_non_dapp_tokens_list = []

# list containing all dapp contract addresses, lower-cased for comparison
dapp_contract_list = [dapp_contract[1].lower() for dapp_contract in dapp_contracts]
# same addresses as a set: one membership test per Coin Gecko token would
# otherwise scan the whole list every time
dapp_contract_set = set(dapp_contract_list)

# loop through tokens from coingecko
#   1. check if they are on ethereum
#   2. check if they have a token address (contract address) that is associated with a dapp or that is empty
#   3. if not add this token to the list
for token in token_json:
    token_address = token["platforms"].get("ethereum")

    # not on ethereum, or the address is empty / null
    if not token_address:
        continue

    # already covered by a dapp contract
    if token_address.lower() in dapp_contract_set:
        continue

    listed_non_dapp_tokens_list.append({
        "gecko_id": token["id"],
        "gecko_name": token["name"],
        "dapp_name": "no_dApp",
        "token_address": token_address,
    })

# len(listed_non_dapp_tokens_list)


In [46]:
# number of Coin Gecko ethereum tokens that are not linked to any dApp contract
len(listed_non_dapp_tokens_list)

4894

In [37]:
# define API query function that returns the token price history 

def get_token_price_history(token_id):

    """
    This function queries the gecko token history endpoint and returns the whole price history of a token on a daily level

    Arguments:
    token_id (string): id of the token on the coingecko website

    Returns the decoded json of the response, or None when the token could not
    be collected (request failed, status code other than 200, or a body that
    is not valid json). The reason is printed in each of these cases.
    """

    import requests
    from urllib.parse import quote

    # quote the id so an unexpected character cannot change the request path;
    # ids made of letters, digits and dashes are left unchanged
    gecko_api = "https://api.coingecko.com/api/v3/coins/"+quote(token_id, safe="")+"/market_chart?vs_currency=usd&days=max&interval=daily"

    try:
        # bounded wait: one stalled request must not block the whole collection
        response = requests.get(gecko_api, timeout=30)
    except requests.RequestException as error:
        # a network problem is handled like a bad status code: report and skip
        print(token_id + " could not be collected")
        print("request failed: " + str(error))
        return None

    if response.status_code != 200:
        print(token_id + " could not be collected")
        print("API status code: "+ str(response.status_code))
        return None

    try:
        dapp_token_json = response.json()
    except ValueError:
        print(token_id + " could not be collected")
        print("response body is not valid json")
        return None

    return dapp_token_json

In [75]:
# manual check of the price history query with a single token id
test = get_token_price_history("tether")

In [76]:
# show the json returned for the manual check
test

{'prices': [[1424822400000, 1.21016],
  [1425254400000, 0.572521],
  [1425600000000, 1.0],
  [1425686400000, 1.0],
  [1425772800000, 1.0],
  [1425859200000, 1.0],
  [1425945600000, 1.0],
  [1426032000000, 1.0],
  [1426118400000, 1.0],
  [1426204800000, 1.0],
  [1426291200000, 1.0],
  [1426377600000, 1.0],
  [1426464000000, 1.0],
  [1426550400000, 1.0],
  [1426636800000, 1.0],
  [1426723200000, 1.0],
  [1426809600000, 1.0],
  [1426896000000, 1.0],
  [1426982400000, 1.0],
  [1427068800000, 1.0],
  [1427155200000, 1.0],
  [1427241600000, 1.0],
  [1427328000000, 1.0],
  [1427414400000, 1.0],
  [1427500800000, 1.0],
  [1427587200000, 1.0],
  [1427673600000, 1.0],
  [1427760000000, 1.0],
  [1427932800000, 1.0],
  [1428019200000, 1.0],
  [1428105600000, 1.0],
  [1428192000000, 1.0],
  [1428278400000, 1.0],
  [1428364800000, 1.0],
  [1428451200000, 1.0],
  [1428537600000, 1.0],
  [1428624000000, 1.0],
  [1428710400000, 1.0],
  [1428796800000, 1.0],
  [1428883200000, 1.0],
  [1428969600000, 1.0

In [38]:
# define a function that turns token price json into a list of dicts

def create_dapp_token_dict(dapp_token_json, gecko_id, token_address, dapp_name):

    """
    Combine the price, market cap and volume series of one token into a single
    DataFrame laid out for the DB insert / the google big query upload.

    dapp_token_json (json): Json output from Gecko API; its "prices", "market_caps" and
                            "total_volumes" entries are lists of [date, value] pairs.
    gecko_id (string): Gecko id of the token, stored in the token_name column.
    token_address (string): Contract address of the token.
    dapp_name (string): Name of the dApp the token is associated with.

    Returns a DataFrame with the columns token_address, token_dapp_name, token_name,
    token_price, token_market_cap, token_total_volume, token_date. Only dates that
    occur in all three series are kept.
    """

    import pandas as pd

    # one two-column frame per series, all sharing the "date" column
    prices = pd.DataFrame(dapp_token_json["prices"], columns=["date", "price"])
    market_caps = pd.DataFrame(dapp_token_json["market_caps"], columns=["date", "market_cap"])
    volumes = pd.DataFrame(dapp_token_json["total_volumes"], columns=["date", "total_volumes"])

    # join the three series on their date
    history = pd.merge(prices, market_caps, on="date")
    history = pd.merge(history, volumes, on="date")

    # switch to the column names of the DB table
    db_column_names = {
        "date": "token_date",
        "price": "token_price",
        "market_cap": "token_market_cap",
        "total_volumes": "token_total_volume",
    }
    history = history.rename(columns=db_column_names)

    # constant columns identifying the token; the gecko id serves as token name
    history["token_address"] = token_address
    history["token_dapp_name"] = dapp_name
    history["token_name"] = gecko_id

    # column order of the DB table
    db_column_order = [
        "token_address",
        "token_dapp_name",
        "token_name",
        "token_price",
        "token_market_cap",
        "token_total_volume",
        "token_date",
    ]
    return history[db_column_order]

In [39]:
###################################################
# get price history for tokens linked to dapps
###################################################

# call all functions and create a list of dicts that can be sent to the data base or saved as csv
import pandas as pd
import time

dapp_token_history_df_list = []
total_dapp_tokens = len(listed_dapp_tokens_list)

# i counts the tokens from 1 and is only used for progress output and throttling
for i, dapp_token in enumerate(listed_dapp_tokens_list, start=1):

    # get the price history of a token from the coin gecko API and store resulting json as a variable
    price_history = get_token_price_history(dapp_token["gecko_id"])

    # None means the token could not be collected
    if price_history is not None:
        # takes the price history json and token info to create a df
        dapp_token_history_df = create_dapp_token_dict(price_history, dapp_token["gecko_id"],dapp_token["token_address"],dapp_token["dapp_name"])

        # adds the df to a list of dfs
        dapp_token_history_df_list.append(dapp_token_history_df)

        print("Token "+ dapp_token["gecko_id"] + " has been collected; counter: "+ str(i))

    else:
        print("moving on to next token...; counter: "+ str(i))

    # pause for a minute after every 60 requests (presumably to respect the
    # API rate limit -- confirm); no pause is needed after the final token
    if i % 60 == 0 and i < total_dapp_tokens:
        time.sleep(60)


print("all tokens have been collected")

# concat all dfs to obtain one large df with price history of all tokens that are associated with dapps
dapp_token_price_history_df = pd.concat(dapp_token_history_df_list)

Token tether has been collected; counter: 1
Token sai has been collected; counter: 2
Token weth has been collected; counter: 3
Token maker has been collected; counter: 4
Token dai has been collected; counter: 5
Token chainlink has been collected; counter: 6
Token decentraland has been collected; counter: 7
Token omisego has been collected; counter: 8
Token status has been collected; counter: 9
Token singularitynet has been collected; counter: 10
Token gala has been collected; counter: 11
Token staked-ether has been collected; counter: 12
Token sushi has been collected; counter: 13
Token 0x has been collected; counter: 14
Token polymath has been collected; counter: 15
Token basic-attention-token has been collected; counter: 16
Token compound-ether has been collected; counter: 17
Token cdai has been collected; counter: 18
Token compound-sai has been collected; counter: 19
Token compound-usd-coin has been collected; counter: 20
Token compound-basic-attention-token has been collected; coun

In [47]:
###################################################
# get price history for tokens NOT linked to dapps
###################################################

# call all functions and create a list of dicts that can be sent to the data base or saved as csv
import pandas as pd
import time

non_dapp_token_history_df_list = []
total_non_dapp_tokens = len(listed_non_dapp_tokens_list)

# i counts the tokens from 1 and is only used for progress output and throttling
for i, token in enumerate(listed_non_dapp_tokens_list, start=1):

    # get the price history of a token from the coin gecko API and store resulting json as a variable
    price_history = get_token_price_history(token["gecko_id"])

    # None means the token could not be collected
    if price_history is not None:
        # takes the price history json and token info to create a df
        non_dapp_token_history_df = create_dapp_token_dict(price_history, token["gecko_id"], token["token_address"], token["dapp_name"])

        # adds the df to a list of dfs
        non_dapp_token_history_df_list.append(non_dapp_token_history_df)

        print("Token "+ token["gecko_id"] + " has been collected; counter: "+ str(i))

    else:
        print("moving on to next token...; counter: "+ str(i))

    # pause for a minute after every 60 requests (presumably to respect the
    # API rate limit -- confirm); no pause is needed after the final token
    if i % 60 == 0 and i < total_non_dapp_tokens:
        time.sleep(60)


print("all tokens have been collected")

# concat all dfs to obtain one large df with price history of all tokens that are NOT associated with dapps
non_dapp_token_price_history_df = pd.concat(non_dapp_token_history_df_list)


Token 0chain has been collected; counter: 1
Token 0xmonero has been collected; counter: 2
Token 1000-florida has been collected; counter: 3
Token 10024-10028-appoline has been collected; counter: 4
Token 10084-grayton has been collected; counter: 5
Token 10604-somerset has been collected; counter: 6
Token 10612-somerset has been collected; counter: 7
Token 10616-mckinney has been collected; counter: 8
Token 10617-hathaway has been collected; counter: 9
Token 10629-mckinney has been collected; counter: 10
Token 10639-stratman has been collected; counter: 11
Token 10700-whittier has been collected; counter: 12
Token 10974-worden has been collected; counter: 13
10x-gg could not be collected
API status code: 404
moving on to next token...; counter: 14
Token 11078-longview has been collected; counter: 15
Token 11078-wayburn has been collected; counter: 16
Token 11201-college has been collected; counter: 17
Token 11300-roxbury has been collected; counter: 18
Token 11653-nottingham has been c

In [41]:
# show the combined price history of the tokens linked to dapps
dapp_token_price_history_df

Unnamed: 0,token_address,token_dapp_name,token_name,token_price,token_market_cap,token_total_volume,token_date
0,0xdac17f958d2ee523a2206206994597c13d831ec7,Tether,tether,1.210160,304476.000000,5.000000,1424822400000
1,0xdac17f958d2ee523a2206206994597c13d831ec7,Tether,tether,0.572521,144046.000000,2.000000,1425254400000
2,0xdac17f958d2ee523a2206206994597c13d831ec7,Tether,tether,1.000000,251600.000000,50.000000,1425600000000
3,0xdac17f958d2ee523a2206206994597c13d831ec7,Tether,tether,1.000000,251600.000000,58196.000000,1425686400000
4,0xdac17f958d2ee523a2206206994597c13d831ec7,Tether,tether,1.000000,251600.000000,31.000000,1425772800000
...,...,...,...,...,...,...,...
1537,0xd0d6d6c5fe4a677d343cc433536bb717bae167dd,adChain,adtoken,0.000870,699728.824511,1.207639,1635465600000
1538,0xd0d6d6c5fe4a677d343cc433536bb717bae167dd,adChain,adtoken,0.000870,699728.824511,1.207639,1635552000000
1539,0xd0d6d6c5fe4a677d343cc433536bb717bae167dd,adChain,adtoken,0.000716,567012.152514,2.502585,1635638400000
1540,0xd0d6d6c5fe4a677d343cc433536bb717bae167dd,adChain,adtoken,0.000851,700821.185247,1.043412,1635679726000


In [48]:
# show the combined price history of the tokens not linked to dapps
non_dapp_token_price_history_df

Unnamed: 0,token_address,token_dapp_name,token_name,token_price,token_market_cap,token_total_volume,token_date
0,0xb9ef770b6a5e12e45983c5d80545258aa38f3b78,no_dApp,0chain,0.483325,1.933300e+07,64385.191049,1531958400000
1,0xb9ef770b6a5e12e45983c5d80545258aa38f3b78,no_dApp,0chain,0.432846,1.731383e+07,92248.916728,1532044800000
2,0xb9ef770b6a5e12e45983c5d80545258aa38f3b78,no_dApp,0chain,0.391944,1.567774e+07,131175.413170,1532131200000
3,0xb9ef770b6a5e12e45983c5d80545258aa38f3b78,no_dApp,0chain,0.377234,1.508935e+07,62006.348324,1532217600000
4,0xb9ef770b6a5e12e45983c5d80545258aa38f3b78,no_dApp,0chain,0.360793,1.443172e+07,24066.254330,1532304000000
...,...,...,...,...,...,...,...
197,0x93ed140172ff226dad1f7f3650489b8daa07ae7f,no_dApp,zzz-finance-v2,2.905059,5.810118e+04,17.242377,1635120000000
198,0x93ed140172ff226dad1f7f3650489b8daa07ae7f,no_dApp,zzz-finance-v2,2.905059,5.810118e+04,17.242377,1635206400000
199,0x93ed140172ff226dad1f7f3650489b8daa07ae7f,no_dApp,zzz-finance-v2,2.970508,5.810118e+04,251.039995,1635292800000
200,0x93ed140172ff226dad1f7f3650489b8daa07ae7f,no_dApp,zzz-finance-v2,2.970508,5.941017e+04,0.000000,1635371688000


In [50]:
# write the dapp token price history to a semicolon-separated csv (without the
# row index) so that it can be uploaded to GBQ
dapp_token_price_history_df.to_csv(
    "token_price_history_dapp_token_only.csv",
    sep=";",
    index=False,
)

In [49]:
# write the non dapp token price history to a semicolon-separated csv (without
# the row index) so that it can be uploaded to GBQ
non_dapp_token_price_history_df.to_csv(
    "token_price_history_non_dapp_token_only.csv",
    sep=";",
    index=False,
)

In [51]:
# stack the dapp and the non dapp price histories into one dataframe
df_full = pd.concat(
    [dapp_token_price_history_df, non_dapp_token_price_history_df]
)

# store the combined data as a semicolon-separated csv without the row index
df_full.to_csv("token_price_history.csv", sep=";", index=False)

# number of rows of the combined dataframe
len(df_full.index)

2313713

In [74]:
# one df is too big to upload it as google sheet -> GBQ can only upload 20MB files
# therefore the full df is written as 15 consecutive row slices, one csv each

n_parts = 15
# rows per part; computed once and reused for every slice boundary
rows_per_part = round(len(df_full.index) / n_parts)

for part_number in range(1, n_parts + 1):
    start_row = rows_per_part * (part_number - 1)
    # the last part takes all remaining rows, so rounding never drops data
    end_row = rows_per_part * part_number if part_number < n_parts else None

    df_part = df_full.iloc[start_row:end_row, :]
    df_part.to_csv("token_price_history_part" + str(part_number) + ".csv", index=False, sep=";")

# 3. Insert data into DB (TBD)

In [None]:
# Problem with OASIS: it uses the contract addresses of other tokens.
# Therefore the token address is not unique anymore and cannot simply be used as the table key.

In [None]:
# push data to the DB - has to be inspected before - table in database still has to be created 

# sql insert command to see order of variables

# Parameterised insert for one row of token price history; the seven
# placeholders follow the column order listed in the statement.
# NOTE(review): the conflict clause has no target because token_address is not
# unique (several dApps share addresses and every token has one row per day).
# Once the table and its key are defined, name that key explicitly here.
dapp_token_price_history_insert = ("""INSERT INTO dapp_token_price_history (
                            token_address,
                            token_dapp_name,
                            token_name,
                            token_price,
                            token_market_cap,
                            token_total_volume,
                            token_date)
                            VALUES (%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT DO NOTHING
""")
