In [34]:
import logging
# Notebook-wide logger; INFO and above are emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Format applied to console output: timestamp - level - message.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# Reuse the console handler if this cell already ran in the current kernel.
# logging.getLogger() returns the same logger object on every call, so an
# unconditional addHandler() on each re-run stacks handlers and every record
# is then printed once per handler.
console_handler = next(
    (handler for handler in logger.handlers if type(handler) is logging.StreamHandler),
    None,
)
if console_handler is None:
    console_handler = logging.StreamHandler()
    logger.addHandler(console_handler)

# (Re)apply level and format so edits to this cell take effect on re-run.
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)

def main():
    """Run the scraping stages of the pipeline in order.

    Logs a start message, then calls ``scraping.scraper.main()`` followed by
    ``scraping.contract_addresses.main()``. Returns nothing.
    """
    logger.info("Cloud function starting.")
    # 1. Scrape coingecko gainers.
    # The import statement sits inside the function, so it executes only when main() is called.
    import scraping.scraper
    scraping.scraper.main()

    # 2. Get contract address for gainers that have them, with coingecko.
    import scraping.contract_addresses
    scraping.contract_addresses.main()

    # 3. Run SQL query on Google Big Query to find wallets that have received these tokens within the last 7 days.
    # NOTE(review): step 3 has no code in this function yet.
    
# main()

In [35]:
import pandas as pd

# Load the gainers table; its 'contract_address' column feeds the SQL IN (...) filter.
with open('gainers.csv') as f:
    df = pd.read_csv(f)

# Keep only usable addresses. pandas reads blank CSV cells as NaN, and formatting
# NaN into the list would put the literal string 'nan' into the SQL filter.
contract_addresses = (
    df['contract_address']
    .dropna()
    .astype(str)
    .str.strip()
)
contract_addresses = contract_addresses[contract_addresses != ''].drop_duplicates()

# With no addresses the filter would render as "()", an IN clause with no values.
if contract_addresses.empty:
    raise ValueError("gainers.csv contains no contract addresses.")

# These values are spliced into SQL text below, so refuse anything that could
# terminate or escape the surrounding string literal.
if contract_addresses.str.contains(r"['\\]", regex=True).any():
    raise ValueError("gainers.csv contains a contract address with a quote or backslash.")

# Each address is wrapped in single quotes, then joined as "('a','b',...)".
token_contracts = [f"'{contract}'" for contract in contract_addresses]
token_contracts_string = f"({','.join(token_contracts)})"

In [42]:
import os

# Load environment variables from .env file
from dotenv import load_dotenv
# Read the .env file, then pull the settings this notebook needs from the environment.
load_dotenv()
KEY_PATH = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
PROJECT_ID = os.getenv("PROJECT_ID")

# Fail here with a clear message; otherwise a missing path only surfaces later
# as an opaque error when the service-account key file is opened.
if not KEY_PATH:
    raise RuntimeError(
        "GOOGLE_APPLICATION_CREDENTIALS is not set; add it to .env or the environment."
    )

from google.cloud import bigquery
from google.oauth2 import service_account


# Build service-account credentials from the key file, scoped to the Cloud Platform API.
credentials = service_account.Credentials.from_service_account_file(
    KEY_PATH,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# The client's project comes from the key file's own project_id.
client = bigquery.Client(project=credentials.project_id, credentials=credentials)

# Job config for dry runs, with the query cache disabled.
dry_run_config = bigquery.QueryJobConfig()
dry_run_config.dry_run = True
dry_run_config.use_query_cache = False

# Job config for real runs, also with the query cache disabled.
wet_run_config = bigquery.QueryJobConfig()
wet_run_config.use_query_cache = False

# Select up to 10 distinct from_address values for transfers of the gainer tokens
# whose block timestamp falls between 7 days ago and 1 day ago.
# NOTE(review): the second timestamp bound excludes the most recent 24 hours —
# confirm that is intended, since the goal is described as "the last 7 days".
# NOTE(review): from_address is presumably the sending side of a transfer; if the
# goal is wallets that received the tokens, confirm which column is wanted.
# NOTE(review): token_contracts_string is interpolated directly into the SQL text.
query = f"""
SELECT DISTINCT tt.from_address
FROM `bigquery-public-data.crypto_ethereum.token_transfers` tt
WHERE tt.block_timestamp > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY) 
AND tt.block_timestamp < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY) 
AND tt.token_address IN {token_contracts_string}
LIMIT 10 
"""

# Largest scan, in MB, that the cost guard below will allow.
MAX_QUERY_MB = 1024

logger.info("Running dry run of SQL query on Google BigQuery.")
# Submitted with the dry-run job config, so this call is used only to size the query.
dry_query = client.query(query, job_config=dry_run_config)

# Convert the estimated bytes scanned to MB and stop before a costly run.
mb_processed = dry_query.total_bytes_processed / (1024 * 1024)
if mb_processed > MAX_QUERY_MB:
    message = f"Query will process {mb_processed:.2f} MB, over the {MAX_QUERY_MB} MB limit."
    logger.error(message)
    # RuntimeError is still caught by any handler written for Exception.
    raise RuntimeError(message)

logger.info(f"Query will process {mb_processed:.0f} MB.")

def upload_df(df, table_name, dataset_name="cexs", max_rows=1000):
    """Replace a BigQuery table with the contents of a DataFrame.

    Args:
        df: DataFrame to upload. Must be non-empty and have at most
            ``max_rows`` rows.
        table_name: Destination table ID inside ``dataset_name``.
        dataset_name: Destination dataset ID in the client's project.
        max_rows: Largest DataFrame accepted. Defaults to 1000, the limit
            that was previously hard-coded.

    Raises:
        ValueError: If ``df`` is empty or has more than ``max_rows`` rows.
    """
    # Validate before touching BigQuery so bad input never truncates the table.
    if df.empty:
        logger.error("DataFrame is empty.")
        raise ValueError("DataFrame is empty.")
    if len(df) > max_rows:
        logger.error("DataFrame is too large.")
        raise ValueError("DataFrame is too large.")

    # Build the table reference explicitly; Client.dataset() is deprecated.
    table_ref = bigquery.DatasetReference(client.project, dataset_name).table(table_name)

    # WRITE_TRUNCATE: the load replaces whatever the table currently holds.
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # Waits for table load to complete.
    logger.info(f"Loaded {job.output_rows} rows into {dataset_name}:{table_name}.")
    

    

# wet_query = client.query(query, job_config=wet_run_config)
# df = wet_query.to_dataframe()

2023-03-31 19:47:19,657 - INFO - Running SQL query on Google Big Cloud.
2023-03-31 19:47:19,657 - INFO - Running SQL query on Google Big Cloud.
2023-03-31 19:47:20,515 - INFO - Query will process 724 MB.
2023-03-31 19:47:20,515 - INFO - Query will process 724 MB.


In [47]:
# Read the ignored-wallets CSV into a DataFrame.
with open('ignored_wallets.csv') as ignored_wallets_file:
    ignored_wallets_df = pd.read_csv(ignored_wallets_file)

# Push the frame to the "ignored-wallets" table in upload_df's default dataset.
upload_df(df=ignored_wallets_df, table_name="ignored-wallets")

2023-03-31 19:49:39,074 - INFO - Loaded 8 rows into cexs:ignored-wallets.
2023-03-31 19:49:39,074 - INFO - Loaded 8 rows into cexs:ignored-wallets.


1048576

In [4]:
# NOTE(review): in the visible notebook `wet_query` is only assigned in a
# commented-out line, so this cell would raise NameError on a fresh kernel.
# The saved output below (name / total_people) also does not match the
# token-transfer query's from_address column — confirm whether this cell is stale.
wet_query.to_dataframe()

Unnamed: 0,name,total_people
0,James,272793
1,John,235139
2,Michael,225320
3,Robert,220399
4,David,219028
5,Mary,209893
6,William,173092
7,Jose,157362
8,Christopher,144196
9,Maria,131056
