In [10]:
import sys
import os

# Add the root directory to the Python environment
root_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(root_dir)

In [11]:
import pickle
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from src import uniswap, uniprice_2
import warnings
from src.graph import create_graph
from src.algo_per_coin import safe_literal_eval
import json
import re
import concurrent.futures

In [12]:
# Suppress warnings from pandas
pd.options.mode.chained_assignment = None  # default='warn'
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

In [13]:
# Load config to mimic what is happening in the real code
CONFIG_PATH = "./config.json"
with open(os.path.join(root_dir, CONFIG_PATH), "r") as f:
    config = json.load(f)
len(config)

5

In [14]:
# The block number of our starting snapshot
experiment_start_block_number = int(config['experiments']['start_block_number'])
experiment_start_block_number

22816000

In [15]:
# Specify the target block number for which the graph should be constructed
end_block_number = 22816300
start_block_number = 22816000

In [16]:
# Find the recent available graph
available_graphs = []

for file_name in os.listdir(os.path.join(config['paths']['data'], 'snapshots')):
    match = re.search(r'\d+', file_name)
    if match:
        bn = match.group()
        available_graphs.append(int(bn))

# Find the closest available graph block number less than target_block_number
closest_block_number = max([b for b in available_graphs if b < end_block_number], default=None)
closest_block_number

22816000

In [17]:
# Read all update files until target_block_number

df_updates = pd.DataFrame()

for file_name in os.listdir(os.path.join(config['paths']['data'], 'updates')):
    match = re.search(r'\d+', file_name)
    if match:
        bn = int(match.group())
        if bn >= closest_block_number and bn <= end_block_number:
            df_updates = pd.concat([df_updates, pd.read_csv(os.path.join(config['paths']['data'], 'updates', f'updates_{bn}.csv.gz'))])

df_updates = df_updates.reset_index().drop(columns='index').sort_values('block_number')
df_updates.head()

Unnamed: 0,pool_id,pool_status,block_number,token0,token1,event,version,feeTier,sqrtPrice,tick
1216,0x243f9f22d4cc14f722ec910c2ec1234da9dd0cc1,updated,22816000,"{'decimals': '18', 'id': '0xd60abfb751db36514a...","{'decimals': '18', 'id': '0xf19308f923582a6f7c...",swap,v3,10000.0,2425864724789309052729756020238,68435.0
1218,0xefdbf897ce8ccec59e8ea34dcc49c29de4c3ae6f,updated,22816000,"{'decimals': '18', 'id': '0x68b36248477277865c...","{'decimals': '18', 'id': '0xc02aaa39b223fe8d0a...",swap,v3,3000.0,52118724218423725909644744,-146539.0
1219,0x16440e9ae70c0868d7bf5369348693cbb804d2cc,updated,22816000,"{'decimals': '27', 'id': '0x5702a4487da07c827c...","{'decimals': '18', 'id': '0xc02aaa39b223fe8d0a...",swap,v3,10000.0,51307581568959472953649,-285015.0
1217,0x161362f161526bb49ba0b51352d21ef90feadb28,updated,22816000,"{'decimals': '9', 'id': '0x9ac9468e7e3e1d19408...","{'decimals': '18', 'id': '0xd60abfb751db36514a...",swap,v3,10000.0,20018832104387363694047848308554533,248809.0
1215,0x25215d9ba4403b3da77ce50606b54577a71b7895,updated,22816000,"{'decimals': '18', 'id': '0x96a5399d07896f757b...","{'decimals': '18', 'id': '0xf19308f923582a6f7c...",swap,v3,10000.0,263716472384351630064829515968,24052.0


In [23]:
df = pd.read_csv(os.path.join(config['paths']['data'], 'snapshots', f'snapshot_{closest_block_number}.csv.gz'))
df = df.drop_duplicates('id')

df['token0Price'] = pd.to_numeric(df['token0Price'], errors='coerce')
df['token1Price'] = pd.to_numeric(df['token1Price'], errors='coerce')
df['totalValueLockedUSD'] = pd.to_numeric(df['totalValueLockedUSD'], errors='coerce')
df['other'] = df['other'].apply(safe_literal_eval)

df = df.dropna(subset=['other', 'token0Price', 'token1Price'])

# Check if 'feeTier' column exists, if not, create it
if 'feeTier' not in df.columns:
    df['feeTier'] = df.apply(
        lambda row: int(row['other'].get('feeTier', 0)) if row['version'] in ['v3', 'v4'] 
        else 3000 if row['version'] == 'v2' 
        else None, 
        axis=1
    )
    df['feeTier'] = df['feeTier'].apply(float)

if 'reserve0' not in df.columns:
    df['reserve0'] = pd.to_numeric(df['other'].apply(lambda x: x.get('reserve0', None) if x is not None else None), errors='coerce')

if 'reserve1' not in df.columns:
    df['reserve1'] = pd.to_numeric(df['other'].apply(lambda x: x.get('reserve1', None) if x is not None else None), errors='coerce')

df.set_index('id', inplace=True)
df.head()

Unnamed: 0_level_0,createdAtBlockNumber,token0Price,token1Price,liquidity,totalValueLockedUSD,other,token0_decimals,token0_id,token0_name,token0_symbol,token1_decimals,token1_id,token1_name,token1_symbol,version,block_number,feeTier,reserve0,reserve1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0x76b36afc7f1ef4bbf3301b602c9f6ca2ff6c3bd8,13844117.0,1.660335e+17,0.0,934796603.601451,8269.914769,"{'totalSupply': '26538.175287407292876187', 'r...",9.0,0x5d8038644608d1f849ed2c6863a2ea667e53371a,INUGAMI,INUGAMI,18.0,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,Wrapped Ether,WETH,v2,22815924.0,3000.0,3.809035e+17,2.294137
0x9fe0f9ef9c17a1afa2c29078ec3323d4cf7a9ddc,17787279.0,2.100182e-09,476149200.0,38571.818436,8269.085571,"{'totalSupply': '37947.331922020551982986', 'r...",18.0,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,Wrapped Ether,WETH,18.0,0xcd3b1f5b3bad4ef75f2bc840b005b55f7afbe4aa,X Æ A-12 Musk,LilX,v2,22815924.0,3000.0,1.767659,841669600.0
0x6b8faec9d9e4c5d1b3f74b7640840544d378f6a4,22460490.0,34414340.0,2.905765e-08,9317.689528,8268.901696,"{'totalSupply': '0.282842712474618009', 'reser...",9.0,0xbbb4099798b46a82283ef3b1aa3007cf4c17870f,Tomato Frog,TFROG,18.0,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,Wrapped Ether,WETH,v2,22815924.0,3000.0,54661050.0,1.588322
0x537bb3598a20937c4bfdb413c6d46c13cd0a5dd6,20428614.0,183827900000.0,5.4398e-12,639462.487731,8268.738322,"{'totalSupply': '18.345353635185121808', 'rese...",9.0,0x3be7bbe6d61eda1524a904e5dde46a22afeb35e8,COCONUT,COCONUT,18.0,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,Wrapped Ether,WETH,v2,22815924.0,3000.0,274170500000.0,1.491452
0xb5b8aee2af5b8d485bac4ed80c71dd8594096dfa,17137011.0,143088400000000.0,6.9e-15,19508999.450781,8267.930282,"{'totalSupply': '577.061521850140341555', 'res...",9.0,0x80b7a77d280a4ef3365cd34316231b7ee6e8b0c5,Pepe Tate,PATE,18.0,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,Wrapped Ether,WETH,v2,22815924.0,3000.0,233365800000000.0,1.630921


In [28]:
df_updates.value_counts(['version', 'event'])

version  event          
v2       swap               865
v3       swap               780
v4       swap               190
v3       burn                26
         mint                12
v2       burn                 4
v4       modifyLiquidity      4
v2       mint                 1
Name: count, dtype: int64

In [42]:
for block_number in range(start_block_number, end_block_number):
    # Get all updates for this specific block
    block_updates = df_updates[df_updates.block_number == block_number]
    
    for i, row in block_updates.iterrows():
        
        if row['pool_id'] not in df.index:
            continue
            
        pool_id = row['pool_id']
        
        token0 = safe_literal_eval(row['token0'])
        token1 = safe_literal_eval(row['token1'])
        
        if row['version'] == 'v2':
            # Get token decimals
            token0_decimals = df.loc[pool_id, 'token0_decimals']
            token1_decimals = df.loc[pool_id, 'token1_decimals']
            
            if row['event'] == 'swap':
                # Update reserves: add inputs, subtract outputs, keep in smallest units
                df.at[pool_id, 'reserve0'] = df.loc[pool_id, 'reserve0'] + float(token0['amount0'])
                df.at[pool_id, 'reserve1'] = df.loc[pool_id, 'reserve1'] + float(token1['amount1'])
                
                # Calculate prices: token0Price = token1/token0, adjusted for decimals
                df.at[pool_id, 'token0Price'] = (df.loc[pool_id, 'reserve1'] / df.loc[pool_id, 'reserve0']) * (10 ** (token0_decimals - token1_decimals))
                df.at[pool_id, 'token1Price'] = (df.loc[pool_id, 'reserve0'] / df.loc[pool_id, 'reserve1']) * (10 ** (token1_decimals - token0_decimals))
                
            elif row['event'] == 'mint':
                # Add liquidity to reserves, keep in smallest units
                
                # df.at[pool_id, 'reserve0'] = df.loc[pool_id, 'reserve0'] + float(token0['amount0'])
                # df.at[pool_id, 'reserve1'] = df.loc[pool_id, 'reserve1'] + float(token1['amount1'])
                
                # # Update prices based on new reserves, adjusted for decimals
                # df.at[pool_id, 'token0Price'] = (df.loc[pool_id, 'reserve1'] / df.loc[pool_id, 'reserve0']) * (10 ** (token0_decimals - token1_decimals))
                # df.at[pool_id, 'token1Price'] = (df.loc[pool_id, 'reserve0'] / df.loc[pool_id, 'reserve1']) * (10 ** (token1_decimals - token0_decimals))
                
                # TODO: We need amount0 and amount1, but we don't save them in update.py
                
                pass
                
            elif row['event'] == 'burn':
                # Remove liquidity from reserves, keep in smallest units
                # df.at[pool_id, 'reserve0'] = df.loc[pool_id, 'reserve0'] - float(token0['amount0'])
                # df.at[pool_id, 'reserve1'] = df.loc[pool_id, 'reserve1'] - float(token1['amount1'])
                
                # # Update prices based on new reserves, adjusted for decimals
                # df.at[pool_id, 'token0Price'] = (df.loc[pool_id, 'reserve1'] / df.loc[pool_id, 'reserve0']) * (10 ** (token0_decimals - token1_decimals))
                # df.at[pool_id, 'token1Price'] = (df.loc[pool_id, 'reserve0'] / df.loc[pool_id, 'reserve1']) * (10 ** (token1_decimals - token0_decimals))
                
                # TODO: We need amount0 and amount1, but we don't save them in update.py
                
                pass
            else:
                raise NotImplementedError(f"Event {row['event']} for version {row['version']} not implemented")
                
        elif row['version'] == 'v3':
            # Get token decimals
            token0_decimals = df.loc[pool_id, 'token0_decimals']
            token1_decimals = df.loc[pool_id, 'token1_decimals']
            
            if row['event'] == 'swap':
                # Update pool state with new price and tick
                df.at[pool_id, 'sqrtPriceX96'] = float(row['sqrtPrice'])
                df.at[pool_id, 'tick'] = float(row['tick'])
                
                # Calculate prices: token0Price = (sqrtPriceX96 / 2^96)^2, adjust for decimals
                token0_price = ((float(row['sqrtPrice']) / (2**96)) ** 2) * (10 ** (token0_decimals - token1_decimals))
                df.at[pool_id, 'token0Price'] = token0_price
                df.at[pool_id, 'token1Price'] = 1 / token0_price if token0_price != 0 else 0
                
            elif row['event'] == 'mint':
                pass
                
            elif row['event'] == 'burn':
                pass                
                
            elif row['event'] in ['collect', 'flash']:
                # These events don't affect reserves or prices
                pass
                
            else:
                raise NotImplementedError(f"Event {row['event']} for version {row['version']} not implemented")
                
        elif row['version'] == 'v4':
            # Get token decimals
            token0_decimals = df.loc[pool_id, 'token0_decimals']
            token1_decimals = df.loc[pool_id, 'token1_decimals']
            
            if row['event'] == 'swap':
                # Update pool state with new price and tick
                df.at[pool_id, 'sqrtPriceX96'] = float(row['sqrtPrice'])
                df.at[pool_id, 'tick'] = float(row['tick'])
                
                # Calculate prices: token0Price = (sqrtPriceX96 / 2^96)^2, adjust for decimals
                token0_price = ((float(row['sqrtPrice']) / (2**96)) ** 2) * (10 ** (token0_decimals - token1_decimals))
                df.at[pool_id, 'token0Price'] = token0_price
                df.at[pool_id, 'token1Price'] = 1 / token0_price if token0_price != 0 else 0
                
            elif row['event'] == 'modifyLiquidity':
                # Prices unchanged as modifyLiquidity doesn't affect sqrtPriceX96 or tick
                pass
                
            else:
                raise NotImplementedError(f"Event {row['event']} for version {row['version']} not implemented")
