In [9]:
import sys
from pathlib import Path

# Bootstrap: the `services` package is imported from the `src` folder located
# under the parent of the current working directory, so add that folder to
# sys.path before importing from it.
src_dir = Path.cwd().parent / "src"
sys.path.append(str(src_dir))

from services.env import setup_project

setup_project()
# # Add the project root or src folder to the Python path
# ROOT_DIR = Path.cwd().parent
# # Path(__file__).resolve().parents[1]  # Go up from notebooks/
# sys.path.append(str(ROOT_DIR / "src"))

✅ Project path added to sys.path: d:\portfolio_repo\arb_airdrop\src


In [10]:
#API connection and dependency install
import os
from flipside import Flipside
from dotenv import find_dotenv, load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from services.config import DATA_DIR, API_KEY

# Confirm and define dotenv path
# dotenv_path = find_dotenv()
# print(f"Loading .env from: {dotenv_path}")

# #load env data from path
# load_dotenv(dotenv_path, override=True)

# #store API_KEY from dotenv file
# import os
# API_KEY = os.getenv("FLIP_API_KEY")
# # print(f"API Key loaded: {API_KEY}")

# Create the Flipside API client used by the query cell further down.
FLIPSIDE_API_URL = "https://api-v2.flipsidecrypto.xyz"
flipside = Flipside(API_KEY, FLIPSIDE_API_URL)

In [None]:
# import os

# os.makedirs('data/processed/transfers_batches', exist_ok=True)


Load airdrop recipient wallets as a list. Query wallet transfers in batches from ez_token_transfers

In [None]:
# Query every token transfer that touches an airdrop recipient, 1000 wallets at
# a time, and save each batch as its own parquet file under
# DATA_DIR / "transfers_batches".
#
# The loop is resumable: a batch whose parquet file already exists is skipped,
# so a failed or interrupted run can be restarted without re-querying finished
# batches. Delete a batch file (or the whole folder) to force a refresh.
recipients_df = pd.read_csv(DATA_DIR / "arb_drop_data")

batch_size = 1000  # wallets per query; each address is inlined twice in the SQL text
# dropna: a missing address is a float NaN and would break the string join below
wallet_list = recipients_df['user_address'].dropna().unique()

batches_dir = DATA_DIR / "transfers_batches"
batches_dir.mkdir(parents=True, exist_ok=True)

for i in range(0, len(wallet_list), batch_size):
    batch_path = batches_dir / f"transfers_batch{i}.parquet"
    if batch_path.exists():
        print(f"Skipping batch {i}: {batch_path.name} already exists")
        continue

    batch = wallet_list[i:i+batch_size].tolist()
    # NOTE(review): addresses are interpolated straight into the SQL text; this
    # assumes the CSV holds plain hex addresses with no quote characters.
    batch_str = "','".join(batch)

    sql = f"""
        SELECT block_number, block_timestamp, contract_address, from_address, to_address, amount
        FROM arbitrum.core.ez_token_transfers
        WHERE (from_address IN ('{batch_str}') OR to_address IN ('{batch_str}'))
        AND contract_address = LOWER('0x912CE59144191C1204E64559FE8253a0e49E6548')
    """

    # NOTE(review): only the result set returned by this single call is saved;
    # if the SDK paginates large results, rows beyond the first page would be
    # missing -- confirm against the Flipside SDK documentation.
    wallet_transfers = flipside.query(sql)
    wallet_transfers_df = pd.DataFrame(wallet_transfers.rows, columns=wallet_transfers.columns)

    # Write to a temporary name first and rename afterwards, so an interrupted
    # write never leaves a truncated file that the skip check above would trust.
    tmp_path = batch_path.with_name(batch_path.name + ".tmp")
    wallet_transfers_df.to_parquet(tmp_path, index=False)
    tmp_path.replace(batch_path)

    batch_end = min(i + batch_size, len(wallet_list))
    print(f"Saved batch {i} to {batch_end} with {len(wallet_transfers_df)} rows")

Stitch together files for transfers

In [8]:
import glob

In [12]:
import os
import glob

# Collect the per-batch parquet files produced by the query cell above. That
# cell writes to DATA_DIR / "transfers_batches" using the name
# "transfers_batch<offset>.parquet" (no underscore before the offset), so look
# in the same folder and match exactly that naming. "[0-9]*" requires a digit
# right after "transfers_batch", which keeps differently named files in the
# same folder from being picked up and double counted.
base_dir = os.path.join(str(DATA_DIR), 'transfers_batches')
pattern = os.path.join(base_dir, 'transfers_batch[0-9]*.parquet')

# Sorted so the concatenation order is stable between runs.
batch_files = sorted(glob.glob(pattern))
if not batch_files:
    # Fail here with a clear message instead of later inside pd.concat.
    raise FileNotFoundError(f"No transfer batch files match {pattern}")
# print(f"Found files: {batch_files}")


In [14]:
# batch_files = glob.glob("data/processed/transfers_batches/transfers_batch_*.parquet")
# Read every batch file into memory, then stack them into one frame with a
# fresh 0..n-1 index.
transfers_dfs = list(map(pd.read_parquet, batch_files))
transfers_df = pd.concat(transfers_dfs, ignore_index=True)
print(f"Combined all transfer batches: {len(transfers_df)} rows")


Combined all transfer batches: 1213667 rows


In [None]:
# Write the combined transfers to the data folder, then read the CSV back so
# transfers_df holds exactly what later runs will load from disk.
transfers_csv_path = DATA_DIR / "transfers_data.csv"
transfers_df.to_csv(transfers_csv_path, index=False)
transfers_df = pd.read_csv(transfers_csv_path)

In [11]:
# Reload the airdrop claims and give the columns merge-friendly names, so they
# do not collide with the transfer columns `block_timestamp` and `amount`.
claim_column_names = {
    'user_address': 'wallet_address',
    'block_timestamp': 'claim_time',
    'amount': 'claimed_amount',
}
recipients_df = pd.read_csv(DATA_DIR/"arb_drop_data").rename(columns=claim_column_names)
print(recipients_df.columns)

Index(['blockchain', 'airdrop', 'action_type', 'tx_hash', 'claim_time',
       'contract_address', 'wallet_address', 'claimed_amount', 'amount_usd',
       '__row_index', 'block_date'],
      dtype='object')


Merge transfers with recipients, then split the transactions into sent and received to capture the full scope of each airdrop wallet's balance

In [32]:
# Build one row per (recipient wallet, transfer) with a signed `delta` and the
# wallet's claim time. Works from the already-loaded transfers_df and
# recipients_df so the transfer batches do not have to be re-queried.

# Set of recipient addresses for fast membership tests
recipient_addresses = set(recipients_df['wallet_address'])

# Inbound: transfers received by a recipient count as a positive delta
inbound_df = transfers_df[transfers_df['to_address'].isin(recipient_addresses)].copy()
inbound_df['wallet'] = inbound_df['to_address']
inbound_df['delta'] = inbound_df['amount']

# Outbound: transfers sent by a recipient count as a negative delta
outbound_df = transfers_df[transfers_df['from_address'].isin(recipient_addresses)].copy()
outbound_df['wallet'] = outbound_df['from_address']
outbound_df['delta'] = -outbound_df['amount']

# A transfer between two recipients shows up in both frames, once per wallet.
merged_df = pd.concat([inbound_df, outbound_df], ignore_index=True)

# Only claim_time is needed from the recipients table; merging just that column
# keeps the intermediate frame narrow and avoids suffixing clashing column
# names such as contract_address.
# NOTE(review): if a wallet appears more than once in recipients_df, each of
# its transfers is duplicated by this merge -- confirm wallet_address is unique.
claim_info = recipients_df[['wallet_address', 'claim_time']]
merged_df = merged_df.merge(claim_info, left_on='wallet', right_on='wallet_address', how='inner')

# Parse both timestamp columns once, as tz-aware UTC, so they can be compared.
# NOTE(review): claim_time carries no UTC offset in the source CSV; utc=True
# treats it as UTC -- confirm that matches how the data was exported.
merged_df['block_timestamp'] = pd.to_datetime(merged_df['block_timestamp'], utc=True)
merged_df['claim_time'] = pd.to_datetime(merged_df['claim_time'], utc=True)

# End of each wallet's 30-day window, measured from its own claim time
merged_df['day_30'] = merged_df['claim_time'] + pd.Timedelta(days=30)

# Keep only the columns used downstream
merged_df = merged_df[['wallet','block_timestamp','from_address','to_address','amount','delta','claim_time','day_30']]

# print("Merged transactions:", merged_df.shape)
# display(merged_df[['wallet','block_timestamp','from_address','to_address','amount','delta','claim_time','day_30']].head(5))


In [None]:
# Persist the merged transfers (with claim_time / day_30) so they can be
# reloaded from disk instead of being rebuilt.
merged_df.to_csv(DATA_DIR/"transfers_with_claim_info.csv", index=False)

In [None]:
# Reload the merged transfers. The three timestamp columns are parsed on read;
# without this they come back as plain strings and the 30-day window filter
# would compare text instead of datetimes.
merged_df = pd.read_csv(
    DATA_DIR/"transfers_with_claim_info.csv",
    parse_dates=['block_timestamp', 'claim_time', 'day_30'],
)

Filter for 30 days

In [None]:
# Keep only transfers that fall inside each wallet's 30-day window
# (claim_time <= block_timestamp <= day_30, both ends inclusive) and bucket
# them by calendar day.

# When merged_df has been reloaded from CSV these columns may be plain strings;
# parse them as tz-aware UTC first so the window test compares datetimes.
# assign() returns a new frame, so merged_df itself is left untouched.
timestamp_cols = ['block_timestamp', 'claim_time', 'day_30']
merged_ts = merged_df.assign(
    **{col: pd.to_datetime(merged_df[col], utc=True) for col in timestamp_cols}
)

in_window = (
    (merged_ts['block_timestamp'] >= merged_ts['claim_time']) &
    (merged_ts['block_timestamp'] <= merged_ts['day_30'])
)
filtered_df = merged_ts[in_window].copy()

# net_delta is the signed transfer amount; take it from the filtered rows
# directly rather than relying on index alignment with merged_df.
filtered_df['net_delta'] = filtered_df['delta']

# Convert timestamps to date (daily buckets): midnight UTC of the transfer day
filtered_df['date'] = filtered_df['block_timestamp'].dt.normalize()

filtered_df.to_csv(DATA_DIR/"transfers_within_30days.csv", index=False)

In [49]:
# Preview the first rows of the 30-day window frame.
display(filtered_df.head())

Unnamed: 0.1,Unnamed: 0,wallet,block_timestamp,from_address,to_address,amount,delta,claim_time,day_30,net_delta,date
4,4,0xc7e69ea5d4d6ea1f5cbeb97dbd6c20a8efd98ab1,2023-03-23 22:01:52+00:00,0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9,0xc7e69ea5d4d6ea1f5cbeb97dbd6c20a8efd98ab1,875.0,875.0,2023-03-23 22:01:52+00:00,2023-04-22 22:01:52+00:00,875.0,2023-03-23 00:00:00+00:00
5,5,0xfbb18a7fa16f4cb03316fff1827022a11628df4b,2023-03-23 22:10:48+00:00,0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9,0xfbb18a7fa16f4cb03316fff1827022a11628df4b,875.0,875.0,2023-03-23 22:10:48+00:00,2023-04-22 22:10:48+00:00,875.0,2023-03-23 00:00:00+00:00
6,6,0x2f5f679f7188cd9b8deb2741dc23f067b44e1518,2023-03-23 22:10:35+00:00,0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9,0x2f5f679f7188cd9b8deb2741dc23f067b44e1518,3500.0,3500.0,2023-03-23 22:10:35+00:00,2023-04-22 22:10:35+00:00,3500.0,2023-03-23 00:00:00+00:00
7,7,0x0a03b0449f726fca74b37c8e6d8266bfc3feccc8,2023-03-23 22:12:26+00:00,0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9,0x0a03b0449f726fca74b37c8e6d8266bfc3feccc8,1500.0,1500.0,2023-03-23 22:12:26+00:00,2023-04-22 22:12:26+00:00,1500.0,2023-03-23 00:00:00+00:00
8,8,0x216990612f9bc39e4e2ab5e5e4774111ba5e6aeb,2023-03-23 22:02:16+00:00,0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9,0x216990612f9bc39e4e2ab5e5e4774111ba5e6aeb,1125.0,1125.0,2023-03-23 22:02:16+00:00,2023-04-22 22:02:16+00:00,1125.0,2023-03-23 00:00:00+00:00


Daily Wallet Balances

In [None]:

# Net token flow per wallet per day: sum the signed deltas within each
# (wallet, date) group. groupby sorts by its keys, so the rows of each wallet
# come out in ascending date order.
flow_by_wallet_day = filtered_df.groupby(['wallet', 'date'])['net_delta']
daily_flow = flow_by_wallet_day.sum().reset_index()

# Running balance per wallet: cumulative sum of that wallet's daily net flows
running_balance = daily_flow.groupby('wallet')['net_delta'].cumsum()
daily_flow['cumulative_balance'] = running_balance

# print("Daily flow records:", daily_flow.shape)
display(daily_flow.head(10))
# TODO (optional): make sure every wallet has all 30 days, filling missing days with a 0 delta


Unnamed: 0,wallet,date,net_delta,cumulative_balance
0,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-23 00:00:00+00:00,0.0,0.0
1,0x0000000000dfd67ffd6c24251348f7c4f933cab4,2023-03-23 00:00:00+00:00,0.0,0.0
2,0x0000000000e189dd664b9ab08a33c4839953852c,2023-03-23 00:00:00+00:00,0.0,0.0
3,0x000000000279ef217428b1c3906ec8124784b70f,2023-03-23 00:00:00+00:00,0.0,0.0
4,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,2023-03-23 00:00:00+00:00,0.0,0.0
5,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,2023-04-05 00:00:00+00:00,10.0,10.0
6,0x000000001786891880872aa0895d9175f99dc9f5,2023-03-23 00:00:00+00:00,0.0,0.0
7,0x00000000747b9910b5d295b231429d47060c3995,2023-03-23 00:00:00+00:00,0.0,0.0
8,0x000000009fca059dda53d07589298ea6842bd8f9,2023-03-23 00:00:00+00:00,0.0,0.0
9,0x00000000e8334445e02d819eafd75addf2133a5f,2023-03-23 00:00:00+00:00,0.0,0.0


In [None]:
# daily_balances.head(200)

# Save the per-wallet daily net flows and running balances held in daily_flow.
daily_flow.to_csv(DATA_DIR/"daily_balances.csv", index=False)

30 Day Transfers, and Wallet Snapshot

In [None]:
# Last running balance recorded for each wallet in daily_flow, i.e. its balance
# after the final in-window day that had any transfer.
final_balances = (
    daily_flow
    .groupby('wallet')['cumulative_balance']
    .last()
    .reset_index()
    .rename(columns={'cumulative_balance': 'cumulative_balance_day_30'})
)

# Attach each wallet's claimed amount and express the balance as a share of it
claimed_amounts = recipients_df[['wallet_address','claimed_amount']]
final_df = final_balances.merge(
    claimed_amounts,
    left_on='wallet',
    right_on='wallet_address',
    how='left',
)
final_df['balance_retention_ratio'] = (
    final_df['cumulative_balance_day_30'] / final_df['claimed_amount']
)

final_df.to_csv(DATA_DIR/"wallet_30_day_balance_snapshot.csv", index=False)

# print(final_df.info())


QC/EDA work

In [44]:
# QC: list the columns of the 30-day snapshot frame.
print(final_df.columns)

Index(['wallet', 'cumulative_balance_day_30', 'wallet_address',
       'claimed_amount', 'balance_retention_ratio'],
      dtype='object')


In [45]:
# QC: preview the first rows of the 30-day snapshot frame.
display(final_df.head())

Unnamed: 0,wallet,cumulative_balance_day_30,wallet_address,claimed_amount,balance_retention_ratio
0,0x00000000009a41862f3b2b0c688b7c0d1940511e,0.0,0x00000000009a41862f3b2b0c688b7c0d1940511e,4250,0.0
1,0x0000000000dfd67ffd6c24251348f7c4f933cab4,0.0,0x0000000000dfd67ffd6c24251348f7c4f933cab4,1750,0.0
2,0x0000000000e189dd664b9ab08a33c4839953852c,0.0,0x0000000000e189dd664b9ab08a33c4839953852c,2250,0.0
3,0x000000000279ef217428b1c3906ec8124784b70f,0.0,0x000000000279ef217428b1c3906ec8124784b70f,3250,0.0
4,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,10.0,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,625,0.016


In [None]:
# NOTE(review): latest_balance is not assigned in any cell visible in this
# notebook, so this cell presumably relies on leftover kernel state and would
# raise NameError on a fresh run -- define it above or remove this cell.
display(latest_balance.head())

Unnamed: 0,wallet_address,block_timestamp,delta,cumulative_balance,claim_time,claimed_amount,day_30
37,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-23 14:51:03+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00
41,0x0000000000dfd67ffd6c24251348f7c4f933cab4,2023-03-23 13:28:56+00:00,-1750.0,0.0,2023-03-23 13:28:55+00:00,1750,2023-04-22 13:28:55+00:00
43,0x0000000000e189dd664b9ab08a33c4839953852c,2023-03-23 13:26:55+00:00,-2250.0,0.0,2023-03-23 13:26:41+00:00,2250,2023-04-22 13:26:41+00:00
45,0x000000000279ef217428b1c3906ec8124784b70f,2023-03-23 13:28:01+00:00,-3250.0,0.0,2023-03-23 13:27:58+00:00,3250,2023-04-22 13:27:58+00:00
48,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,2023-04-05 07:13:32+00:00,10.0,10.0,2023-03-23 18:41:59+00:00,625,2023-04-22 18:41:59+00:00


In [None]:
# NOTE(review): the saved output under this cell lists different columns than
# final_df has where it is built above (no 'wallet' or
# 'balance_retention_ratio'), so that output looks stale -- re-run to confirm.
display(final_df.head())

Unnamed: 0,wallet_address,claimed_amount,cumulative_balance,block_timestamp
37,0x00000000009a41862f3b2b0c688b7c0d1940511e,4250,0.0,2023-03-23 14:51:03+00:00
41,0x0000000000dfd67ffd6c24251348f7c4f933cab4,1750,0.0,2023-03-23 13:28:56+00:00
43,0x0000000000e189dd664b9ab08a33c4839953852c,2250,0.0,2023-03-23 13:26:55+00:00
45,0x000000000279ef217428b1c3906ec8124784b70f,3250,0.0,2023-03-23 13:28:01+00:00
48,0x0000000009572a244a6c2d06ffe7be30e3bd2aec,625,10.0,2023-04-05 07:13:32+00:00


In [None]:
# NOTE(review): the saved output under this cell shows columns (wallet_address,
# cumulative_balance, claimed_amount) that the merge cell above does not keep
# in merged_df, so that output looks stale -- re-run to confirm.
display(merged_df.head())

Unnamed: 0,wallet_address,block_timestamp,delta,cumulative_balance,claim_time,claimed_amount,day_30
0,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-21 23:34:22+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00
1,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-21 23:34:23+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00
2,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-21 23:34:24+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00
3,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-22 21:24:45+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00
4,0x00000000009a41862f3b2b0c688b7c0d1940511e,2023-03-22 21:24:46+00:00,0.0,0.0,2023-03-23 13:38:43+00:00,4250,2023-04-22 13:38:43+00:00


In [None]:
# Reload the combined transfers from disk (this replaces whatever transfers_df
# currently holds) and preview the first rows.
transfers_df = pd.read_csv(DATA_DIR/"transfers_data.csv")
display(transfers_df.head())

Unnamed: 0.1,Unnamed: 0,block_number,block_timestamp,contract_address,from_address,to_address,amount,__row_index
0,0,226688053,2024-06-28T18:32:32.000Z,0x912ce59144191c1204e64559fe8253a0e49e6548,0x9eecf6e29f78c120567f7dcc28e3450872957af9,0xbffc62c0d48b8ab01d1a561e533c0f59256e6a00,0.32,0
1,1,226710648,2024-06-28T20:06:40.000Z,0x912ce59144191c1204e64559fe8253a0e49e6548,0x9eecf6e29f78c120567f7dcc28e3450872957af9,0xa9f5fc8c95e762537764fb5bf7f7e7dbde5f69ba,0.32,1
2,2,124716453,2023-08-25T04:53:41.000Z,0x912ce59144191c1204e64559fe8253a0e49e6548,0xb0f6ca40411360c03d41c5ffc5f179b8403cdcf8,0xf9a94f3c17f6fa72bb55d010211253c51fdc5332,7.334514,2
3,3,105837351,2023-06-28T16:54:23.000Z,0x912ce59144191c1204e64559fe8253a0e49e6548,0x1ebb73f5f47bcc3d7dc1dabf7284875e3ae40e07,0x6bdd1ad85ed3c9c2530d372dd8c37abdc5a22905,213.806204,3
4,4,105834958,2023-06-28T16:44:26.000Z,0x912ce59144191c1204e64559fe8253a0e49e6548,0x4e3cf9f44af72eb6726921ffbf3488074e24835a,0x1ebb73f5f47bcc3d7dc1dabf7284875e3ae40e07,300.616083,4


In [9]:
# Preview the airdrop claim records.
# NOTE(review): the saved output under this cell still shows the pre-rename
# headers (user_address, amount), so it was presumably produced before the
# rename cell ran -- re-run to refresh.
display(recipients_df.head())

Unnamed: 0,blockchain,airdrop,action_type,tx_hash,block_timestamp,contract_address,user_address,amount,amount_usd,__row_index,block_date
0,arbitrum,arbitrum,claim,0x9f41ea6bfcbe6d972ff3645bd2530a2ace6d61b1eee2...,2023-03-28 23:57:35,0x912ce59144191c1204e64559fe8253a0e49e6548,0x90fd36be4761a951ca9e9171c17fcfaa741b7da0,1125,1340.765842,0,2023-03-28
1,arbitrum,arbitrum,claim,0x64eb0a513100b0a5148fb702424b1fa5534e1ae3c954...,2023-03-28 03:40:54,0x912ce59144191c1204e64559fe8253a0e49e6548,0xd37cbdb6e64ad8872800188d2934675723c6a046,1750,1999.893927,1,2023-03-28
2,arbitrum,arbitrum,claim,0x7ebcc011b46a7e7566999a84ffd9144dcbb92509b571...,2023-03-28 12:01:17,0x912ce59144191c1204e64559fe8253a0e49e6548,0x3250dabb584f7fea1bafaff6000ffbbd2f419a15,1500,1693.045551,2,2023-03-28
3,arbitrum,arbitrum,claim,0x4e0b97bc90e577a461c3803c4c36c3b5c60f016cd9ab...,2023-03-28 03:28:22,0x912ce59144191c1204e64559fe8253a0e49e6548,0xc551bd0275f187ce742f23dafcb9ef31ee82b836,875,999.946963,3,2023-03-28
4,arbitrum,arbitrum,claim,0x70d4e5f53bac3e1ce8bd2e4f8867dd33a91847cdcb2b...,2023-03-28 03:42:52,0x912ce59144191c1204e64559fe8253a0e49e6548,0x8793bd0ca22eaac2088afd56858a929ed4d16359,1125,1285.646096,4,2023-03-28
