In [2]:
import os
import re
import pandas as pd
from datetime import datetime

# Location of the exported results; the CSV read later in this notebook lives
# under this path (despite the name, the value looks like a directory).
OUTPUT_FILE = 'C:/Users/YuweiCao/Documents/GitHub/Project/Project/etherscan/result'
# Etherscan API key (free keys: https://etherscan.io/myapikey).
# Prefer supplying it through the ETHERSCAN_API_KEY environment variable.
# NOTE(review): the fallback literal is a credential committed with the notebook;
# it is kept only so existing runs behave the same -- rotate it and drop the fallback.
api_key = os.environ.get("ETHERSCAN_API_KEY", "VQAIR728IM4Z8RZKPYBR4ESM5I3WBZK2C1")
base_url = "https://api.etherscan.io/v2/api" # We're using the v2 API 2024/12/12
# Address under analysis: later cells compare it with the 'from'/'to' columns of
# the ERC-20 transfer rows to decide the direction of each transfer.
ADDRESS = "0x5be9a4959308A0D0c7bC0870E319314d8D957dBB"

In [3]:
def highlight_three_records(grouped_df):
    """Print an alert for every group that contains exactly three records.

    Args:
        grouped_df: Any iterable yielding ``(key, group)`` pairs where
            ``len(group)`` is defined, e.g. the result of
            ``DataFrame.groupby('hash')``.

    Returns:
        None. For each three-record group the alert line and then the group
        itself are written to standard output.
    """
    # Lazily keep only the groups of size three; order of the input is preserved.
    triples = ((key, rows) for key, rows in grouped_df if len(rows) == 3)
    for key, rows in triples:
        print(f"⚠️ High Alert: Transaction Hash {key} contains 3 records:")
        print(rows)


def format_number(value, unit=None):
    """Render a number with a K/M/B magnitude suffix.

    Args:
        value: The number to format.
        unit: Optional fixed unit. ``None`` (default) picks the suffix from
            the size of ``value``. ``"K"``, ``"M"`` or ``"B"`` (case-insensitive)
            forces that scale regardless of magnitude; ``""`` forces a plain
            integer with no suffix.

    Returns:
        The formatted string: an integer below 1,000 in automatic mode,
        otherwise one decimal place followed by the suffix.

    Raises:
        ValueError: If ``unit`` is not one of the supported suffixes.
    """
    if unit is None:
        # Automatic mode: choose the largest suffix that keeps the number >= 1.
        if value < 1_000:
            return f"{int(value)}"
        elif value < 1_000_000:
            return f"{value / 1_000:.1f}K"
        elif value < 1_000_000_000:
            return f"{value / 1_000_000:.1f}M"
        else:
            return f"{value / 1_000_000_000:.1f}B"

    divisors = {"": 1, "K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    suffix = unit.upper()
    if suffix not in divisors:
        raise ValueError(f"Unsupported unit {unit!r}; expected one of '', 'K', 'M', 'B'")
    if suffix == "":
        return f"{int(value)}"
    # Fixed mode: every value is expressed in the same unit so columns line up.
    return f"{value / divisors[suffix]:.1f}{suffix}"


def unify_values_in_output(output: str, unit: str = "M") -> str:
    """Rewrite every standalone integer in ``output`` using a single unit.

    Args:
        output: Text containing integer literals.
        unit: Unit passed on to ``format_number`` (default ``"M"``).

    Returns:
        ``output`` with each digit run replaced by its formatted form.

    Note:
        The pattern matches every digit run delimited by word boundaries, so
        the pieces of decimals, dates and times are rewritten as well.
    """
    def replacer(match):
        return format_number(int(match.group()), unit)

    return re.sub(r'\b\d+\b', replacer, output)


In [4]:
# Symbols treated as the quote ("base") side when classifying a swap.
BASE_TOKENS = {"USDT", "USDC", "USDE"}
# Lower-cased so it can be compared with the 'from'/'to' columns below.
# NOTE(review): assumes the CSV stores addresses in lowercase -- confirm.
ADDRESS = ADDRESS.lower()

csv_file = 'C:/Users/YuweiCao/Documents/GitHub/Project/Project/etherscan/result/erc20_transfers.csv'
transaction_data = pd.read_csv(csv_file)
transaction_data['dateTime'] = pd.to_datetime(transaction_data['dateTime'])
# Chronological order, with the hash as tie-breaker so that rows belonging to
# the same transaction end up next to each other.
transaction_data = transaction_data.sort_values(by=['dateTime', 'hash']).reset_index(drop=True)

# Rows whose hash occurs more than once: several token transfers in one transaction.
duplicate_hashes = transaction_data[transaction_data.duplicated(subset=['hash'], keep=False)]
duplicate_groups = duplicate_hashes.groupby('hash')

highlight_three_records(duplicate_groups) # just a check

output_records = []

for hash_val, group in duplicate_groups:
    base_tokens = group[group['tokenSymbol'].isin(BASE_TOKENS)]
    other_tokens = group[~group['tokenSymbol'].isin(BASE_TOKENS)]

    # Only transactions that move both a base token and some other token are
    # reported here.
    if not base_tokens.empty and not other_tokens.empty:
        # Direction is taken from the first base-token row only: base token
        # arriving at ADDRESS is labelled SELL, anything else BUY.
        if base_tokens['to'].iloc[0] == ADDRESS:
            transaction_type = "SELL"
        else:
            transaction_type = "BUY"

        # Amounts are summed over all rows on each side, but only the first
        # row's symbol is shown.
        # NOTE(review): the sum is only meaningful if 'value' was parsed as a
        # numeric column -- confirm the dtype for very large raw amounts.
        base_token_info = f"{base_tokens['value'].sum()} {base_tokens['tokenSymbol'].iloc[0]}"
        other_token_info = f"{other_tokens['value'].sum()} {other_tokens['tokenSymbol'].iloc[0]}"

        record = f"{group['timeStamp'].iloc[0]} W {transaction_type} {other_token_info} of {base_token_info} at {group['dateTime'].iloc[0]}"
        output_records.append({
            "formatted_record": record,
            "timeStamp": group['timeStamp'].iloc[0],
            "dateTime": group['dateTime'].iloc[0]
        })

# Explicit columns keep the frame usable when no swap was found; without them
# an empty frame has no 'dateTime' column and the sort below raises KeyError.
output_df = pd.DataFrame(output_records, columns=["formatted_record", "timeStamp", "dateTime"])

output_df = output_df.sort_values(by='dateTime').reset_index(drop=True)

print("\nFormatted Transactions:")
for record in output_df['formatted_record']:
    print(record)


Formatted Transactions:
1734205847 W BUY 257808363098851238232132 ENA of 250008766782 USDC at 2024-12-14 19:50:47
1734205871 W BUY 128893003367469683383588 ENA of 125003842726 USDC at 2024-12-14 19:51:11
1734206291 W BUY 123253485163948854476800 ENA of 125000000000 USDC at 2024-12-14 19:58:11
1734264011 W BUY 179750061800199429860 AAVE of 64704751697 USDC at 2024-12-15 12:00:11
1734264827 W BUY 72076335871397514311 AAVE of 25930638795 USDC at 2024-12-15 12:13:47
1734303107 W BUY 134216187628345433752477 ONDO of 250000000000 USDC at 2024-12-15 22:51:47


In [5]:
# Second pass: walk the time-ordered transfers and pair each row with its
# immediate successor when the two move in opposite directions relative to
# ADDRESS and at least one of them is a base token.
# NOTE(review): pairing looks only at adjacency, not at the transaction hash,
# and rows already reported as swaps are not excluded first -- confirm intended.
matched_records = []
matched_indices = []  # positional indices of the rows consumed by a pair

skip_next = False
for i in range(len(transaction_data) - 1):
    if skip_next:
        # This row was the second half of the previous pair.
        skip_next = False
        continue

    current_row, next_row = transaction_data.iloc[i], transaction_data.iloc[i + 1]

    inbound_then_outbound = current_row['to'] == ADDRESS and next_row['from'] == ADDRESS
    outbound_then_inbound = current_row['from'] == ADDRESS and next_row['to'] == ADDRESS
    if not (inbound_then_outbound or outbound_then_inbound):
        continue

    involves_base = (current_row['tokenSymbol'] in BASE_TOKENS
                     or next_row['tokenSymbol'] in BASE_TOKENS)
    if not involves_base:
        continue

    # The label depends only on the direction of the individual transfer.
    transaction_type_1 = "\"SELL\"" if current_row['to'] == ADDRESS else "\"BUY\""
    transaction_type_2 = "\"SELL\"" if next_row['to'] == ADDRESS else "\"BUY\""

    record_1 = f"{current_row['timeStamp']} W {transaction_type_1} {current_row['value']} {current_row['tokenSymbol']} at {current_row['dateTime']}"
    record_2 = f"{next_row['timeStamp']} W {transaction_type_2} {next_row['value']} {next_row['tokenSymbol']} at {next_row['dateTime']}"

    matched_records.extend([
        {"formatted_record": record_1, "dateTime": current_row['dateTime']},
        {"formatted_record": record_2, "dateTime": next_row['dateTime']},
    ])
    matched_indices += [i, i + 1]

    # Do not let the second row start a new pair.
    skip_next = True

# Remove the matched rows from transaction_data
remaining_data = transaction_data.drop(index=matched_indices).reset_index(drop=True)

# Turn the matches into a DataFrame
matched_df = pd.DataFrame(matched_records)

# Merge the first-class (swap) and second-class (paired) records, ordered by time
combined_df = (
    pd.concat([output_df, matched_df], ignore_index=True)
    .sort_values(by='dateTime')
    .reset_index(drop=True)
)

# Print the final result
print("\nFinal Combined Transactions (Sorted by Time):")
for record in combined_df['formatted_record']:
    print(record)




Final Combined Transactions (Sorted by Time):
1734203075 W "SELL" 1029373122 USDT at 2024-12-14 19:04:35
1734205847 W BUY 257808363098851238232132 ENA of 250008766782 USDC at 2024-12-14 19:50:47
1734205847 W "BUY" 250008766782 USDC at 2024-12-14 19:50:47
1734205847 W "SELL" 257808363098851238232132 ENA at 2024-12-14 19:50:47
1734205871 W BUY 128893003367469683383588 ENA of 125003842726 USDC at 2024-12-14 19:51:11
1734205871 W "BUY" 125003842726 USDC at 2024-12-14 19:51:11
1734206003 W "SELL" 36999000000 USDT at 2024-12-14 19:53:23
1734206291 W BUY 123253485163948854476800 ENA of 125000000000 USDC at 2024-12-14 19:58:11
1734206291 W "BUY" 125000000000 USDC at 2024-12-14 19:58:11
1734255047 W "SELL" 300000000 USDT at 2024-12-15 09:30:47
1734264011 W "BUY" 64704751697 USDC at 2024-12-15 12:00:11
1734264011 W BUY 179750061800199429860 AAVE of 64704751697 USDC at 2024-12-15 12:00:11
1734264755 W "SELL" 1744000000 USDT at 2024-12-15 12:12:35
1734264827 W BUY 72076335871397514311 AAVE of 259

In [6]:
# Third pass: label every transfer that was not consumed by the pairing step.
single_records = []
skipped_rows = 0  # rows where ADDRESS is neither sender nor receiver

for row_idx, row in remaining_data.iterrows():
    sent_by_address = row['from'] == ADDRESS
    received_by_address = row['to'] == ADDRESS

    if not (sent_by_address or received_by_address):
        # Without this guard the label would be undefined for the first such
        # row, or silently reused from an earlier row or cell.
        skipped_rows += 1
        continue

    # Base tokens: outgoing is SELL, incoming is BUY. Other tokens use the
    # opposite labels. The sender check wins when both sides match.
    if row['tokenSymbol'] in BASE_TOKENS:
        transaction_type = "single SELL" if sent_by_address else "single BUY"
    else:
        transaction_type = "single BUY" if sent_by_address else "single SELL"

    record = f"{row['timeStamp']} W {transaction_type} {row['value']} {row['tokenSymbol']} at {row['dateTime']}"
    single_records.append({
        "formatted_record": record,
        "dateTime": row['dateTime'],
        "timeStamp": row['timeStamp'],
        "hash": row['hash']
    })

if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) in which {ADDRESS} is neither sender nor receiver")

single_df = pd.DataFrame(single_records)

# All three record classes together, ordered by time.
final_combined_df = pd.concat([output_df, matched_df, single_df], ignore_index=True)

final_combined_df = final_combined_df.sort_values(by='dateTime').reset_index(drop=True)
for record in final_combined_df['formatted_record']:
    print(record)


1734140135 W single BUY 100000000 USDT at 2024-12-14 01:35:35
1734140519 W single BUY 1400000000 USDT at 2024-12-14 01:41:59
1734142559 W single BUY 35925000 USDT at 2024-12-14 02:15:59
1734143207 W single BUY 50765898 USDT at 2024-12-14 02:26:47
1734155339 W single BUY 23068490 USDT at 2024-12-14 05:48:59
1734156323 W single BUY 127030000 USDT at 2024-12-14 06:05:23
1734158243 W single BUY 32527089 USDT at 2024-12-14 06:37:23
1734160163 W single BUY 1400000000 USDT at 2024-12-14 07:09:23
1734163331 W single BUY 583699388 USDC at 2024-12-14 08:02:11
1734163967 W single BUY 574338722 USDT at 2024-12-14 08:12:47
1734166451 W single BUY 1500000000 USDT at 2024-12-14 08:54:11
1734166799 W single SELL 1488000000000000000000 Groyper at 2024-12-14 08:59:59
1734167243 W single SELL 1352000000000000000000 Groyper at 2024-12-14 09:07:23
1734167291 W single SELL 311000000000000000000 Groyper at 2024-12-14 09:08:11
1734167411 W single SELL 2316000000000000000000 Groyper at 2024-12-14 09:10:11
1734