In [5]:
import pandas as pd
from utils import merge_files

In [6]:
def create_interactions(transaction_info):
    """Number each transaction within its ``to_address`` group, oldest first.

    Mimics SQL ``ROW_NUMBER() OVER (PARTITION BY to_address ORDER BY
    block_timestamp)``.

    Parameters
    ----------
    transaction_info : pandas.DataFrame
        Must contain ``to_address`` and ``block_timestamp`` columns.
        ``block_timestamp`` is passed through ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every input column, with ``block_timestamp``
        converted to datetime, rows ordered by ``to_address`` and then
        ``block_timestamp``, and a ``contract_interact`` column that counts
        from 1 within each ``to_address``. Row index labels are carried over
        from the input. The argument itself is not modified.

    Raises
    ------
    KeyError
        If ``to_address`` or ``block_timestamp`` is missing.
    """
    # Build the converted column on a fresh frame instead of assigning into
    # the argument: the caller's data stays untouched, and passing a slice of
    # another frame does not trigger SettingWithCopyWarning.
    ordered = transaction_info.assign(
        block_timestamp=pd.to_datetime(transaction_info['block_timestamp'])
    ).sort_values(by=['to_address', 'block_timestamp'])

    # cumcount() is 0-based and follows the current row order, so after the
    # sort above "+ 1" yields the per-address row number.
    ordered['contract_interact'] = ordered.groupby('to_address').cumcount() + 1

    return ordered

def time_slice_df(df, num_rows=50, time_col='block_timestamp', sus_col='is_sus', how='last'):
    """Return up to ``num_rows`` rows, newest first, starting at the most recent flagged row.

    The frame is ordered by ``time_col`` descending. The window starts at the
    first row in that order whose ``sus_col`` equals 1 (i.e. the most recent
    flagged row) and extends towards older rows. If no row is flagged, the
    window starts at the newest row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain ``time_col`` and ``sus_col``. Not modified.
    num_rows : int, default 50
        Maximum number of rows in the returned window.
    time_col : str, default 'block_timestamp'
        Column to order by; it is passed through ``pd.to_datetime``.
    sus_col : str, default 'is_sus'
        Column compared against ``1`` to find the starting row.
    how : str, default 'last'
        Currently unused; kept so existing call sites remain valid.

    Returns
    -------
    pandas.DataFrame
        An independent frame with at most ``num_rows`` rows. Because the index
        is reset without ``drop=True``, the previous index is kept as a column
        (named ``index`` for a default unnamed index).
        NOTE(review): that extra column ends up in anything written from the
        result; it is preserved here so the output schema does not change.

    Raises
    ------
    KeyError
        If ``time_col`` or ``sus_col`` is missing.
    """
    # Work on a copy so the datetime conversion does not leak into the
    # caller's frame.
    ordered = df.copy()
    ordered[time_col] = pd.to_datetime(ordered[time_col])

    # Newest first; after reset_index() the index labels equal row positions,
    # which is what allows a label to be used with iloc below.
    ordered = ordered.sort_values(by=time_col, ascending=False).reset_index()

    # Evaluate the flag filter once. idxmax() on a boolean Series returns the
    # label of the first True, so it is only called when a True exists.
    is_flagged = ordered[sus_col] == 1
    start = int(is_flagged.idxmax()) if is_flagged.any() else 0

    # Copy so callers can add columns to the result without
    # SettingWithCopyWarning.
    return ordered.iloc[start:start + num_rows].copy()


In [9]:
import glob
import os

folder_path = '../dataset/real-time/test_after_attack_preprocessed'  # Change this to your folder path
prefix = '50_'

# The output folder is the input folder with `prefix` inserted after each
# '../' (here: ../50_dataset/...). It is the same for every file, so it is
# derived once up front.
new_dir = folder_path.replace('../', '../' + prefix)
if new_dir == folder_path:
    # Without a '../' to rewrite, the output path would equal the input path
    # and each source CSV would be overwritten by its truncated slice.
    raise ValueError(
        f"folder_path {folder_path!r} contains no '../' segment, so the output "
        "folder would be the input folder"
    )

# glob returns matches in arbitrary order; sort so runs are reproducible.
csv_files = sorted(glob.glob(f'{folder_path}/*.csv'))

# Loop through the CSV files
for file in csv_files:
    # Read each CSV file into a DataFrame
    df = pd.read_csv(file)
    print(f'Processing {file}')

    # Keep the window anchored at the most recent flagged row, then number the
    # interactions per destination address inside that window.
    df_top_50 = time_slice_df(df)
    df_top_50 = create_interactions(df_top_50)

    # Build the destination from the output folder and the bare file name
    # instead of string-replacing inside the globbed path.
    new_path = os.path.join(new_dir, os.path.basename(file))
    os.makedirs(new_dir, exist_ok=True)
    df_top_50.to_csv(new_path, index=False)

Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_barley.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_chainpaint.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_curve.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_earningfarm.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_gooddollar.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_nfttrader.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_peapods.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_rugged.csv
Processing ../dataset/real-time/test_after_attack_preprocessed\after_attack_sturdy.csv


In [8]:

# import glob
# import os

# folder_path = '../50_dataset/real-time/train_preprocessed'  # Change this to your folder path

# # Use glob to get all CSV files in the folder
# csv_files = glob.glob(f'{folder_path}/*.csv')
# prefix = '50_'
# # Loop through the CSV files
# for file in csv_files:
#     # Read each CSV file into a DataFrame
#     df = pd.read_csv(file)
#     # print(f'Processing {file}')
#     # df_top_50 = time_slice_df(df)

#     # new_dir = folder_path.replace('../','../'+prefix)
#     # new_path = file.replace('../','../'+prefix)
#     # os.makedirs(new_dir, exist_ok=True)
#     new_contract_interact = create_interactions(df)
#     new_contract_interact.to_csv(file,index=False)