# Standardizing Event Data for DeRisk

DeRisk currently works with event data in a raw format where relevant data (like the user, token, amount, etc.) is saved in a dictionary or list-like structure in one of the columns. To better extract information from the database, it is helpful to have a unified data structure where all relevant pieces of information are saved in separate columns. This allows for easy querying of all events of a given type (deposit, withdrawal, liquidation) for a given user, or for a lending protocol.

The following steps outline the process to take a sample of events and convert them to a standardized format that can be used to store information about any type of event and for any lending protocol.


In [1]:
#import libraries
import pandas as pd 

#display settings
pd.set_option('display.max_colwidth', None)

## 1.  Load event data from the paraquet file

In [2]:
def load_protocol_data(protocols: list[str]) -> pd.DataFrame:
    """
    Load data from Google Storage for the specified protocols and combine them into a single DataFrame.
    
    Parameters:
    protocols (list[str]): A list of protocol names to load data for.
    
    Returns:
    pd.DataFrame: A DataFrame containing the combined data from all specified protocols.
    
    """    
    combined_protocols_df = pd.DataFrame()
    
    for protocol in protocols:
        # Read from google storage
        url = f"https://storage.googleapis.com/derisk-persistent-state/{protocol}_data/events_sample.parquet"      
        print(f"Processing {protocol} from Google Storage...")
        df_protocol = pd.read_parquet(url)
        
        # Read from SQL DB
        ## Uncomment the following code block and comment the above three lines to load data from a local database
        # connection = src.db.establish_connection()
        # query = f"SELECT * FROM {protocol}_data" # Ensure table name is correct
        # print(f"Processing {protocol} from local database...")
        # df_protocol = pd.read_sql(query, con = connection)
        # connection.close()
        
        df_protocol['Protocol'] = protocol
        combined_protocols_df = pd.concat([combined_protocols_df, df_protocol], ignore_index=True)
    return combined_protocols_df

# List of protocols
PROTOCOLS = ['zklend', 'nostra_alpha', 'nostra_mainnet', 'hashstack_v0', 'hashstack_v1']

# Load the data
events = load_protocol_data(PROTOCOLS)
print(f"Combined dataframe shape: {events.shape}")

Processing zklend from Google Storage...
Processing nostra_alpha from Google Storage...
Processing nostra_mainnet from Google Storage...
Processing hashstack_v0 from Google Storage...
Processing hashstack_v1 from Google Storage...
Combined dataframe shape: (305033, 11)


In [None]:
events.head()

## 2. Define a function to transform each row
Define a function that decodes the keys and data columns, converts the hexadecimal strings to integers, and structures the information into a standardized format.

### For zklend Protocol, these are the events and their relevant data.
- for accumulators_sync event: token is in data[0], lending_accumulator is in data[1], debt_accumulator is in data[2]
- for deposit event: user is in data[0], token is in data[1], face_amount is in data[2]
- for collateral_enabled event: user is in data[0], token is in data[1]
- for collateral_disabled event: user is in data[0], token is in data[1]
- for withdrawal event: user is in data[0], token is in data[1], face_amount is in data[2]
- for borrowing event: user is in data[0], token is in data[1], raw_amount is in data[2], face_amount is in data[3]
- for repayment event: user is in data[1], token is in data[2], raw_amount is in data[3]
- for liquidation event: user is in data[1], debt_token is in data[2], debt_raw_amnt is in data[3], collateral_token is in data[5], collateral_face_amnt is in data[6]

In [None]:
# Define a function to decode bytes, convert to list, and extract specific elements based on the event
def hex_to_int(hex_str: str) -> int:
        return int(hex_str, 16)

def standardize_zklend_event(row: pd.Series) -> pd.Series:
    try:
        # Decode the bytes to a string and convert the string to an actual list
        data_str = row['data'].decode('utf-8')
        data_list = eval(data_str)
        
        if row['key_name'] == 'Deposit':
            return pd.Series({
                'user': data_list[0],
                'token': data_list[1],
                'face_amount': hex_to_int(data_list[2])
            })
        elif row['key_name'] == 'Withdrawal':
            return pd.Series({
                'user': data_list[0],
                'token': data_list[1],
                'face_amount': hex_to_int(data_list[2])
            })
        elif row['key_name'] == 'Liquidation':
            return pd.Series({
                'user': data_list[1],
                'debt_token': data_list[2],
                'debt_raw_amount': hex_to_int(data_list[3]),
                'collateral_token': data_list[5],
                'collateral_face_amount': data_list[6]
            })
        elif row['key_name'] == 'CollateralEnabled':
            return pd.Series({
                'user': data_list[0],
                'token': data_list[1]
            })
        elif row['key_name'] == 'CollateralDisabled':
            return pd.Series({
                'user': data_list[0],
                'token': data_list[1]
            })
        elif row['key_name'] == 'Borrowing':
            return pd.Series({
                'user': data_list[0],
                'token': data_list[1],
                'raw_amount': hex_to_int(data_list[2]),
                'face_amount': hex_to_int(data_list[3])
            })
        elif row['key_name'] == 'Repayment':
            return pd.Series({
                'user': data_list[1],
                'token': data_list[2],
                'raw_amount': hex_to_int(data_list[3])
            })
        elif row['key_name'] == 'AccumulatorsSync':
            return pd.Series({
                'token': data_list[0],
                'lending_accumulator': data_list[1],
                'debt_accumulator': data_list[2]
            })
        else:
            raise ValueError(f"Unrecognized key_name: {row['key_name']}") # Raising an error for unrecognized key_name
    except Exception as e:
        # Handle any parsing errors
        raise ValueError(f"Failed to extract data from row: {str(e)}") from e

# Filter the dataset for 'zklend' protocol
zklend_events = events[events['Protocol'] == 'zklend']

# Apply the extraction function to each row
standardized_zklend_events = zklend_events.apply(standardize_zklend_event, axis=1)

# Concatenate the extracted elements with the original dataframe
standardized_zklend_events = pd.concat([zklend_events[['block_hash', 'block_number', 'transaction_hash', 'timestamp', 'key_name']], standardized_zklend_events], axis=1)

# display the dataframe
standardized_zklend_events.head()

# Uncomment the following below to Save the resulting dataframe to a new CSV file
# output_file_path = 'standardized_zklend_events.csv'
# standardized_zklend_events.to_csv(output_file_path, index=False)

### For nostra_alpha protocol, the events are mapped on tokens, here are the events.
- non_interest_bearing_collateral_mint_event: deposit
- non_interest_bearing_collateral_burn_event: withdrawal
- interest_bearing_collateral_mint_event: deposit
- interest_bearing_collateral_burn_event: withdrawal
- debt_mint_event: borrowing
- debt_burn_event : repayment


In [None]:
# Define the mapping for token addresses and their types
NOSTRA_ALPHA_TOKENS_TO_TOKEN_TYPES: dict[str, str] =  { 
    '0x0553cea5d1dc0e0157ffcd36a51a0ced717efdadd5ef1b4644352bb45bd35453': 'non_interest_bearing_collateral',
    '0x047e794d7c49c49fd2104a724cfa69a92c5a4b50a5753163802617394e973833': 'non_interest_bearing_collateral',
    '0x003cd2066f3c8b4677741b39db13acebba843bbbaa73d657412102ab4fd98601': 'non_interest_bearing_collateral',
    '0x04403e420521e7a4ca0dc5192af81ca0bb36de343564a9495e11c8d9ba6e9d17': 'non_interest_bearing_collateral',
    '0x06b59e2a746e141f90ec8b6e88e695265567ab3bdcf27059b4a15c89b0b7bd53': 'non_interest_bearing_collateral',
    '0x070f8a4fcd75190661ca09a7300b7c93fab93971b67ea712c664d7948a8a54c6': 'interest_bearing_collateral',
    '0x029959a546dda754dc823a7b8aa65862c5825faeaaf7938741d8ca6bfdc69e4e': 'interest_bearing_collateral',
    '0x055ba2baf189b98c59f6951a584a3a7d7d6ff2c4ef88639794e739557e1876f0': 'interest_bearing_collateral',
    '0x01ac55cabf2b79cf39b17ba0b43540a64205781c4b7850e881014aea6f89be58': 'interest_bearing_collateral',
    '0x00687b5d9e591844169bc6ad7d7256c4867a10cee6599625b9d78ea17a7caef9': 'interest_bearing_collateral',
    '0x040b091cb020d91f4a4b34396946b4d4e2a450dbd9410432ebdbfe10e55ee5e5': 'debt',
    '0x03b6058a9f6029b519bc72b2cc31bcb93ca704d0ab79fec2ae5d43f79ac07f7a': 'debt',
    '0x065c6c7119b738247583286021ea05acc6417aa86d391dcdda21843c1fc6e9c6': 'debt',
    '0x0362b4455f5f4cc108a5a1ab1fd2cc6c4f0c70597abb541a99cf2734435ec9cb': 'debt',
    '0x075b0d87aca8dee25df35cdc39a82b406168fa23a76fc3f03abbfdc6620bb6d7': 'debt'
}

NOSTRA_ALPHA_INTEREST_RATE_MODEL_ADDRESS: str = '0x03d39f7248fb2bfb960275746470f7fb470317350ad8656249ec66067559e892'
# Define the event type mapping based on the token type and key name
NOSTRA_ALPHA_TOKEN_TYPES_TO_EVENT_TYPES: dict[str, str] = {
    'non_interest_bearing_collateral': {
        'Mint': 'deposit',
        'Burn': 'withdrawal'
    },
    'interest_bearing_collateral': {
        'Mint': 'deposit',
        'Burn': 'withdrawal'
    },
    'debt': {
        'Mint': 'borrowing',
        'Burn': 'repayment'
    }
}

def get_nostra_alpha_event_type(token_address: str, key_name: str) ->str:
    # Special case for InterestStateUpdated and the specific interest rate model address
    if token_address == NOSTRA_ALPHA_INTEREST_RATE_MODEL_ADDRESS and key_name == 'InterestStateUpdated':
        return 'interest_rate_updated'

    token_type = NOSTRA_ALPHA_TOKENS_TO_TOKEN_TYPES.get(token_address)
    if token_type:
        event_type = NOSTRA_ALPHA_TOKEN_TYPES_TO_EVENT_TYPES.get(token_type, {}).get(key_name)
        if event_type:
            return event_type
    # If no match is found, raise an error
    raise ValueError(f"Unrecognized event type for token address {token_address} and key name {key_name}")

def standardize_nostra_alpha_event(row: pd.Series) -> pd.Series:
    try:
        # Decode the bytes to a string and convert the string to an actual list
        data_str = row['data'].decode('utf-8')
        data_list = eval(data_str)
        event_type = get_nostra_alpha_event_type(row['from_address'], row['key_name'])
        
        if row['key_name'] in ['Mint', 'Burn']:
            return pd.Series({
                'user': data_list[0],
                'token': row['from_address'],
                'face_amount': hex_to_int(data_list[1]),
                'event_type': event_type
            })
        elif row['key_name'] == 'InterestStateUpdated':
            return pd.Series({
                'debt_token': data_list[0],
                'lending_rate': data_list[1],
                'borrow_rate': data_list[3],
                'lend_index': data_list[5],
                'borrow_index': data_list[7],
                'event_type': 'interest_rate_updated'
                
            })
        else:
            raise ValueError(f"Unrecognized key_name: {row['key_name']}") # Raising an error for unrecognized key_name
    except Exception as e:
        # Handle any parsing errors
        raise ValueError(f"Failed to extract data from row: {str(e)}") from e

# Filter the dataset for 'nostra_alpha' protocol
nostra_alpha_events = events[events['Protocol'] == 'nostra_alpha']

# Apply the extraction function to each row
standardized_nostra_alpha_events = nostra_alpha_events.apply(standardize_nostra_alpha_event, axis=1)

# Concatenate the extracted elements with the original dataframe
standardized_nostra_alpha_events = pd.concat([nostra_alpha_events[['block_hash', 'block_number', 'transaction_hash', 'timestamp', 'key_name']], standardized_nostra_alpha_events], axis=1)

# Show a sample of the resulting dataframe
standardized_nostra_alpha_events.head()

# Uncomment the following to save the resulting dataframe to a new CSV file
# output_file_path = 'standardized_nostra_alpha_events.csv'
# standard_nalpha.to_csv(output_file_path, index=False)



### For nostra_mainnet, the same extraction will be applied as with nostra_alpha.

In [None]:
# Define mapping for their token and types
NOSTRA_MAINNET_TOKENS_TO_TOKEN_TYPES: dict[str, str] = {
    '0x01fecadfe7cda2487c66291f2970a629be8eecdcb006ba4e71d1428c2b7605c7': 'interest_bearing_deposit',
    '0x002fc2d4b41cc1f03d185e6681cbd40cced61915d4891517a042658d61cba3b1': 'interest_bearing_deposit',
    '0x0360f9786a6595137f84f2d6931aaec09ceec476a94a98dcad2bb092c6c06701': 'interest_bearing_deposit',
    '0x022ccca3a16c9ef0df7d56cbdccd8c4a6f98356dfd11abc61a112483b242db90': 'interest_bearing_deposit',
    '0x0735d0f09a4e8bf8a17005fa35061b5957dcaa56889fc75df9e94530ff6991ea': 'interest_bearing_deposit',
    '0x00ca44c79a77bcb186f8cdd1a0cd222cc258bebc3bec29a0a020ba20fdca40e9': 'interest_bearing_deposit',
    '0x0507eb06dd372cb5885d3aaf18b980c41cd3cd4691cfd3a820339a6c0cec2674': 'interest_bearing_deposit',
    '0x026c5994c2462770bbf940552c5824fb0e0920e2a8a5ce1180042da1b3e489db': 'interest_bearing_deposit',
    '0x078a40c85846e3303bf7982289ca7def68297d4b609d5f588208ac553cff3a18': 'interest_bearing_deposit',
    '0x044debfe17e4d9a5a1e226dabaf286e72c9cc36abbe71c5b847e669da4503893': 'non_interest_bearing_collateral',
    '0x05f296e1b9f4cf1ab452c218e72e02a8713cee98921dad2d3b5706235e128ee4': 'non_interest_bearing_collateral',
    '0x0514bd7ee8c97d4286bd481c54aa0793e43edbfb7e1ab9784c4b30469dcf9313': 'non_interest_bearing_collateral',
    '0x005c4676bcb21454659479b3cd0129884d914df9c9b922c1c649696d2e058d70': 'non_interest_bearing_collateral',
    '0x036b68238f3a90639d062669fdec08c4d0bdd09826b1b6d24ef49de6d8141eaa': 'non_interest_bearing_collateral',
    '0x05eb6de9c7461b3270d029f00046c8a10d27d4f4a4c931a4ea9769c72ef4edbb': 'non_interest_bearing_collateral',
    '0x02530a305dd3d92aad5cf97e373a3d07577f6c859337fb0444b9e851ee4a2dd4': 'non_interest_bearing_collateral',
    '0x040f5a6b7a6d3c472c12ca31ae6250b462c6d35bbdae17bd52f6c6ca065e30cf': 'non_interest_bearing_collateral',
    '0x0142af5b6c97f02cac9c91be1ea9895d855c5842825cb2180673796e54d73dc5': 'non_interest_bearing_collateral',
    '0x057146f6409deb4c9fa12866915dd952aa07c1eb2752e451d7f3b042086bdeb8': 'interest_bearing_collateral',
    '0x05dcd26c25d9d8fd9fc860038dcb6e4d835e524eb8a85213a8cda5b7fff845f6': 'interest_bearing_collateral',
    '0x0453c4c996f1047d9370f824d68145bd5e7ce12d00437140ad02181e1d11dc83': 'interest_bearing_collateral', 
    '0x04f18ffc850cdfa223a530d7246d3c6fc12a5969e0aa5d4a88f470f5fe6c46e9': 'interest_bearing_collateral',
    '0x05b7d301fa769274f20e89222169c0fad4d846c366440afc160aafadd6f88f0c': 'interest_bearing_collateral',
    '0x009377fdde350e01e0397820ea83ed3b4f05df30bfb8cf8055d62cafa1b2106a': 'interest_bearing_collateral',
    '0x0739760bce37f89b6c1e6b1198bb8dc7166b8cf21509032894f912c9d5de9cbd': 'interest_bearing_collateral',
    '0x07c2e1e733f28daa23e78be3a4f6c724c0ab06af65f6a95b5e0545215f1abc1b': 'interest_bearing_collateral',
    '0x067a34ff63ec38d0ccb2817c6d3f01e8b0c4792c77845feb43571092dcf5ebb5': 'interest_bearing_collateral',
    '0x00ba3037d968790ac486f70acaa9a1cab10cf5843bb85c986624b4d0e5a82e74': 'debt',
    '0x063d69ae657bd2f40337c39bf35a870ac27ddf91e6623c2f52529db4c1619a51': 'debt',
    '0x024e9b0d6bc79e111e6872bb1ada2a874c25712cf08dfc5bcf0de008a7cca55f': 'debt',
    '0x066037c083c33330a8460a65e4748ceec275bbf5f28aa71b686cbc0010e12597': 'debt',
    '0x0491480f21299223b9ce770f23a2c383437f9fbf57abc2ac952e9af8cdb12c97': 'debt',
    '0x0348cc417fc877a7868a66510e8e0d0f3f351f5e6b0886a86b652fcb30a3d1fb': 'debt',
    '0x035778d24792bbebcf7651146896df5f787641af9e2a3db06480a637fbc9fff8': 'debt',
    '0x001258eae3eae5002125bebf062d611a772e8aea3a1879b64a19f363ebd00947': 'debt',
    '0x0292be6baee291a148006db984f200dbdb34b12fb2136c70bfe88649c12d934b': 'debt',
}
NOSTRA_MAINNET_INTEREST_RATE_MODEL_ADDRESS: str = '0x059a943ca214c10234b9a3b61c558ac20c005127d183b86a99a8f3c60a08b4ff'

# Define the event type mapping based on the token type and key name
NOSTRA_MAINNET_TOKEN_TYPES_TO_EVENT_TYPES: dict[str, str] = {
    'non_interest_bearing_collateral': {
        'Mint': 'deposit',
        'Burn': 'withdrawal'
    },
    'interest_bearing_collateral': {
        'Mint': 'deposit',
        'Burn': 'withdrawal'
    },
    'debt': {
        'Mint': 'borrowing',
        'Burn': 'repayment'
    }
}
def get_nostra_mainnet_event_type(token_address: str, key_name: str) ->str:
    # Special case for InterestStateUpdated and the specific interest rate model address
    if token_address == NOSTRA_MAINNET_INTEREST_RATE_MODEL_ADDRESS and key_name == 'InterestStateUpdated':
        return 'interest_rate_updated'

    token_type = NOSTRA_MAINNET_TOKENS_TO_TOKEN_TYPES.get(token_address)
    if token_type:
        event_type = NOSTRA_MAINNET_TOKEN_TYPES_TO_EVENT_TYPES.get(token_type, {}).get(key_name)
        if event_type:
            return event_type
    # If no match is found, raise an error
    raise ValueError(f"Unrecognized event type for token address {token_address} and key name {key_name}")

def standardize_nostra_mainnet_event(row: pd.Series) -> pd.Series:
    try:
        # Decode the bytes to a string and convert the string to an actual list
        data_str = row['data'].decode('utf-8')
        data_list = eval(data_str)
        event_type = get_nostra_mainnet_event_type(row['from_address'], row['key_name'])
        
        if row['key_name'] in ['Mint', 'Burn']:
            return pd.Series({
                'user': data_list[0],
                'token': row['from_address'],
                'face_amount': hex_to_int(data_list[1]),
                'event_type': event_type
            })
        elif row['key_name'] == 'InterestStateUpdated':
            return pd.Series({
                'debt_token': data_list[0],
                'lending_rate': data_list[1],
                'borrow_rate': data_list[3],
                'lend_index': data_list[5],
                'borrow_index': data_list[7],
                'event_type': 'interest_rate_updated'
                
            })
        else:
            raise ValueError(f"Unrecognized key_name: {row['key_name']}") # Raising an error for unrecognized key_name
    except Exception as e:
        # Handle any parsing errors
        raise ValueError(f"Failed to extract data from row: {str(e)}") from e
# Filter the dataset for 'nostra_mainnet' protocol
nostra_mainnet_events = events[events['Protocol'] == 'nostra_mainnet']

# Apply the extraction function to each row
standardized_nostra_mainnet_events = nostra_mainnet_events.apply(standardize_nostra_mainnet_event, axis=1)

# Concatenate the extracted elements with the original dataframe
standardized_nostra_mainnet_events = pd.concat([nostra_mainnet_events[['block_hash', 'block_number', 'transaction_hash', 'timestamp', 'key_name']], standardized_nostra_mainnet_events], axis=1)

# Display the resulting dataframe
standardized_nostra_mainnet_events.head()

# Uncomment the following below to Save the resulting dataframe to a new CSV file
# output_file_path = 'extracted_standard_mainnet_events.csv'
# standard_mainnet.to_csv(output_file_path, index=False)

### For hashstack_v0 protocol these are the events and their relevant data:
- for new loan: loan_id is in data[0]int, user is in data[1], debt_token is in data[2], debt_face_amount is in data[4]int, borrowed_collateral_token is in data[6],borrowed_collateral_face_amount is in data[7]int, debt_category is in data[10]int, [`collateral`] `market` is in data[14], original_collateral_face_amount is in data[17]
                
- for loan_withdrawal: id (loan_record): Located in data[0], user: Located in data[1], debt_token: Located in data[2], debt_face_amount: Located in data[4], borrowed_collateral_token: Located in data[6], borrowed_collateral_face_amount : Located in data[7], debt_category: Located in data[10]

- for loan_interest_deducted: original_collateral_token: Located in data[0], original_collateral_face_amount: Located in data[3], loan_id (inside loan_id): Located in data[11].
  
- for loan_swap: old_loan_id : Located in data[0], old_user : Located in data[1], new_loan_id: Located in data[14], new_user: Located in data[15], new_debt_token: Located in data[16], new_debt_face_amount : Located in data[18], new_borrowed_collateral_token : Located in data[20], new_borrowed_collateral_face_amount : Located in data[21], new_debt_category : Located in data[24]
  
- for collateral_added: original_collateral_token: Located in data[0], original_collateral_face_amount: Located in data[3], loan_id: Located in data[9]
  
- for loan_repaid: loan_id: Located in data[0], user: Located in data[1], debt_token: Located in data[2], borrowed_collateral_token: Located in data[6], borrowed_collateral_face_amount: Located in data[7]

- for collateral_withdrawal: original_collateral_token: Located in data[0], original_collateral_face_amount: Located in data[3], loan_id: Located in data[9]

- for liquidated_event: loan_id is in data[0]int, user is in data[1], debt_token is in data[2],  borrowed_collateral_token is in data[6], debt_category is in data[10]int
  

In [5]:
def standardize_hashstack_v0_event(row: pd.Series) -> pd.Series:
    try:
        # Decode the bytes to a string and convert the string to an actual list
        data_str = row['data'].decode('utf-8')
        data_list = eval(data_str)
        
        if row['key_name'] == 'new_loan':
            return pd.Series({
                'loan_id': hex_to_int(data_list[0]),
                'user': data_list[1],
                'debt_token': data_list[2],
                'debt_face_amount': hex_to_int(data_list[4]),
                'borrowed_collateral_token': data_list[6],
                'borrowed_collateral_face_amount': hex_to_int(data_list[7]),
                'debt_category': hex_to_int(data_list[10]),
                'original_collateral_token': data_list[14],
                'original_collateral_face_amount': data_list[17],
            })
        elif row['key_name'] == 'loan_withdrawal':
            return pd.Series({
                'loan_id': data_list[0],
                'user': data_list[1],
                'debt_token': data_list[2],
                'debt_face_amount': hex_to_int(data_list[4]),
                'borrowed_collateral_token': data_list[6],
                'borrowed_collateral_face_amount': hex_to_int(data_list[7]),
                'debt_category': data_list[10],
            })
        elif row['key_name'] == 'loan_interest_deducted':
            return pd.Series({
                'original_collateral_token': data_list[0],
                'original_collateral_face_amount': hex_to_int(data_list[3]),
                'loan_id': data_list[11],
            })
        elif row['key_name'] == 'loan_swap':
            return pd.Series({
                'old_loan_id': data_list[0],
                'old_user': data_list[1],
                'new_loan_id': data_list[14],
                'new_user': data_list[15],
                'new_debt_token': data_list[16],
                'new_debt_face_amount': hex_to_int(data_list[18]),
                'new_borrowed_collateral_token': data_list[20],
                'new_borrowed_collateral_face_amount ': hex_to_int(data_list[21]),
                'new_debt_category': data_list[24],
            })
        elif row['key_name'] == 'collateral_added':
            return pd.Series({
                'original_collateral_token': data_list[0],
                'original_collateral_face_amount': hex_to_int(data_list[3]),
                'loan_id': data_list[9],
            })
        elif row['key_name'] == 'loan_repaid':
            return pd.Series({
                'loan_id': data_list[0],
                'user': data_list[1],
                'debt_token': data_list[2],
                'borrowed_collateral_token': data_list[6],
                'borrowed_collateral_face_amount': hex_to_int(data_list[7]),
                'debt_category': data_list[10],
            })
        elif row['key_name'] == 'collateral_withdrawal':
            return pd.Series({
                'original_collateral_token': data_list[0],
                'original_collateral_face_amount': hex_to_int(data_list[3]),
                'loan_id': data_list[9],
            })
        elif row['key_name'] == 'liquidated':
            return pd.Series({
                'loan_id': data_list[0],
                'user': data_list[1],
                'debt_token': data_list[2],
                'borrowed_collateral_token': data_list[6],
                'borrowed_collateral_face_amount': hex_to_int(data_list[7]),
                'debt_category': data_list[10],
            })
        else:
            raise ValueError(f"Unrecognized key_name: {row['key_name']}") # Raising an error for unrecognized key_name
    except Exception as e:
        # Handle any parsing errors
        raise ValueError(f"Failed to extract data from row: {str(e)}") from e

# Filter the dataset for 'hashstack_v0' protocol
hashstack_v0_events = events[events['Protocol'] == 'hashstack_v0']

# Apply the extraction function to each row
standardized_hashstack_v0_events = hashstack_v0_events.apply(standardize_hashstack_v0_event, axis=1)

# Concatenate the extracted elements with the original dataframe
standardized_hashstack_v0_events = pd.concat([hashstack_v0_events[['block_hash', 'block_number', 'transaction_hash', 'timestamp', 'key_name']], standardized_hashstack_v0_events], axis=1)

# Display the resulting dataframe
standardized_hashstack_v0_events.head()

# Uncomment the following below to Save the resulting dataframe to a new CSV file
# output_file_path = 'standardized_hashstack_v0_events.csv'
# standardized_hashstack_v0_events.to_csv(output_file_path, index=False)

Unnamed: 0,block_hash,block_number,transaction_hash,timestamp,key_name,borrowed_collateral_face_amount,borrowed_collateral_token,debt_category,debt_face_amount,debt_token,...,new_debt_category,new_debt_face_amount,new_debt_token,new_loan_id,new_user,old_loan_id,old_user,original_collateral_face_amount,original_collateral_token,user
300000,0x07c8f77b0b9bb2b29fd3d3c729717b444813ab48a8e92b4b38d868b783e6cd8f,21506,0x05bb51553067fce9bed47b575dd2e7af6ed460a2ddcbd21f72680fb966b1bb53,1678096181,new_loan,20979000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,1,21000000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,...,,,,,,,,0x0,0x27147114878000,0x5b55db55f5884856860e63f3595b2ec6b2c9555f3f507b4ca728d8e427b7864
300001,0x0657249d2b3dff56c0715d8dbe01521098ac96ac4d20172e227900a3fdb049ed,21535,0x0103ce71c2dfbcec09b93f9f50a08255dc6fc17982f5e50d93360addf7d24ac3,1678122298,new_loan,17982000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,1,18000000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,...,,,,,,,,0x0,0xe4e1c0,0x5b55db55f5884856860e63f3595b2ec6b2c9555f3f507b4ca728d8e427b7864
300002,0x0657249d2b3dff56c0715d8dbe01521098ac96ac4d20172e227900a3fdb049ed,21535,0x07f08e47dab885ba7fd7c5e0313443e42ccf8d53c5ae52324d6ccc6167b25c43,1678122298,new_loan,29970000,0x53c91253bc9682c04929ca02ed00b3e423f6710d2ee7e0d5ebb06f3ecf368a8,1,30000000,0x53c91253bc9682c04929ca02ed00b3e423f6710d2ee7e0d5ebb06f3ecf368a8,...,,,,,,,,0x0,0x2386f26fc10000,0x5b55db55f5884856860e63f3595b2ec6b2c9555f3f507b4ca728d8e427b7864
300003,0x03951b3242291068d1456d3ff35c0f95c60c6d185d37f6c4d1400214122e56bb,21602,0x0448cfe386be1e6919ed40f7a602704043c9fb7ffad924c6785fcc991b5a109c,1678201433,new_loan,29970000,0x68f5c6a61780768455de69077e07e89787839bf8166decfbf92b645209c0fb8,2,30000000,0x68f5c6a61780768455de69077e07e89787839bf8166decfbf92b645209c0fb8,...,,,,,,,,0x2386f26fc10000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,0x1e307316999c4830ada67b495d03246ab205e77853bb117da7b555122a33bc7
300004,0x0455454837ade622189fc590fcb911f9e6e08ea1669a09a3ec22ddb4773ab9df,21603,0x0606b7eb810306e25ad5eba69a56e3d6fe356021c1716cd1231957b0fd760066,1678203696,new_loan,19980000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,2,20000000000000000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,...,,,,,,,,0x2386f26fc10000,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,0x74061d07a0fbd8ffc376dc4f593c69854c51b6c2fe5596d88452e9efdd76864


### for hashstack_v1 protocol, these are the following events and their relevant data:
- for new_loan event: loan_id is in data[0]int, user is in data[1], debt_token is in data[2], debt_face_amount is in data[3]int, borrowed_collateral_token is in data[5], borrowed_collateral_face_amount is in data[6], original_collateral_token is in data[13], original_collateral_face_amount is in data[14]int, 

- for loan_spent: old_loan_id is in data[0], old_user is in data[1], new_loan_id is located in data[12], new_user is located in data[13], new_debt_token is in data[14], new_debt_face_amount is in data[15], new_borrowed_collateral_token is in data[17], new_borrowed_collateral_face_amount is in data[18],

- for collateral_added: loan_id is in data[0]int, collateral_token is in data[1], amount is in data[2], created_at is in data[4], amount_added is in data[5], timestamp is in data[6]

- for loan_repaid: old_loan_id is in data[0], old_user is in data[1], new_loan_id is located in data[12], new_user is located in data[13], new_debt_token is in data[14], new_debt_face_amount is in data[15], new_borrowed_collateral_token is in data[17], new_borrowed_collateral_face_amount is in data[18], new_collateral_loan_id is in data[24], new_original_collateral_token is in data[25], new_original_collateral_face_amount is in data[26]

- for loan_transfered: loan_id is in data[0]int, old_user is in data[1], new_user is in data[2],

In [6]:
def standardize_hashstack_v1_event(row: pd.Series) -> pd.Series:
    try:
        # Decode the bytes to a string and convert the string to an actual list
        data_str = row['data'].decode('utf-8')
        data_list = eval(data_str)
        
        if row['key_name'] == 'new_loan':
            return pd.Series({
                'loan_id': data_list[0],
                'user': data_list[1],
                'debt_face_amount': hex_to_int(data_list[2]),
                'loan_amount': hex_to_int(data_list[3]),
                'borrowed_collateral_token': data_list[5],
                'borrowed_collateral_face_amount': hex_to_int(data_list[6]),
                'original_collateral_token': data_list[13],
                'original_collateral_face_amount': hex_to_int(data_list[14]),
            })
        elif row['key_name'] == 'loan_spent':
            return pd.Series({
                'old_loan_id': hex_to_int(data_list[0]),
                'old_user': data_list[1],
                'new_loan_id': data_list[12],
                'new_user': data_list[13],
                'new_debt_token': data_list[14],
                'new_debt_face_amount': hex_to_int(data_list[15]),
                'new_borrowed_collateral_token': data_list[17],
                'new_borrowed_collateral_face_amount': hex_to_int(data_list[18]),
            })
        elif row['key_name'] == 'collateral_added':
            return pd.Series({
                'loan_id': hex_to_int(data_list[0]),
                'original_collateral_token': data_list[1],
                'original_collateral_face_amount': hex_to_int(data_list[2]),
            })
        elif row['key_name'] == 'loan_repaid':
            return pd.Series({
                'old_loan_id': data_list[0],
                'old_user': data_list[1],
                'new_loan_id': data_list[12],
                'new_user': data_list[13],
                'new_debt_token': data_list[14],
                'new_debt_face_amount': hex_to_int(data_list[15]),
                'new_borrowed_collateral_token': data_list[17],
                'new_borrowed_collateral_face_amount': hex_to_int(data_list[18]),
                'new_collateral_loan_id': data_list[24],
                'new_original_collateral_token': data_list[25],
                'new_original_collateral_face_amount': hex_to_int(data_list[26]),
            })
        elif row['key_name'] == 'loan_transferred':
            return pd.Series({
                'loan_id': hex_to_int(data_list[0]),
                'old_user': data_list[1],
                'new_user': data_list[2],
            })
        else:
            raise ValueError(f"Unrecognized key_name: {row['key_name']}") # Raising an error for unrecognized key_name
    except Exception as e:
        # Handle any parsing errors
        raise ValueError(f"Failed to extract data from row: {str(e)}") from e

# Filter the dataset for 'hashstack_v1' protocol
hashstack_v1_events = events[events['Protocol'] == 'hashstack_v1']

# Apply the extraction function to each row
standardized_hashstack_v1_events = hashstack_v1_events.apply(standardize_hashstack_v1_event, axis=1)

# Concatenate the extracted elements with the original dataframe
standardized_hashstack_v1_events = pd.concat([hashstack_v1_events[['block_hash', 'block_number', 'transaction_hash', 'timestamp', 'key_name']], standardized_hashstack_v1_events], axis=1)

# Display the resulting dataframe
standardized_hashstack_v1_events.head()

# Uncomment the following below to Save the resulting dataframe to a new CSV file
# output_file_path = 'standardized_hashstack_v1_events.csv'
# standardized_hashstack_v1_events.to_csv(output_file_path, index=False)

Unnamed: 0,block_hash,block_number,transaction_hash,timestamp,key_name,borrowed_collateral_face_amount,borrowed_collateral_token,debt_face_amount,loan_amount,loan_id,...,new_debt_token,new_loan_id,new_original_collateral_face_amount,new_original_collateral_token,new_user,old_loan_id,old_user,original_collateral_face_amount,original_collateral_token,user
300780,0x068b04151068a7acc1f72c9b2023e176037cb6bce171a5e3abaf6fb961f4e1a2,273262,0x0676e3ae850f6fae3da1269b9c390f74c26eb1948ca2bd1e16a47fa9399ca84d,1695977183,new_loan,4500000.0,0x68f5c6a61780768455de69077e07e89787839bf8166decfbf92b645209c0fb8,529181207143938386779516858606565548513594142494163173604607576167969653191,4500000.0,0x1,...,,,,,,,,2000000.0,0x5fa6cc6185eab4b0264a4134e2d4e74be11205351c7c91196cb27d5d97f8d21,0x251e01d33f75076732f5c7c671d1e6918f3cd0f36a4dedcc35e642297f30d2a
300781,0x06c263286120af7afb020f535cd53e17da319df8435058d76450bdcd3ebaecb3,273300,0x06ebb5723abf3878214b66bd4bbe7087280707da7231b074a1bcfba8b5ec78d7,1695977763,new_loan,2400000000000000.0,0x49d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7,875470120220037648359206439407140676983229848858749540447605330658467957235,2400000000000000.0,0x2,...,,,,,,,,2000000.0,0x5fa6cc6185eab4b0264a4134e2d4e74be11205351c7c91196cb27d5d97f8d21,0x251e01d33f75076732f5c7c671d1e6918f3cd0f36a4dedcc35e642297f30d2a
300782,0x07d0f1faf834b7906e4de54c440d0aa35112bc1a243c12e63e4f8b6006e857df,273372,0x0051f75ef1e08f70d1c8efe7866384d026aa0ca092ded8bd1c903aac0478b990,1695978978,loan_spent,,,,,,...,0x12b8185e237dd0340340faeb3351dbe53f8a42f5a9bf974ddf90ced56e301c7,0x1,,,0x251e01d33f75076732f5c7c671d1e6918f3cd0f36a4dedcc35e642297f30d2a,1.0,0x251e01d33f75076732f5c7c671d1e6918f3cd0f36a4dedcc35e642297f30d2a,,,
300783,0x06f650deb66bb602f02bd87f29aa7c28909b126f9d7c738f8a3ad200baa888c4,273387,0x01bf0af4c1922e2b8cc523e6e088d72885701e9416058a3e6bcc2bcf7f0bd5d0,1695979201,collateral_added,,,,,1,...,,,,,,,,3999998.0,0x5fa6cc6185eab4b0264a4134e2d4e74be11205351c7c91196cb27d5d97f8d21,
300784,0x06384cf4b11477d048623b43e45970c5a4a10653706547b76e92096f7d421774,273396,0x0389888c9ef7dfe1491a3d15afa79b565bc52dccb01d512693f786f6c61fdc0f,1695979342,collateral_added,,,,,1,...,,,,,,,,4999998.0,0x5fa6cc6185eab4b0264a4134e2d4e74be11205351c7c91196cb27d5d97f8d21,
