In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import math
import duckdb
import numpy as np
# Lift pandas' display truncation: show every column, each at full width
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Open the on-disk DuckDB database used by the rest of the notebook
con = duckdb.connect('my_database.duckdb')

In [2]:
import pandas as pd
import re

# Load the decoded event log as raw text lines; the rows are split by hand
# later on, so the file is not handed to a CSV reader here
file_path = 'rdntweth_decoded.csv'
lines = []

with open(file_path, 'r') as file:
    lines = list(file)

# Maps each transaction type to the list of parsed rows of that type
transaction_dict = dict()

def parse_line(line):
    """Split a raw line on every comma and trim whitespace from each piece.

    Quote characters get no special treatment: a comma inside quoted text
    still starts a new field.

    Args:
        line: One line of text from the input file.

    Returns:
        A list of stripped field strings, in their original order.
    """
    return list(map(str.strip, line.split(',')))

# Group the parsed rows by the value of their 6th field (the transaction type)
for raw_line in lines:
    row = parse_line(raw_line.strip())
    # Rows with fewer than 6 fields have no transaction type; leave them out
    if len(row) < 6:
        continue
    transaction_dict.setdefault(row[5], []).append(row)

# Build one DataFrame per transaction type, keyed by the type name
dataframes = {}
for transaction_type, data in transaction_dict.items():
    # The widest row of this type fixes the column count
    width = max(map(len, data))
    column_names = [f'Field_{position}' for position in range(1, width + 1)]
    # Right-pad shorter rows with empty strings so every row has `width` cells
    rows = [row + [''] * (width - len(row)) for row in data]
    dataframes[transaction_type] = pd.DataFrame(rows, columns=column_names)

# Report which DataFrames were built
for created_name in dataframes:
    print(f'Created DataFrame: {created_name}')

SwapFeePercentageChanged = dataframes['SwapFeePercentageChanged']


Created DataFrame: SwapFeePercentageChanged
Created DataFrame: ProtocolFeePercentageCacheUpdated
Created DataFrame: Transfer
Created DataFrame: Approval


In [3]:
# Inspect the raw frame for this event type; its columns still carry the
# positional Field_N names
SwapFeePercentageChanged

Unnamed: 0,Field_1,Field_2,Field_3,Field_4,Field_5,Field_6,Field_7,Field_8,Field_9,Field_10,Field_11,Field_12,Field_13,Field_14,Field_15,Field_16
0,18466412,29,52,0x4e9d7051cfb7dc6c3dd69ea9ce8fd2bcde17da490dbb37e294fdf73656e11bf7,0xcF7b51ce5755513d4bE016b0e28D6EDEffa1d52a,SwapFeePercentageChanged,5000000000000000,,,,,,,,,


In [4]:
# Positional column name -> descriptive name, for the columns kept for this
# event type. Insertion order determines the output column order.
selected_columns = {
    'Field_1': 'block_number',
    'Field_3': 'log_index',
    'Field_4': 'transaction_hash',
    'Field_6': 'event',
    'Field_7': 'fee',
}

# Select from the untouched frame stored in `dataframes` rather than from
# `SwapFeePercentageChanged` itself. The result is the same on a first run,
# but the cell can now be re-run without a KeyError, because the source frame
# keeps its Field_N columns.
# NOTE(review): every value is still a string (rows were split by hand), so
# numeric-looking columns are not numeric dtypes here.
SwapFeePercentageChanged = (
    dataframes['SwapFeePercentageChanged'][list(selected_columns)]
    .rename(columns=selected_columns)
)

In [5]:
# Display the frame again to check the column selection and renaming
SwapFeePercentageChanged

Unnamed: 0,block_number,log_index,transaction_hash,event,fee
0,18466412,52,0x4e9d7051cfb7dc6c3dd69ea9ce8fd2bcde17da490dbb37e294fdf73656e11bf7,SwapFeePercentageChanged,5000000000000000


In [6]:
# Persist the fee-change events as a DuckDB table. DuckDB resolves
# `SwapFeePercentageChanged` in the query to the pandas DataFrame of that name.
# CREATE OR REPLACE keeps the cell re-runnable: the table lives in the on-disk
# database file, so a plain CREATE TABLE fails once it exists from an earlier run.
con.execute('CREATE OR REPLACE TABLE rdntweth_fee_change AS SELECT * FROM SwapFeePercentageChanged')

# Commit the changes
con.commit()

# Close the connection; cells that use `con` after this point need the
# connection cell to be run again
con.close()