In [None]:
# Import Dependencies
import pandas as pd
from sql_actions import PostInsert
from datetime import datetime, timedelta

In [None]:
import getpass

In [None]:
# Get Username and Password
# The username comes from the current login; the password is prompted for
# interactively so it is never written into the notebook. Both values are
# passed to the PostInsert.fast_insert_data calls further down.
user = getpass.getuser()
pw = getpass.getpass()

# Import Data
---
In this section, we import the CSVs into pandas DataFrames to do data carpentry.

# Contract Data

The contracts data needs little wrangling: only reordering the columns and changing datatypes.

In [None]:
# Load contracts data
# NOTE(review): the file is named FRAX-USD.csv although it is loaded as the
# contracts table — confirm this is the intended source file.
contracts_df = pd.read_csv('data/FRAX-USD.csv')

# Check data (first rows only)
contracts_df.head()

In [None]:
# Column order used for the contracts insert
con_order = [
    "address", "base", "code", "created_at", "dynamic",
    "factory", "id", "name", "namespace", "updated_at",
]

# Unix epoch start time
# NOTE(review): not referenced by any cell visible here.
start = datetime(1970, 1, 1)

# Reorder the columns, then cast in one chained expression:
#   - address / code become strings
#   - created_at / updated_at are parsed as epoch seconds into UTC timestamps
contracts_df = contracts_df[con_order].assign(
    address=lambda frame: frame["address"].astype(str),
    code=lambda frame: frame["code"].astype(str),
    created_at=lambda frame: pd.to_datetime(frame["created_at"], unit='s', utc=True),
    updated_at=lambda frame: pd.to_datetime(frame["updated_at"], unit='s', utc=True),
)

In [None]:
# Check Data: confirm the column order and the converted dtypes on the first rows
contracts_df.head()

In [None]:
# Read Insert SQL for the contracts table.
# The statement is stored under the name the insert cell below passes to
# PostInsert.fast_insert_data (contract_insert); the context manager closes
# the file handle as soon as the text has been read.
with open('insert_sql/contracts_insert.sql', 'r') as sql_file:
    contract_insert = sql_file.read()

In [None]:
# Insert Data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
# 50000 is presumably the batch size — TODO confirm against sql_actions.PostInsert.
PostInsert.fast_insert_data(contracts_df, contract_insert, 50000, user, pw) # Comment out to skip on rerun.

# Transaction Data
---

In [None]:
# Load transaction data --> Rerun if memory issues occur
# Reads the whole CSV into memory in one go.
transactions_df = pd.read_csv('data/frax-ethereum-transactions.csv')

In [None]:
# Create transaction_id column
# 1-based id derived from the row index (read_csv without index_col yields 0..n-1).
transactions_df['transaction_id'] = transactions_df.index + 1

In [None]:
# Column order for the transaction data (transaction_id first)
transaction_order = [
    "transaction_id", "access_list", "block_hash", "block_number", "block_time",
    "data", "from", "gas_limit", "gas_price", "gas_used",
    "hash", "index", "max_fee_per_gas", "max_priority_fee_per_gas", "nonce",
    "priority_fee_per_gas", "success", "to", "type", "value",
]

# Select the columns in that order; a missing column raises KeyError
transactions_df = transactions_df.loc[:, transaction_order]

In [None]:
# Block/transaction-id lookup: pairs each transaction_id with its block_hash
# (used later when building the main staging frame)
trans_block = transactions_df.loc[:, ['transaction_id', 'block_hash']]

In [None]:
# Transaction frame for insertion: everything except block_hash,
# which is kept separately in the block/transaction-id lookup
transaction_final = transactions_df.drop(columns=['block_hash'])

In [None]:
# Read Insert SQL for the transactions table.
# The context manager closes the file handle as soon as the text has been read.
with open('insert_sql/transactions_insert.sql', 'r') as sql_file:
    trans_insert = sql_file.read()

In [None]:
# Insert Transaction data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
PostInsert.fast_insert_data(transaction_final, trans_insert, 50000, user, pw) # Comment out to skip on rerun.

# Log Data
---

In [None]:
# Load Logs Data --> Rerun if memory issues occur
# Reads the whole CSV into memory in one go.
logs_df = pd.read_csv('data/frax-ethereum-logs.csv')

In [None]:
# Create log_id column
# 1-based id derived from the row index (read_csv without index_col yields 0..n-1).
logs_df['log_id'] = logs_df.index + 1

In [None]:
# Column order for the logs data (log_id first)
log_order = [
    "log_id", "block_hash", "block_time", "contract_address", "data", "index",
    "topic1", "topic2", "topic3", "topic4", "tx_hash", "tx_index",
]

# Select the columns in that order; a missing column raises KeyError
logs_df = logs_df.loc[:, log_order]

In [None]:
# Block/log-id lookup: pairs each log_id with its block_hash
# (used later when building the main staging frame)
logs_block = logs_df.loc[:, ['log_id', 'block_hash']]

In [None]:
# Logs frame for insertion: everything except block_hash,
# which is kept separately in the block/log-id lookup
logs_final = logs_df.drop(columns=['block_hash'])

In [None]:
# Read Insert SQL for the logs table.
# The context manager closes the file handle as soon as the text has been read.
with open('insert_sql/logs_insert.sql', 'r') as sql_file:
    logs_insert = sql_file.read()

In [None]:
# Insert Logs data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
PostInsert.fast_insert_data(logs_final, logs_insert, 50000, user, pw) # Comment out to skip on rerun.

# Traces Data
---

In [None]:
# Reading in separately due to size  --> Rerun if memory issues occur
# First of the two traces files; the two are combined in a later cell.
traces_v1_df = pd.read_csv('data/frax-ethereum-traces.csv')

In [None]:
# Second of the two traces files.
traces_v2_df = pd.read_csv('data/frax-ethereum-traces-v2.csv')  # --> Rerun if memory issues occur

In [None]:
# Stack the two trace files into one frame with a fresh 0..n-1 index,
# so the ids derived from the index afterwards are unique across both files
traces_df = pd.concat([traces_v1_df, traces_v2_df], ignore_index=True)

In [None]:
# Create trace_id column
# 1-based id derived from the row index of the combined traces frame.
traces_df['trace_id'] = traces_df.index + 1

In [None]:
# Column order for the traces data (trace_id first)
traces_order = [
    "trace_id", "block_hash", "block_number", "block_time", "call_type",
    "error", "from", "gas", "gas_used", "input", "output", "sub_traces",
    "success", "to", "tx_hash", "tx_index", "tx_success", "type", "value",
]

# Select the columns in that order; a missing column raises KeyError
traces_df = traces_df.loc[:, traces_order]

In [None]:
# Block/trace-id lookup: pairs each trace_id with its block_hash
# (used later when building the main staging frame)
traces_block = traces_df.loc[:, ['trace_id', 'block_hash']]

In [None]:
# Traces frame for insertion: everything except block_hash,
# which is kept separately in the block/trace-id lookup
traces_final = traces_df.drop(columns=['block_hash'])

In [None]:
# Read Insert SQL for the traces table.
# The context manager closes the file handle as soon as the text has been read.
with open('insert_sql/traces_insert.sql', 'r') as sql_file:
    traces_insert = sql_file.read()

In [None]:
# Insert traces data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
PostInsert.fast_insert_data(traces_final, traces_insert, 50000, user, pw) # Comment out to skip on rerun.

# Main Staging
---

In [None]:
# Stack the block_hash columns of the three lookups into a single Series
# with a fresh 0..n-1 index
block_hash_parts = [
    trans_block['block_hash'],
    logs_block["block_hash"],
    traces_block["block_hash"],
]
main_df = pd.concat(block_hash_parts, ignore_index=True)

In [None]:
# Keep one entry per distinct block hash (first occurrence wins)
main_df = main_df.drop_duplicates()

In [None]:
# Attach transaction ids to the distinct block hashes.
# main_df is still a Series here (it was built by concatenating block_hash
# columns), so the module-level pd.merge is used.
# An outer join keeps hashes that have no transaction; a hash with several
# transactions produces one row per transaction.
main_df = pd.merge(main_df, trans_block, on="block_hash", how='outer')

In [None]:
# Attach trace ids by block hash (outer join).
# NOTE(review): rows multiply when a block hash matches several rows on both
# sides — confirm that this per-block cross product is intended.
main_df = pd.merge(main_df, traces_block, on='block_hash', how='outer')

In [None]:
# Attach log ids by block hash (outer join).
# NOTE(review): rows multiply when a block hash matches several rows on both
# sides — confirm that this per-block cross product is intended.
main_df = pd.merge(main_df, logs_block, on='block_hash', how='outer')

In [None]:
# Check data: row count, dtypes and non-null counts after the three merges
main_df.info()

In [None]:
# Read Insert SQL for the main staging table.
# The context manager closes the file handle as soon as the text has been read.
with open('insert_sql/main_insert.sql', 'r') as sql_file:
    main_insert = sql_file.read()

In [None]:
# Insert data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
PostInsert.fast_insert_data(main_df, main_insert, 50000, user, pw) # Comment out to skip on rerun.

# Date table
---

In [None]:
# Load Date Data --> Rerun if memory issues occur
# Reads the whole CSV into memory in one go.
date_df = pd.read_csv('data/date_table.csv')

In [None]:
# Disabled: create date_id column.
# Kept as comments rather than a string literal so the cell produces no output.
# NOTE(review): the disabled snippet assigned the column to logs_df; date_df is
# presumably the intended target — confirm before enabling.
# date_df['date_id'] = date_df.index + 1

In [None]:
frax_price_insert = open('insert_sql/date_table.sql','r').read()

In [None]:
# Insert date data
PostInsert.fast_insert_data(date_table_df, date_table_insert, 50000, user, pw) # Uncomment to rerun.

# FRAX Price Data
---

In [None]:
# Load FRAX Price Data --> Rerun if memory issues occur
# Reads the whole CSV into memory in one go.
frax_price_df = pd.read_csv('data/frax_price.csv')

In [None]:
# Read Insert SQL for the FRAX price table.
# The context manager closes the file handle as soon as the text has been read.
with open('insert_sql/frax_price.sql', 'r') as sql_file:
    frax_price_insert = sql_file.read()

In [None]:
# Insert frax price data
# NOTE(review): this call is live (not commented out), so it executes every time
# the cell runs — confirm re-inserting is safe before using Run All.
PostInsert.fast_insert_data(frax_price_df, frax_price_insert, 50000, user, pw) # Comment out to skip on rerun.

In [None]:
# FXS Price Data (disabled)
# ---
# The FXS price load is switched off. It is kept as comments rather than a
# string literal so the cell produces no output. Uncomment the statements
# below to load and insert the FXS prices.
#
# # Load FXS Price Data --> Rerun if memory issues occur
# fxs_price_df = pd.read_csv('data/fxs_price.csv')
#
# fxs_price_insert = open('insert_sql/fxs_price.sql','r').read()
#
# # Insert fxs price data
# PostInsert.fast_insert_data(fxs_price_df, fxs_price_insert, 50000, user, pw)