# Ethereum transactions analysis

### Read the eth_transactions.json files

In [1]:
import os
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
PATH = '../data/'

In [3]:
pd.set_option('display.max_columns', 100, 'display.max_rows', 100, 'display.max_colwidth', 100, 'display.float_format', lambda x: '%.f' % x)

In [4]:
file_dir = os.listdir(PATH)
file_list = [os.path.join(PATH, file) for file in file_dir if file.endswith('parquet')]

In [5]:
df_raw = pd.concat([pd.read_parquet(file) for file in file_list])

In [6]:
df_raw.shape

(5493838, 20)

In [7]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5493838 entries, 0 to 2791715
Data columns (total 20 columns):
 #   Column                Dtype         
---  ------                -----         
 0   hash                  object        
 1   blockHash             object        
 2   blockNumber           int64         
 3   from                  object        
 4   gas                   int64         
 5   gasPrice              int64         
 6   input                 object        
 7   nonce                 int64         
 8   r                     object        
 9   s                     object        
 10  to                    object        
 11  transactionIndex      int64         
 12  type                  int64         
 13  v                     int64         
 14  value                 float64       
 15  accessList            object        
 16  chainId               float64       
 17  maxFeePerGas          float64       
 18  maxPriorityFeePerGas  float64       
 19  

### Data fields
- `hash` - Hash of the transaction
- `blockHash` - Hash of the block.
- `blockNumber` - Block number.
- `from` - Address of the sender
- `gas` - Gas provided by the sender
- `gasPrice` - Gas price provided by the sender in Wei
- `input` - The data sent along with the transaction
- `nonce` - The number of transactions made by the sender prior to this one
- `r` - ECDSA signature r
- `s` - ECDSA signature s
- `to` - Address of the receiver,
- `transactionIndex` - Integer of the transactions index position in the block.
- `type` - Might be transaction type (unable to find this in Alchemy doc but the value looks the same as `transaction_type` in bigquery public dataset)
- `v` - ECDSA recovery id
- `value` - Value transferred in Wei
- `accessList` - Not sure
- `chainId` - Value used in replay-protected transaction signing as introduced by EIP-155.
- `maxFeePerGas` - Refer to https://docs.alchemy.com/alchemy/guides/eip-1559/maxpriorityfeepergas-vs-maxfeepergas
- `maxPriorityFeePerGas` - Refer to https://docs.alchemy.com/alchemy/guides/eip-1559/maxpriorityfeepergas-vs-maxfeepergas
- `block_timestamp` - Timestamp of the block

References:  
The "bigquery-public-data.crypto_ethereum.transactions" column description  
https://ethereum.org/en/developers/docs/apis/json-rpc/  
https://docs.alchemy.com/alchemy/apis/ethereum/eth-gettransactionbyhash  


In [9]:
df_raw.query('hash == "0xfedacdb532ec5525686557a9bca04daa357ba754ccd70bea7cf0459572c189cf"').T

Unnamed: 0,0
hash,0xfedacdb532ec5525686557a9bca04daa357ba754ccd70bea7cf0459572c189cf
blockHash,0x5502093582eef8f1c9088f8db764ab59f403787954dee81eeaf681bf8ba681a7
blockNumber,15064948
from,0xf71e4a144cda4498277f9ad89b6501ec6c83c27c
gas,21000
gasPrice,33084805662
input,0x
nonce,535
r,0xdc2fccbf51c8dde791b9ec027d4d53aab9bf6d5c10f963135a1bff925203dba7
s,0x1c0b133e837b36281fd6bdb9abb907c2060db9f6ab1824205037f3069458cafe
