In [1]:
! pip install web3

Collecting web3
  Downloading web3-7.10.0-py3-none-any.whl.metadata (5.6 kB)
Collecting eth-abi>=5.0.1 (from web3)
  Downloading eth_abi-5.2.0-py3-none-any.whl.metadata (3.8 kB)
Collecting eth-account>=0.13.1 (from web3)
  Downloading eth_account-0.13.7-py3-none-any.whl.metadata (3.7 kB)
Collecting eth-hash>=0.5.1 (from eth-hash[pycryptodome]>=0.5.1->web3)
  Downloading eth_hash-0.7.1-py3-none-any.whl.metadata (4.2 kB)
Collecting eth-typing>=5.0.0 (from web3)
  Downloading eth_typing-5.2.1-py3-none-any.whl.metadata (3.2 kB)
Collecting eth-utils>=5.0.0 (from web3)
  Downloading eth_utils-5.3.0-py3-none-any.whl.metadata (5.7 kB)
Collecting hexbytes>=1.2.0 (from web3)
  Downloading hexbytes-1.3.0-py3-none-any.whl.metadata (3.3 kB)
Collecting types-requests>=2.0.0 (from web3)
  Downloading types_requests-2.32.0.20250328-py3-none-any.whl.metadata (2.3 kB)
Collecting pyunormalize>=15.0.0 (from web3)
  Downloading pyunormalize-16.0.0-py3-none-any.whl.metadata (4.0 kB)
Collec

In [2]:
from web3 import Web3
import datetime
import pandas as pd
import pickle
import os

In [3]:
# -------------------------------
# 1. Connect to the Ethereum Blockchain
# -------------------------------
# Replace with your Ethereum node endpoint (e.g., Infura)
infura_url = "https://mainnet.infura.io/v3/ac8ff00d02044bf0bdfa8df9c12ff8ec"
w3 = Web3(Web3.HTTPProvider(infura_url))

if not w3.is_connected():
    raise Exception("Failed to connect to the Ethereum node.")
else:
    print("Connected to Ethereum blockchain.")

Connected to Ethereum blockchain.


In [4]:
# -------------------------------
# 2. Load All Trained Model Pipelines from Kaggle Input
# -------------------------------
model_dir = "/kaggle/input/comp517_models/scikitlearn/default/1"
pipelines = {}
for fname in os.listdir(model_dir):
    if fname.startswith("pipeline_") and fname.endswith(".pkl"):
        fullpath = os.path.join(model_dir, fname)
        # e.g. fname = "pipeline_MLP.pkl" → name = "MLP"
        name = fname.replace("pipeline_", "").replace(".pkl", "").replace("_", "+")
        with open(fullpath, "rb") as f:
            pipelines[name] = pickle.load(f)

print(f"Loaded {len(pipelines)} pipelines: {list(pipelines.keys())}")

Loaded 7 pipelines: ['RF+XGB', 'MLP', 'RF+MLP', 'XGB', 'RF+MLP+XGB', 'MLP+XGB', 'RF']


In [5]:
# -------------------------------
# 3. Fetch Latest Block and Its Transactions
# -------------------------------
latest_block_number = w3.eth.block_number
block = w3.eth.get_block(latest_block_number, full_transactions=True)
transactions = block.transactions

# Prepare block-level time features
block_dt = datetime.datetime.utcfromtimestamp(block.timestamp)
time_feats = {
    "hour":   block_dt.hour,
    "day":    block_dt.day,
    "month":  block_dt.month,
    "year":   block_dt.year
}

In [6]:
# -------------------------------
# 4. Build a DataFrame of New Transactions
# -------------------------------
records = []
for tx in transactions:
    records.append({
        "BlockHeight": tx["blockNumber"],
        "Value":       float(w3.from_wei(tx["value"], "ether")),
        "From":        tx["from"],
        "To":          tx["to"] or "0x0",
        **time_feats
    })
df_new = pd.DataFrame(records)
print(f"Prepared DataFrame with {len(df_new)} transactions.")

Prepared DataFrame with 229 transactions.


In [7]:
# -------------------------------
# 5. Predict & Summarize for Each Model
# -------------------------------
summary = []
for name, pipe in pipelines.items():
    preds = pipe.predict(df_new)               # pipeline expects the same raw-feature columns
    valid_count = int((preds == 0).sum())
    error_count = int((preds == 1).sum())
    summary.append({
        "Model":       name,
        "Valid (0)":  valid_count,
        "Error (1)":  error_count,
        "Total":      len(preds)
    })

summary_df = pd.DataFrame(summary)
print("\nPrediction Summary:")
print(summary_df)


Prediction Summary:
        Model  Valid (0)  Error (1)  Total
0      RF+XGB        229          0    229
1         MLP        229          0    229
2      RF+MLP        229          0    229
3         XGB        229          0    229
4  RF+MLP+XGB        229          0    229
5     MLP+XGB        229          0    229
6          RF        229          0    229


In [8]:
# -------------------------------
# 6. Save the Summary to CSV
# -------------------------------
summary_df.to_csv("prediction_summary.csv", index=False)
print("\nSaved summary to prediction_summary.csv")


Saved summary to prediction_summary.csv
