In [None]:
# Authenticate the Colab user once; a single call per runtime is enough, so the
# import/authenticate/print sequence is not repeated.
from google.colab import auth

auth.authenticate_user()
print('Authenticated')

In [None]:
from google.cloud import bigquery

# Project passed to the BigQuery client below.
project_id = 'project-data-enginee'

# Single client instance reused by every query cell in this notebook.
client = bigquery.Client(project=project_id)

**Subqueries/Nested Queries**

In [None]:
# Subquery demo: keep transactions from 2024-08-06 whose block_hash is returned
# by the inner query (blocks of that day with transaction_count = 12).
subquery_sql = '''
  SELECT
  `hash` AS transaction_hash,
  block_hash
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
  AND block_hash IN (
  SELECT
    `hash`
  FROM
    `bigquery-public-data.crypto_bitcoin.blocks`
  WHERE
    TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06")
    AND transaction_count = 12)
LIMIT
  5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(subquery_sql).to_dataframe()

df.head()

Joins

In [None]:
# Join demo: inner-join the day's transactions (t) to the day's blocks that have
# transaction_count = 12 (tb) on block_hash, ordered by transaction hash.
join_sql = '''
SELECT
  t.transaction_hash,
  t.block_hash,
  tb.transaction_count
FROM (
  SELECT
    `hash` AS transaction_hash,
    block_hash
  FROM
    `bigquery-public-data.crypto_bitcoin.transactions`
  WHERE
    TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")) AS t
INNER JOIN (
  SELECT
    `hash` AS block_hash,
    transaction_count
  FROM
    `bigquery-public-data.crypto_bitcoin.blocks`
  WHERE
    TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06")
    AND transaction_count = 12) 
    AS tb
ON
  t.block_hash = tb.block_hash
ORDER BY
  t.transaction_hash
LIMIT
  5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(join_sql).to_dataframe()

df.head()

Unions

In [None]:
# Union demo: stack (hash, timestamp, number) rows from `blocks` with the
# matching block_* columns from `transactions` for 2024-08-06, de-duplicated
# by UNION DISTINCT.
union_sql = '''
SELECT
  `hash` AS block_hash,
  `timestamp` AS block_timestamp,
  `number` AS block_number
FROM
  `bigquery-public-data.crypto_bitcoin.blocks`
WHERE
  TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06")

UNION DISTINCT

SELECT
  block_hash,
  block_timestamp,
  block_number
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
LIMIT 5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(union_sql).to_dataframe()

df.head()

Aggregate Functions

In [None]:
# Aggregate demo: COUNT the transactions per block_hash for 2024-08-06,
# restricted to block hashes present in `blocks` for the same day.
aggregate_sql = '''
SELECT
  block_hash,
  COUNT(`hash`) AS total_transaction
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
  AND block_hash IN (
    SELECT
      `hash`
    FROM
      `bigquery-public-data.crypto_bitcoin.blocks`
    WHERE
      TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06"))
GROUP BY
  block_hash
Limit 5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(aggregate_sql).to_dataframe()

df.head()

Window Functions

In [None]:
# Window-function demo: number the per-block transaction counts with
# ROW_NUMBER() ordered by total_transaction descending, then show the top rows.
window_sql = '''
SELECT
  block_hash,
  total_transaction,
  ROW_NUMBER() OVER (ORDER BY total_transaction DESC) AS rank
FROM (
  SELECT
    block_hash,
    COUNT(`hash`) AS total_transaction
  FROM
    `bigquery-public-data.crypto_bitcoin.transactions`
  WHERE
    TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
    AND block_hash IN (
      SELECT
        `hash`
      FROM
        `bigquery-public-data.crypto_bitcoin.blocks`
      WHERE
        TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06"))
  GROUP BY
    block_hash
) AS transaction_counts
ORDER BY rank
Limit 5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(window_sql).to_dataframe()

df.head()


Common Table Expressions (CTEs)

In [None]:
# CTE demo: `transaction_block` holds the day's transactions, `block_parent`
# holds the day's blocks with version = 541065216; the final SELECT joins the
# two CTEs on block_hash.
cte_sql = '''
WITH
  transaction_block AS (
    SELECT
      `hash` AS transaction_hash,
      block_hash
    FROM
      `bigquery-public-data.crypto_bitcoin.transactions`
    WHERE
      TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
  ),
  block_parent AS (
    SELECT
      `hash` AS block_hash,
      version
    FROM
      `bigquery-public-data.crypto_bitcoin.blocks`
    WHERE
      TIMESTAMP_TRUNC(timestamp, DAY) = TIMESTAMP("2024-08-06")
      AND version = 541065216
  )
SELECT
  transaction_block.transaction_hash,
  transaction_block.block_hash,
  block_parent.version
FROM
  transaction_block
INNER JOIN
  block_parent
ON
  transaction_block.block_hash = block_parent.block_hash
ORDER BY
  transaction_block.transaction_hash
LIMIT
  5'''

# Submit the SQL and load the result rows into a DataFrame.
df = client.query(cte_sql).to_dataframe()

df.head()

Pivot

In [None]:
# Pivot demo: label each 2024-08-06 transaction as 'Coinbase' / 'Non-Coinbase'
# and pivot the labels into one count column per label, one row per block_hash.
# The SQL must be submitted through client.query(); the Client object itself is
# not callable.
df = client.query('''
SELECT *
FROM (
  SELECT
    block_hash,
    CASE WHEN is_coinbase THEN 'Coinbase' ELSE 'Non-Coinbase' END AS transaction_type
  FROM
    `bigquery-public-data.crypto_bitcoin.transactions`
  WHERE
    TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
) AS src
PIVOT (
  COUNT(transaction_type)
  FOR transaction_type IN ('Coinbase' AS coinbase_transactions, 'Non-Coinbase' AS non_coinbase_transactions)
) AS pvt
ORDER BY block_hash
LIMIT 5;''').to_dataframe()

df.head()

String Manipulation

In [None]:
# String demo: SUBSTR(block_hash, 1, 10) yields the first 10 characters of the
# block hash as short_block_hash.
# The SQL is submitted through client.query() (the Client object is not
# callable), and the transaction id is read from the `hash` column and aliased,
# matching how the other cells select it from this table.
df = client.query('''
SELECT
  block_hash,
  SUBSTR(block_hash, 1, 10) AS short_block_hash,
  `hash` AS transaction_hash
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
LIMIT
  5;
''').to_dataframe()

df.head()

Date and Time

In [None]:
# Date/time demo: EXTRACT the year, month and day parts of block_timestamp for
# transactions from 2024-08-06.
# The SQL must be submitted through client.query(); the Client object itself is
# not callable.
df = client.query('''
SELECT
  block_hash,
  block_timestamp,
  EXTRACT(YEAR FROM block_timestamp) AS transaction_year,
  EXTRACT(MONTH FROM block_timestamp) AS transaction_month,
  EXTRACT(DAY FROM block_timestamp) AS transaction_day
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
LIMIT
  5;
''').to_dataframe()

df.head()

Case Statement

In [None]:
# CASE demo: bucket each transaction by `size` into 'Small' (< 200),
# 'Medium' (200-300 inclusive) or 'Large' (everything else).
# The SQL is submitted through client.query() (the Client object is not
# callable), and the transaction id is read from the `hash` column and aliased,
# matching how the other cells select it from this table.
df = client.query('''
SELECT
  block_hash,
  `hash` AS transaction_hash,
  size,
  CASE
    WHEN size < 200 THEN 'Small'
    WHEN size BETWEEN 200 AND 300 THEN 'Medium'
    ELSE 'Large'
  END AS transaction_size_category
FROM
  `bigquery-public-data.crypto_bitcoin.transactions`
WHERE
  TIMESTAMP_TRUNC(block_timestamp, DAY) = TIMESTAMP("2024-08-06")
LIMIT
  5;
''').to_dataframe()

df.head()

Recursive Queries

In [None]:
# Recursive CTE demo: start at block number 367853 (level 1) and repeatedly
# step to the block whose number is one higher, incrementing `level` each time.
#
# Notes on this cell:
#   * The SQL is submitted through client.query(); the Client object itself is
#     not callable.
#   * The `blocks` table exposes the height as `number` (as the Unions cell
#     selects it), so it is aliased to block_number here.
#   * The recursive term is bounded by `bh.level < 10`. Without a stop
#     condition the recursion would keep following the chain until BigQuery's
#     recursive-iteration limit aborts the query. Adjust the bound as needed.
df = client.query('''
WITH RECURSIVE block_hierarchy AS (
  SELECT
    `hash` AS block_hash,
    `number` AS block_number,
    1 AS level
  FROM
    `bigquery-public-data.crypto_bitcoin.blocks`
  WHERE
    `number` = 367853

  UNION ALL

  SELECT
    b.`hash` AS block_hash,
    b.`number` AS block_number,
    bh.level + 1 AS level
  FROM
    `bigquery-public-data.crypto_bitcoin.blocks` AS b
  INNER JOIN
    block_hierarchy AS bh ON b.`number` = bh.block_number + 1
  WHERE
    bh.level < 10
)

SELECT
  block_hash,
  block_number,
  level
FROM
  block_hierarchy
ORDER BY
  level, block_number;

''').to_dataframe()

df.head()

User-Defined Functions (UDFs)


In [None]:
# UDF demo: define a temporary SQL function calculate_fee(input, output) that
# returns input_value - output_value, then apply it to every transaction in
# block number 367853.
# The SQL is submitted through client.query() (the Client object is not
# callable), and `hash` is backtick-quoted as in the rest of the notebook.
df = client.query('''
CREATE TEMP FUNCTION 
    calculate_fee(input_value FLOAT64, output_value FLOAT64) as (
        input_value - output_value
    );

SELECT
  t.`hash` AS transaction_hash,
  t.input_value,
  t.output_value,
  calculate_fee(t.input_value, t.output_value) AS transaction_fee
FROM
  `bigquery-public-data.crypto_bitcoin.transactions` AS t
WHERE
  t.block_number = 367853
ORDER BY
  t.`hash`;
''').to_dataframe()

df.head()