In [19]:
import sys
sys.path.append("../src")
from bigbrotr import Bigbrotr
from event import Event
from relay import Relay
from relay_metadata import RelayMetadata
import utils
import pandas as pd

# Utils

In [None]:
private_key, public_key = utils.generate_nostr_keypair()
e = utils.generate_event(private_key, public_key, 1, [], "test")
assert utils.verify_sig(e['id'], e['pubkey'], e['sig']), "Signature verification failed"
assert utils.calc_event_id(e['pubkey'], e['created_at'], e['kind'], e['tags'], e['content']) == e['id'], "Event ID calculation failed"
e = Event(e['id'], e['pubkey'], e['created_at'], e['kind'], e['tags'], e['content'], e['sig'])
e

# Database

In [163]:
bigbrotr = Bigbrotr(
    host="localhost",
    port=5431,
    user="admin",
    password="admin",
    dbname="bigbrotr"
)

In [164]:
bigbrotr.connect()

In [24]:
query = "SELECT * FROM relay_metadata"
bigbrotr.execute(query)
rows = bigbrotr.fetchall()
# for row in rows:
#     print(row)
len(rows)

70

In [165]:
query = "SELECT COUNT(*) FROM events WHERE kind = 1"
bigbrotr.execute(query)
rows = bigbrotr.fetchall()
print(f"Number of events in the database: {rows[0][0]}")

Number of events in the database: 2012171


In [170]:
query = "SELECT pg_size_pretty(pg_database_size(current_database())) AS db_size"
bigbrotr.execute(query)
result = bigbrotr.fetchall()
print(f"Database size: {result[0][0]}")

# print the size of all tables and indexes
query = """
SELECT
    pg_size_pretty(pg_total_relation_size(relid)) AS size,
    relname AS table_name
FROM
    pg_stat_user_tables
ORDER BY
    pg_total_relation_size(relid) DESC
"""
bigbrotr.execute(query)
result = bigbrotr.fetchall()
print("Size of all tables and indexes:")
for row in result:
    print(f"{row[1]}: {row[0]}")

print()
# for each table show the size of all columns
for table in ['events', 'event_contents', 'relay_metadata', 'relays']:
    query = f"""
    SELECT 
        'SELECT ' || 
        string_agg(
            'AVG(pg_column_size("' || column_name || '")) AS ' || column_name || '_bytes',
            ', '
        ) || 
        ' FROM {table};'
    AS query
    FROM information_schema.columns
    WHERE table_name = '{table}' AND table_schema = 'public';  -- cambia schema se necessario
    """
    bigbrotr.execute(query)
    result = bigbrotr.fetchall()
    print(f"Average size of each column in the {table} table:")
    for row in result:
        query = row[0]
        bigbrotr.execute(query)
        result = bigbrotr.fetchall()
        for r in result:
            print(r)

Database size: 12 GB
Size of all tables and indexes:
events: 5288 MB
events_relays: 4571 MB
event_contents: 1912 MB
relays: 1320 kB
relay_metadata: 576 kB

Average size of each column in the events table:
(Decimal('65.0000000000000000'), Decimal('65.0000000000000000'), Decimal('8.0000000000000000'), Decimal('4.0000000000000000'), Decimal('369.5635715703669455'), Decimal('132.0000000000000000'))
Average size of each column in the event_contents table:
(Decimal('65.0000000000000000'), Decimal('136.2278012119135816'))
Average size of each column in the relay_metadata table:
(Decimal('30.3234421364985163'), Decimal('8.0000000000000000'), Decimal('1.00000000000000000000'), Decimal('1.00000000000000000000'), Decimal('1.00000000000000000000'), Decimal('1.00000000000000000000'), Decimal('1.00000000000000000000'), Decimal('4.0000000000000000'), Decimal('4.0000000000000000'), Decimal('4.0000000000000000'), Decimal('20.8238095238095238'), Decimal('49.7359855334538879'), Decimal('1.000000000000000

In [166]:
query = "SELECT pg_size_pretty(pg_database_size(current_database())) AS db_size;"
bigbrotr.execute(query)
rows = bigbrotr.fetchall()
rows

[('12 GB',)]

In [27]:
bigbrotr.close()

# relay_urls

In [None]:
import pandas as pd
relays_url = pd.read_csv("../seed/relays_url.csv")
relays_url = relays_url.groupby('relay_url').agg({'count': 'sum'}).reset_index()
relays_url = relays_url.sort_values(by='count', ascending=False).reset_index(drop=True)
relays_url.to_csv("../seed/relays_url.csv", index=False)