### Exploratory Analysis of DuckDB br_funds.db

In [None]:
import duckdb
import pandas as pd

In [None]:
# Connect to the local DuckDB database file. The context manager closes the
# connection when the block exits, including when the query raises, so a
# failed cell does not leave the connection open.
with duckdb.connect('../data/br_funds.db') as conn:
    # List all tables in the database
    tables = conn.execute("SHOW TABLES").fetchall()

print("Available tables:", tables)

### Analysis of assets table

In [None]:
# Load up to 20 rows of the assets table into a DataFrame. The connection is
# scoped to the `with` block so it is closed even if the query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    df = conn.execute("SELECT * FROM assets LIMIT 20").fetchdf()

# Kept as the last expression of the cell so the notebook renders the preview.
df.head()

In [None]:
# Read the asset_id of the row labelled 2 (the third record when the frame
# has its default 0-based index). Stored as `asset_id` rather than `id` so the
# builtin id() is not shadowed for the rest of the notebook session.
asset_id = df["asset_id"][2]
print("Asset ID of third record:", asset_id)

In [None]:
# Collect the distinct values of assets.financial_instrument. The connection
# is scoped to the `with` block so it is closed even if the query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    values = conn.execute("SELECT DISTINCT financial_instrument FROM assets").fetchall()

print("Distinct asset classes:", values)

### Asset Class Meanings:
- INVESTMENT_FUND: Shares or units in other mutual funds/ETFs (fund of funds)
- EQUITY: Stocks/shares in companies
- FIXED_INCOME: Bonds, treasury notes, or other debt securities
- CASH: Cash holdings, money market instruments, or cash equivalents
- DERIVATIVES: Options, futures, swaps, or other derivative contracts (used for hedging or speculation)
- UNSPECIFIED: Assets that don't fit the other categories or missing classification data

In [None]:
# Fetch every assets row whose asset_class is 'EQUITY'.
# NOTE(review): the distinct class values earlier in this notebook are read
# from `financial_instrument`; confirm `asset_class` is the intended column.
query = """
SELECT * 
FROM assets
WHERE asset_class = 'EQUITY'
"""

# The connection is scoped to the `with` block so it is closed even if the
# query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    result = conn.execute(query).df()

# Kept as the last expression of the cell so the notebook renders the preview.
result.head(10)



### Analysis of fund_performance_indicators table

In [None]:
# Load up to 5 rows of fund_performance_indicators into a DataFrame. The
# connection is scoped to the `with` block so it is closed even if the query
# fails.
with duckdb.connect('../data/br_funds.db') as conn:
    df = conn.execute("SELECT * FROM fund_performance_indicators LIMIT 5").fetchdf()

# Kept as the last expression of the cell so the notebook renders the preview.
df.head()

### Analysis of fund_snapshots table

In [None]:
# Load up to 5 rows of fund_snapshots into a DataFrame. The connection is
# scoped to the `with` block so it is closed even if the query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    df = conn.execute("SELECT * FROM fund_snapshots LIMIT 5").fetchdf()

# Kept as the last expression of the cell so the notebook renders the preview.
df.head()

### Analysis of funds table (OF INTEREST)

In [None]:
# Load up to 5 rows of the funds table into a DataFrame. The connection is
# scoped to the `with` block so it is closed even if the query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    df = conn.execute("SELECT * FROM funds LIMIT 5").fetchdf()

# Kept as the last expression of the cell so the notebook renders the preview.
df.head()

In [None]:
# Look up the `identifiers` value on the row labelled 2 of the current frame
# with a single label-based access, then echo it.
identifier = df.loc[2, 'identifiers']
print("Fund identifier:", identifier)

In [None]:
# Summary statistics for whatever frame `df` currently holds, using pandas'
# default describe() settings.
df.describe()

In [None]:
# Print a numbered listing (starting at 1) of the column names in `df`.
print("\nColumns (one per line):")
for position, column_name in enumerate(df.columns, start=1):
    numbered_line = f"{position}. {column_name}"
    print(numbered_line)

### Analysis of positions table

In [None]:
# Load up to 5 rows of the positions table into a DataFrame. The connection
# is scoped to the `with` block so it is closed even if the query fails.
with duckdb.connect('../data/br_funds.db') as conn:
    df = conn.execute("SELECT * FROM positions LIMIT 5").fetchdf()

# Kept as the last expression of the cell so the notebook renders the preview.
df.head()