# Local Machine (Oscar) Specific Setup

In [1]:
from pathlib import Path
from pprint  import pprint
import sqlite3
import csv
import os
import random
os.chdir("..")

# Set Up Paths for Writing Files

In [2]:
current_directory = os.getcwd()
print(current_directory)
conn = sqlite3.connect(current_directory + '/sqlite/nport.db')
cursor = conn.cursor()
# Set cache size (in pages, where each page is usually 4KB; negative values mean in KB)
cursor.execute("PRAGMA cache_size = -262144;")  # Example: limits cache to 1 GB
# Set temp storage to use file-based storage rather than RAM
cursor.execute("PRAGMA temp_store = 1;")  # 1 = file-based storage, 2 = memory (default)
global_raw_data_folder = os.getcwd()+'/data/2024q3_nport'
print(global_raw_data_folder)

d:\GithubRepos\PIMCO-Text2SQL
d:\GithubRepos\PIMCO-Text2SQL/data/2024q3_nport


# Create Table for FUND_REPORTED_HOLDING_RAW



## Note:
- FUND_REPORTED_HOLDING contains ISSUER_CUSIP
- DEBT_SECURITY_REF_INSTRUMENT, DESC_REF_INDEX_COMPONENT, and DESC_REF_OTHER all contain CUSIP
- I retrieved the unique cusip values for each table to see which had the broadest range that I could sample from.


---



*   Unique cusip count from FUND_REPORTED_HOLDING:  356276
*   Unique cusip count from DEBT_SECURITY_REF_INSTRUMENT:  552
*   Unique cusip count from DESC_REF_INDEX_COMPONENT:  2890
*   Unique cusip count from DESC_REF_OTHER:  3307

I have chosen to sample from FUND_REPORTED_HOLDING.

In [3]:
# Create table from .tsv file
def create_table(filename):
    if filename.endswith('.tsv'):
        table_name = 'FUND_REPORTED_HOLDING_RAW'
        file_path = global_raw_data_folder +'/'+ filename

        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter='\t')
            column_names = next(reader)  # Get the first row as column names

            create_table_query = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join([f'{col} TEXT' for col in column_names])});"
            cursor.execute(create_table_query)

            # Insert data into the table
            for row in reader:
                insert_query = f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES ({', '.join(['?'] * len(column_names))});"
                cursor.execute(insert_query, row)

In [4]:
create_table("FUND_REPORTED_HOLDING.tsv")

# Randomly sample 1000 CUSIPS AS SAMPLED_CUSIPS

In [5]:
# Randomly sample 1000 distinct CUSIPS as table SAMPLE_CUSIPS
cursor.execute('''CREATE TABLE SAMPLED_CUSIPS AS
                SELECT DISTINCT ISSUER_CUSIP
                FROM FUND_REPORTED_HOLDING_RAW
                WHERE ISSUER_CUSIP != 'N/A'
                ORDER BY RANDOM()
                LIMIT 1000;''')

<sqlite3.Cursor at 0x250112ef0c0>

# Filter FUND_REPORTED_HOLDING with FUND_REPORTED_HOLDING_RAW JOIN SAMPLED_CUSIPS

In [6]:
cursor.execute('''
                CREATE TABLE FUND_REPORTED_HOLDING AS
                SELECT *
                FROM FUND_REPORTED_HOLDING_RAW frhr
                JOIN SAMPLED_CUSIPS s ON frhr.ISSUER_CUSIP = s.ISSUER_CUSIP
''')

<sqlite3.Cursor at 0x250112ef0c0>

# Create tables only containing rows that align with sampled cusips

In [7]:
def create_sampled_table(filename, primary_key):
    if filename.endswith('.tsv'):
        table_name = os.path.splitext(filename)[0]
        file_path = global_raw_data_folder + '/'+filename
        if not os.path.isfile(file_path):
            return f"Exiting early because '{filename}'.tsv does not exist in '{global_raw_data_folder}"
        # Retrieve primary keys from the fund_reported_holdings table
        primary_key_query = f"SELECT {primary_key} FROM FUND_REPORTED_HOLDING;"
        cursor.execute(primary_key_query)
        primary_keys = {row[0] for row in cursor.fetchall()}  # Store keys in a set for faster lookup

        # After fetching primary keys
        # print("Primary Keys from DATABASE:", primary_keys)
        # print("Number of Primary Keys:", len(primary_keys))
        # unique_primary_keys = set(primary_keys)
        # print("Number of Unique Primary Keys:", len(unique_primary_keys))
        
        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter='\t')
            column_names = next(reader)  # Get the first row as column names

            create_table_query = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join([f'{col} TEXT' for col in column_names])});"
            cursor.execute(create_table_query)

            # Insert only rows that match the primary keys
            for row in reader:
                # Retrieve primary key value from the current row
                row_primary_key = row[column_names.index(primary_key)]

                # Check if the row's primary key exists in the fetched primary keys
                if row_primary_key in primary_keys:
                    insert_query = f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES ({', '.join(['?'] * len(column_names))});"
                    cursor.execute(insert_query, row)

In [8]:
ac_tables = [
    "SUBMISSION",
    "REGISTRANT",
    "FUND_REPORTED_INFO",
    "INTEREST_RATE_RISK",
    "BORROWER",
    "BORROW_AGGREGATE",
    "MONTHLY_TOTAL_RETURN",
    "MONTHLY_RETURN_CAT_INSTRUMENT",
    "FUND_VAR_INFO",
    "EXPLANATORY_NOTE"
]

hid_tables = [
    "IDENTIFIERS",
    "DEBT_SECURITY",
    "DEBT_SECURITY_REF_INSTRUMENT",
    "CONVERTIBLE_SECURITY_CURRENCY",
    "REPURCHASE_AGREEMENT",
    "REPURCHASE_COUNTERPARTY",
    "REPURCHASE_COLLATERAL",
    "DERIVATIVE_COUNTERPARTY",
    "SWAPTION_OPTION_WARNT_DERIV",
    "DESC_REF_INDEX_BASKET",
    "DESC_REF_INDEX_COMPONENT",
    "DESC_REF_OTHER",
    "FUT_FWD_NONFOREIGNCUR_CONTRACT",
    "FWD_FOREIGNCUR_CONTRACT_SWAP",
    "NONFOREIGN_EXCHANGE_SWAP",
    "FLOATING_RATE_RESET_TENOR",
    "OTHER_DERIV",
    "OTHER_DERIV_NOTIONAL_AMOUNT",
    "SECURITIES_LENDING",
]

for table in ac_tables:
    filename = f"{table}.tsv"
    create_sampled_table(filename, "ACCESSION_NUMBER")
    print("Created sampled table using ACCESSION_NUMBER", filename)

for table in hid_tables:
    filename = f"{table}.tsv"
    create_sampled_table(filename, "HOLDING_ID")
    print("Created sampled table using HOLDING_ID", filename)


Created sampled table using ACCESSION_NUMBER SUBMISSION.tsv
Created sampled table using ACCESSION_NUMBER REGISTRANT.tsv
Created sampled table using ACCESSION_NUMBER FUND_REPORTED_INFO.tsv
Created sampled table using ACCESSION_NUMBER INTEREST_RATE_RISK.tsv
Created sampled table using ACCESSION_NUMBER BORROWER.tsv
Created sampled table using ACCESSION_NUMBER BORROW_AGGREGATE.tsv
Created sampled table using ACCESSION_NUMBER MONTHLY_TOTAL_RETURN.tsv
Created sampled table using ACCESSION_NUMBER MONTHLY_RETURN_CAT_INSTRUMENT.tsv
Created sampled table using ACCESSION_NUMBER FUND_VAR_INFO.tsv
Created sampled table using ACCESSION_NUMBER EXPLANATORY_NOTE.tsv
Created sampled table using HOLDING_ID IDENTIFIERS.tsv
Created sampled table using HOLDING_ID DEBT_SECURITY.tsv
Created sampled table using HOLDING_ID DEBT_SECURITY_REF_INSTRUMENT.tsv
Created sampled table using HOLDING_ID CONVERTIBLE_SECURITY_CURRENCY.tsv
Created sampled table using HOLDING_ID REPURCHASE_AGREEMENT.tsv
Created sampled table

In [9]:
# Drop irrelevant tables
cursor.execute("DROP TABLE FUND_REPORTED_HOLDING_RAW;")
cursor.execute("DROP TABLE SAMPLED_CUSIPS;")

<sqlite3.Cursor at 0x250112ef0c0>

# Drop columns missing more than 85% values

In [10]:
def drop_columns(table_name):
    temp_cursor= conn.cursor()

    temp_cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table_name,))
    result = temp_cursor.fetchone()

    # Check if the result is not None
    if not result:
        return

    temp_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
    total_rows = temp_cursor.fetchone()[0]

    temp_cursor.execute(f"PRAGMA table_info({table_name})")
    columns = temp_cursor.fetchall()

    columns_to_drop = []
    columns_to_keep = []


    for column in columns:
        column_name = column[1]

        # Skip columns that end with ':1'
        if column_name.endswith(':1'):
            print(f"Skipping column {column_name} in table {table_name}")
            continue

        temp_cursor.execute(f"SELECT COUNT({column_name}) FROM {table_name} WHERE {column_name} = ''")
        null_count = temp_cursor.fetchone()[0]

        if null_count > 0.85 * total_rows:
            columns_to_drop.append(column_name)
        else:
            columns_to_keep.append(column_name)

    if columns_to_drop:
        # Create new table with remaining columns
        new_table_name = f"{table_name}_new"
        columns_definition = ', '.join(columns_to_keep)
        temp_cursor.execute(f"CREATE TABLE {new_table_name} ({columns_definition})")

        # Copy columns to new table
        temp_cursor.execute(f"INSERT INTO {new_table_name} ({columns_definition}) SELECT {columns_definition} FROM {table_name}")

        # Drop old table
        temp_cursor.execute(f"DROP TABLE {table_name}")

        # Rename new table to original table name
        temp_cursor.execute(f"ALTER TABLE {new_table_name} RENAME TO {table_name}")

        print(f"Dropped columns {', '.join(columns_to_drop)} from table {table_name}")
    else:
        print(f"No columns to drop from table {table_name}")
    
    temp_cursor.close()
    return columns_to_drop, columns_to_keep




In [11]:
tables = ['FUND_REPORTED_HOLDING','REGISTRANT', 'SUBMISSION', 'FUND_REPORTED_INFO', 'INTEREST_RATE_RISK', 'BORROWER', 'BORROW_AGGREGATE', 'MONTHLY_TOTAL_RETURN', 'MONTHLY_RETURN_CAT_INSTRUMENT', 'FUND_VAR_INFO', 'EXPLANATORY_NOTE', 'IDENTIFIERS', 'DEBT_SECURITY_REF_INSTRUMENT', 'CONVERTIBLE_SECURITY_CURRENCY', 'REPURCHASE_AGREEMENT', 'REPURCHASE_COUNTERPARTY', 'REPURCHASE_COLLATERAL', 'DERIVATIVE_COUNTERPARTY', 'SWAPTION_OPTION_WARNT_DERIV', 'DESC_REF_INDEX_BASKET', 'DESC_REF_INDEX_COMPONENT', 'DESC_REF_OTHER', 'FUT_FWD_NONFOREIGNCUR_CONTRACT', 'FWD_FOREIGNCUR_CONTRACT_SWAP', 'NONFOREIGN_EXCHANGE_SWAP', 'FLOATING_RATE_RESET_TENOR', 'OTHER_DERIV', 'OTHER_DERIV_NOTIONAL_AMOUNT', 'SECURITIES_LENDING']
columns_to_drop = {}
columns_to_keep = {}
for table in tables:
    columns_to_drop[table],columns_to_keep[table] = drop_columns(table)
print(columns_to_drop)
print(columns_to_keep)

Skipping column ISSUER_CUSIP:1 in table FUND_REPORTED_HOLDING
Dropped columns OTHER_UNIT_DESC, EXCHANGE_RATE, OTHER_ASSET, OTHER_ISSUER, DERIVATIVE_CAT from table FUND_REPORTED_HOLDING
No columns to drop from table REGISTRANT
Dropped columns FILE_NUM from table SUBMISSION
No columns to drop from table FUND_REPORTED_INFO
No columns to drop from table INTEREST_RATE_RISK
No columns to drop from table BORROWER
Dropped columns OTHER_DESC from table BORROW_AGGREGATE
No columns to drop from table MONTHLY_TOTAL_RETURN
No columns to drop from table MONTHLY_RETURN_CAT_INSTRUMENT
No columns to drop from table FUND_VAR_INFO
No columns to drop from table EXPLANATORY_NOTE
Dropped columns IDENTIFIER_TICKER, OTHER_IDENTIFIER, OTHER_IDENTIFIER_DESC from table IDENTIFIERS
Dropped columns TICKER, OTHER_IDENTIFIER, OTHER_DESC from table DEBT_SECURITY_REF_INSTRUMENT
No columns to drop from table CONVERTIBLE_SECURITY_CURRENCY
No columns to drop from table REPURCHASE_AGREEMENT
No columns to drop from table R

In [12]:
# View tables currently in database
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
tables = [table[0] for table in tables]
print(len(tables))
print(tables)

30
['REGISTRANT', 'FUND_REPORTED_INFO', 'INTEREST_RATE_RISK', 'BORROWER', 'MONTHLY_TOTAL_RETURN', 'MONTHLY_RETURN_CAT_INSTRUMENT', 'FUND_VAR_INFO', 'EXPLANATORY_NOTE', 'DEBT_SECURITY', 'CONVERTIBLE_SECURITY_CURRENCY', 'REPURCHASE_AGREEMENT', 'REPURCHASE_COUNTERPARTY', 'REPURCHASE_COLLATERAL', 'DERIVATIVE_COUNTERPARTY', 'DESC_REF_INDEX_COMPONENT', 'FUT_FWD_NONFOREIGNCUR_CONTRACT', 'FWD_FOREIGNCUR_CONTRACT_SWAP', 'FLOATING_RATE_RESET_TENOR', 'OTHER_DERIV', 'OTHER_DERIV_NOTIONAL_AMOUNT', 'FUND_REPORTED_HOLDING', 'SUBMISSION', 'BORROW_AGGREGATE', 'IDENTIFIERS', 'DEBT_SECURITY_REF_INSTRUMENT', 'SWAPTION_OPTION_WARNT_DERIV', 'DESC_REF_INDEX_BASKET', 'DESC_REF_OTHER', 'NONFOREIGN_EXCHANGE_SWAP', 'SECURITIES_LENDING']


In [13]:
def align_columns(table_name, table_name_new):
# Get the existing columns of the new table
    temp_cursor= conn.cursor()
    temp_cursor.execute(f"PRAGMA table_info({table_name_new});")
    new_table_columns = [col[1] for col in temp_cursor.fetchall()]

    # Drop irrelevant columns in table_name_new
    drop_columns = columns_to_drop.get(table_name, [])
    if drop_columns:
        aligned_columns = [col for col in new_table_columns if col not in drop_columns]
        aligned_table_name = f"{table_name_new}_aligned"
        aligned_columns_definition = ', '.join([f"{col} TEXT" for col in aligned_columns])

        temp_cursor.execute(f"CREATE TABLE {aligned_table_name} ({aligned_columns_definition})")
        temp_cursor.execute(f"INSERT INTO {aligned_table_name} SELECT {', '.join(aligned_columns)} FROM {table_name_new}")
        temp_cursor.execute(f"DROP TABLE {table_name_new}")
        temp_cursor.execute(f"ALTER TABLE {aligned_table_name} RENAME TO {table_name_new}")
        print(f"Dropped columns {drop_columns} from table '{table_name_new}'")

    # Add missing columns to align with baseline schema
    baseline_columns = columns_to_keep.get(table_name, [])
    temp_cursor.execute(f"PRAGMA table_info({table_name_new});")
    updated_columns = [col[1] for col in temp_cursor.fetchall()]
    missing_columns = set(baseline_columns) - set(updated_columns)

    for col in missing_columns:
        temp_cursor.execute(f"ALTER TABLE {table_name_new} ADD COLUMN {col} TEXT;")
        print(f"Added missing column '{col}' to table '{table_name_new}'")
    temp_cursor.close()

# Add QUARTER column to tables from 2024q3

In [14]:
def add_quarter_column(raw_data_folder, table_name):
    temp_cursor= conn.cursor()
    # Extract the folder name from the path
    print(raw_data_folder)
    folder_name = raw_data_folder.split('/')[-1].split('_')[0]
    print(folder_name)
    temp_cursor.execute(f"PRAGMA table_info({table_name})")
    columns = [column_info[1] for column_info in temp_cursor.fetchall()]
    
    if "QUARTER" not in columns:

        # Add the 'quarter' column to the table
        temp_cursor.execute(f'ALTER TABLE "{table_name}" ADD COLUMN QUARTER TEXT')

    # Update all rows in the new 'quarter' column with the folder name
    temp_cursor.execute(f'UPDATE "{table_name}" SET QUARTER = ?', (folder_name,))
    temp_cursor.execute(f"SELECT QUARTER FROM {table_name}")
    print("""
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
""")
    print(temp_cursor.fetchall())
    print("""
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
""")
    print(f"Added column 'QUARTER' to table '{table_name}' with value '{folder_name}' for all rows.")
    temp_cursor.close()



In [15]:
for table in tables:
  add_quarter_column(global_raw_data_folder, table)
  print(global_raw_data_folder)

d:\GithubRepos\PIMCO-Text2SQL/data/2024q3_nport
2024q3

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

[('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',), ('2024q3',),

# Loop

In [16]:
def create_sampled_table_and_join(filename, raw_data_folder, primary_key):
        temp_cursor = conn.cursor()
        table_name = os.path.splitext(filename)[0]  # Extract table name without extension
        file_path = raw_data_folder +'/'+ filename  # Construct full file path
        print(f"filename: {filename}")
        print(f"raw_data_folder: {raw_data_folder}")
        print(f"primary_key: {primary_key}")
        print(f"derived file_path: {file_path}")
        print(f"table_name: {table_name}")
        if not os.path.isfile(file_path):
            print(f"Exiting early because '{filename}'.tsv does not exist in '{raw_data_folder}")
            return
        # Retrieve primary keys from the FUND_REPORTED_HOLDING table
        print(f"Now on {file_path}")
        primary_key_query = f"SELECT {primary_key} FROM FUND_REPORTED_HOLDING;"
        temp_cursor.execute(primary_key_query)
        primary_keys = {row[0] for row in temp_cursor.fetchall()}  # Store keys in a set for faster lookup

        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter='\t')
            column_names = next(reader)  # Get the first row as column names

            # Create the new table with "_NEW" suffix
            create_table_query = f"CREATE TABLE IF NOT EXISTS {table_name}_NEW ({', '.join([f'{col} TEXT' for col in column_names])});"
            print(f"Creating table for {table_name} |||| with columns {', '.join([f'{col} TEXT' for col in column_names])}")
            temp_cursor.execute(create_table_query)
            
            # Insert only rows that match the primary keys
            for row in reader:
                row_primary_key = row[column_names.index(primary_key)]  # Retrieve primary key value from the current row

                if row_primary_key in primary_keys:  # Check if the row's primary key exists
                    insert_query = f"INSERT INTO {table_name}_NEW ({', '.join(column_names)}) VALUES ({', '.join(['?'] * len(column_names))});"
                    temp_cursor.execute(insert_query, row)

        # Drop columns with more than 85% missing values
        align_columns(table_name,f"{table_name}_NEW")
        # Add a 'quarter' column with corresponding quarter
        add_quarter_column(raw_data_folder, f"{table_name}_NEW")

        # Join new data into original table
        join_query = f"INSERT INTO {table_name} SELECT * FROM {table_name}_NEW;"
        temp_cursor.execute(join_query) ######################################################

        # Drop the temporary new table
        drop_table_query = f"DROP TABLE {table_name}_NEW;"
        temp_cursor.execute(drop_table_query)

        print(f"Successfully processed '{filename}' and updated '{table_name}'.")
        temp_cursor.close()
        





In [17]:
directory_path = os.getcwd()+"/data"
print("directory_path:", directory_path)
# List all folders in the directory
folder_names = [name for name in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, name))]
folder_names.pop()
print(folder_names)
for folder in folder_names:

    raw_data_folder = os.getcwd()+f"/data/{folder}"
    print("Processing ", raw_data_folder)
    print("="*40)

    # Separate query because ac_tables does not contain FUND_REPORTED_HOLDING
    create_sampled_table_and_join(f"FUND_REPORTED_HOLDING.tsv", raw_data_folder, "ACCESSION_NUMBER")
    for table in ac_tables:
        create_sampled_table_and_join(f"{table}.tsv", raw_data_folder, "ACCESSION_NUMBER")

    for table in hid_tables:
        create_sampled_table_and_join(f"{table}.tsv", raw_data_folder, "HOLDING_ID")

    print("="*40)
    print()
    print()

directory_path: d:\GithubRepos\PIMCO-Text2SQL/data
['2019q4_nport', '2020q1_nport', '2020q2_nport', '2020q3_nport', '2020q4_nport', '2021q1_nport', '2021q2_nport', '2021q3_nport', '2021q4_nport', '2022q1_nport', '2022q2_nport', '2022q3_nport', '2022q4_nport', '2023q1_nport', '2023q2_nport', '2023q3_nport', '2023q4_nport', '2024q1_nport', '2024q2_nport']
Processing  d:\GithubRepos\PIMCO-Text2SQL/data/2019q4_nport
filename: FUND_REPORTED_HOLDING.tsv
raw_data_folder: d:\GithubRepos\PIMCO-Text2SQL/data/2019q4_nport
primary_key: ACCESSION_NUMBER
derived file_path: d:\GithubRepos\PIMCO-Text2SQL/data/2019q4_nport/FUND_REPORTED_HOLDING.tsv
table_name: FUND_REPORTED_HOLDING
Now on d:\GithubRepos\PIMCO-Text2SQL/data/2019q4_nport/FUND_REPORTED_HOLDING.tsv
Creating table for FUND_REPORTED_HOLDING |||| with columns ACCESSION_NUMBER TEXT, HOLDING_ID TEXT, ISSUER_NAME TEXT, ISSUER_LEI TEXT, ISSUER_TITLE TEXT, ISSUER_CUSIP TEXT, BALANCE TEXT, UNIT TEXT, OTHER_UNIT_DESC TEXT, CURRENCY_CODE TEXT, CURREN

In [18]:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")

# Fetch all results and print each table name
lalalalala = cursor.fetchall()
print("Tables in the database:")
for table in lalalalala:
    print(table[0])


Tables in the database:
REGISTRANT
FUND_REPORTED_INFO
INTEREST_RATE_RISK
BORROWER
MONTHLY_TOTAL_RETURN
MONTHLY_RETURN_CAT_INSTRUMENT
FUND_VAR_INFO
EXPLANATORY_NOTE
DEBT_SECURITY
CONVERTIBLE_SECURITY_CURRENCY
REPURCHASE_AGREEMENT
REPURCHASE_COUNTERPARTY
REPURCHASE_COLLATERAL
DERIVATIVE_COUNTERPARTY
DESC_REF_INDEX_COMPONENT
FUT_FWD_NONFOREIGNCUR_CONTRACT
FWD_FOREIGNCUR_CONTRACT_SWAP
FLOATING_RATE_RESET_TENOR
OTHER_DERIV
OTHER_DERIV_NOTIONAL_AMOUNT
FUND_REPORTED_HOLDING
SUBMISSION
BORROW_AGGREGATE
IDENTIFIERS
DEBT_SECURITY_REF_INSTRUMENT
SWAPTION_OPTION_WARNT_DERIV
DESC_REF_INDEX_BASKET
DESC_REF_OTHER
NONFOREIGN_EXCHANGE_SWAP
SECURITIES_LENDING


In [None]:
cursor.execute("PRAGMA table_info(REGISTRANT);")

# Fetch all results and print column names
columns = cursor.fetchall()
print("Column names in 'REGISTRANT' table:")
for column in columns:
    print(column[1])  # column[1] contains the name of each column

for table in lalalalala:
    cursor.execute(f"SELECT * FROM {table} LIMIT 10;")

    # Fetch the first 10 rows
    rows = cursor.fetchall()

    # Print the rows
    print(f"First 10 rows of '{table}' table:")
    for row in rows:
        print(row)


In [20]:
conn.commit()

conn.close()
