# In [None]:
import os
import sqlite3
from datetime import datetime

# Path to the folder containing SEC filings
base_dir = 'sec-edgar-filings'

# Database connection (sqlite3.connect creates the file if it is missing)
conn = sqlite3.connect('sec_filings.db')
c = conn.cursor()

# SQLite does not enforce FOREIGN KEY constraints unless this pragma is
# switched on for the connection; without it the REFERENCES clauses in the
# schema below are accepted but never checked.
c.execute('PRAGMA foreign_keys = ON')

# Create tables (each statement is a no-op when the table already exists)

# Documents: one row per loaded file (name, path, load timestamp).
c.execute('''
CREATE TABLE IF NOT EXISTS Documents (
    doc_id INTEGER PRIMARY KEY AUTOINCREMENT,
    file_name TEXT,
    file_path TEXT,
    date_loaded TIMESTAMP
)
''')

# TextualData: text content attached to a Documents row via doc_id.
c.execute('''
CREATE TABLE IF NOT EXISTS TextualData (
    text_id INTEGER PRIMARY KEY AUTOINCREMENT,
    doc_id INTEGER,
    text_content TEXT,
    FOREIGN KEY (doc_id) REFERENCES Documents (doc_id)
)
''')

# NumericalData: per-document stock name, share count and share value,
# attached to a Documents row via doc_id.
c.execute('''
CREATE TABLE IF NOT EXISTS NumericalData (
    data_id INTEGER PRIMARY KEY AUTOINCREMENT,
    doc_id INTEGER,
    stock_name TEXT,
    share_count INTEGER,
    share_value REAL,
    FOREIGN KEY (doc_id) REFERENCES Documents (doc_id)
)
''')

def process_file(file_path):
    """Load one filing from disk and record it in the database.

    Inserts a row into ``Documents`` (base file name, path as given, load
    timestamp) and a row into ``TextualData`` holding the file's full text,
    linked through the newly assigned ``doc_id``. Both inserts are committed
    together as one transaction.

    Uses the module-level ``conn`` connection and ``c`` cursor.

    Args:
        file_path: Path of the text file to ingest.

    Raises:
        OSError: If the file cannot be opened or read.
        sqlite3.Error: If either insert fails; the transaction is rolled
            back, so no ``Documents`` row is kept without its text.
    """
    # Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    # raising UnicodeDecodeError, so a single oddly encoded file cannot
    # abort the whole load.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        content = file.read()

    # NOTE: placeholder parsing. The whole file is stored as text; nothing
    # is extracted into NumericalData yet. Adjust once the real content
    # structure and parsing needs are known.

    # Bind the timestamp as text explicitly. This is the same value the
    # sqlite3 default datetime adapter produces (ISO 8601 with a space
    # separator), but that adapter is deprecated since Python 3.12.
    loaded_at = datetime.now().isoformat(sep=' ')

    # `with conn` commits when the body succeeds and rolls back if it raises,
    # keeping the metadata row and its text row together.
    with conn:
        # Insert document metadata
        c.execute('INSERT INTO Documents (file_name, file_path, date_loaded) VALUES (?, ?, ?)',
                  (os.path.basename(file_path), file_path, loaded_at))
        doc_id = c.lastrowid

        # Insert textual data (placeholder for actual parsing logic)
        c.execute('INSERT INTO TextualData (doc_id, text_content) VALUES (?, ?)', (doc_id, content))

# Walk through the directory structure and load every .txt file found.
# The try/finally ensures the connection is closed even when a file fails
# to load, instead of being left open after the exception.
try:
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('.txt'):
                process_file(os.path.join(root, file))
finally:
    # Close the database connection
    conn.close()

# Reached only when every file was processed without an exception.
print("Database has been populated with files and preliminary parsing.")
