In [1]:
# main.py - Usage example
from pathlib import Path
from utils import DatabaseConnections
from preprocessing import PDFProcessor


def main():
    """Run the PDF processing pipeline against PostgreSQL and Neo4j.

    Creates a ``PDFProcessor`` and a ``DatabaseConnections`` helper, opens a
    PostgreSQL connection with a cursor and a Neo4j driver, and hands the
    cursor and driver to ``processor.process_multiple_pdfs``.

    Any exception raised while connecting or processing is reported on
    stdout instead of being propagated. Every handle that was actually
    opened is closed before the function returns.
    """
    # Initialize processor
    processor = PDFProcessor()

    # Setup database connections
    db_conn = DatabaseConnections()

    # Pre-bind every handle so the cleanup code can distinguish "never
    # opened" (or "helper returned None") from "open" without inspecting
    # locals().
    postgres_conn = None
    postgres_cursor = None
    neo4j_driver = None

    try:
        # PostgreSQL connection
        postgres_conn = db_conn.connect_postgres()
        postgres_cursor = postgres_conn.cursor()

        # Neo4j connection (optional)
        neo4j_driver = db_conn.connect_neo4j()

        # Process PDFs from config
        # NOTE(review): no commit is issued here; presumably
        # process_multiple_pdfs commits its own writes -- confirm.
        processor.process_multiple_pdfs(postgres_cursor, neo4j_driver)

    except Exception as e:
        print(f"Error during processing: {e}")
    finally:
        # Close in the original order (cursor, connection, driver). Each
        # close is guarded separately so one failing handle cannot leak the
        # remaining ones.
        open_handles = (
            ("PostgreSQL cursor", postgres_cursor),
            ("PostgreSQL connection", postgres_conn),
            ("Neo4j driver", neo4j_driver),
        )
        for label, handle in open_handles:
            if handle is None:
                continue
            try:
                handle.close()
            except Exception as close_error:
                print(f"Error closing {label}: {close_error}")


# Run the pipeline only when this file is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


🔄 Processing: Operation_Overload


Created a chunk of size 747, which is longer than the specified 500
Created a chunk of size 1734, which is longer than the specified 500


📄 Created 422 chunks for Operation_Overload
✅ Neo4j connection successful
💾 Saved clean text -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/clean/Operation_Overload_clean.txt
💾 Saved indicators -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/raw/Operation_Overload_indicators.json
✅ Processed 'Operation_Overload' (422 chunks, 90 pages)
🔄 Processing: Storm-1516 Technical Report


Created a chunk of size 1084, which is longer than the specified 500
Created a chunk of size 634, which is longer than the specified 500
Created a chunk of size 963, which is longer than the specified 500
Created a chunk of size 519, which is longer than the specified 500
Created a chunk of size 630, which is longer than the specified 500
Created a chunk of size 728, which is longer than the specified 500
Created a chunk of size 1012, which is longer than the specified 500
Created a chunk of size 764, which is longer than the specified 500
Created a chunk of size 535, which is longer than the specified 500
Created a chunk of size 530, which is longer than the specified 500
Created a chunk of size 727, which is longer than the specified 500
Created a chunk of size 668, which is longer than the specified 500
Created a chunk of size 663, which is longer than the specified 500
Created a chunk of size 516, which is longer than the specified 500
Created a chunk of size 758, which is longer than the specified 500

📄 Created 337 chunks for Storm-1516 Technical Report
✅ Neo4j connection successful
💾 Saved clean text -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/clean/Storm-1516 Technical Report_clean.txt
💾 Saved indicators -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/raw/Storm-1516 Technical Report_indicators.json
✅ Processed 'Storm-1516 Technical Report' (337 chunks, 39 pages)
🔄 Processing: Doppelgänger Campaign Report


Created a chunk of size 532, which is longer than the specified 500
Created a chunk of size 1037, which is longer than the specified 500
Created a chunk of size 532, which is longer than the specified 500
Created a chunk of size 766, which is longer than the specified 500
Created a chunk of size 744, which is longer than the specified 500
Created a chunk of size 673, which is longer than the specified 500
Created a chunk of size 514, which is longer than the specified 500
Created a chunk of size 1360, which is longer than the specified 500
Created a chunk of size 868, which is longer than the specified 500
Created a chunk of size 1136, which is longer than the specified 500
Created a chunk of size 951, which is longer than the specified 500
Created a chunk of size 1374, which is longer than the specified 500
Created a chunk of size 624, which is longer than the specified 500
Created a chunk of size 831, which is longer than the specified 500
Created a chunk of size 1233, which is longer than the specified 500

📄 Created 175 chunks for Doppelgänger Campaign Report
✅ Neo4j connection successful
💾 Saved clean text -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/clean/Doppelgänger Campaign Report_clean.txt
💾 Saved indicators -> /home/fm-pc-lt321/Desktop/suyesh/coding Mountain/fina/codingMountain/raw/Doppelgänger Campaign Report_indicators.json
✅ Processed 'Doppelgänger Campaign Report' (175 chunks, 28 pages)
📊 Processing complete: 3/3 PDFs processed successfully
