In [None]:
import os
import json
import traceback

# Load all CVE reports into a structured format and save to JSONL
def _extract_cve_record(data):
    """Flatten one parsed CVE JSON document into the exported record.

    Returns a dict with ``cve_id``, ``title`` and ``description`` keys, or
    ``None`` when *data* is not a JSON object or carries no
    ``cveMetadata.cveId`` (i.e. it is not a CVE record at all).
    """
    if not isinstance(data, dict):
        return None
    cve_id = data.get("cveMetadata", {}).get("cveId")
    if not cve_id:
        return None
    cna = data.get("containers", {}).get("cna", {})
    # A record may carry several descriptions; they are joined with a single
    # space in the order they appear in the file.
    description = " ".join(
        desc.get("value", "") for desc in cna.get("descriptions", [])
    )
    return {
        "cve_id": cve_id,
        "title": cna.get("title", "No Title"),
        "description": description,
    }


def load_cve_database_and_save_jsonl(cve_directory: str, output_jsonl: str) -> None:
    """Collect every CVE JSON record under a directory tree into one JSONL file.

    The tree rooted at ``cve_directory`` is walked recursively in sorted
    order, so the output line order is reproducible. Each ``*.json`` file is
    parsed and reduced to ``{"cve_id", "title", "description"}``; one such
    object is written per line of ``output_jsonl``.

    Files that cannot be read or parsed, and JSON files that contain no
    ``cveMetadata.cveId``, are reported and counted as skipped instead of
    being exported. Progress, errors and a final summary are printed to
    stdout.

    Args:
        cve_directory: Directory that contains the CVE ``*.json`` files
            (searched recursively).
        output_jsonl: Path of the JSONL file to create or overwrite.

    Returns:
        None. If ``cve_directory`` is not an existing directory, an error is
        printed and nothing is written.
    """
    # isdir (not exists): a path to a regular file would make os.walk yield
    # nothing and silently produce an empty output file.
    if not os.path.isdir(cve_directory):
        print(f"Error: The directory {cve_directory} does not exist.")
        return

    print(f"Starting to process files in directory: {cve_directory}")
    cve_data = []
    total_files = 0
    processed_files = 0
    skipped_files = 0

    for root, dirs, files in os.walk(cve_directory):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file_name in sorted(files):
            total_files += 1
            if not file_name.endswith(".json"):
                continue

            file_path = os.path.join(root, file_name)
            print(f"Processing file: {file_path}")
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                record = _extract_cve_record(data)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON in file: {file_path}")
                print("Error details:", e)
                print(traceback.format_exc())
                skipped_files += 1
                continue
            except Exception as e:
                # Best-effort batch job: one unreadable or oddly shaped file
                # must not abort the whole run.
                print(f"Unexpected error processing file: {file_path}")
                print("Error details:", e)
                print(traceback.format_exc())
                skipped_files += 1
                continue

            if record is None:
                # Valid JSON but not a CVE record; exporting it would add a
                # line with a null cve_id.
                print(f"Skipping file without a CVE ID: {file_path}")
                skipped_files += 1
                continue

            cve_data.append(record)
            processed_files += 1

    # Save to JSONL format
    print(f"Saving data to {output_jsonl}")
    try:
        with open(output_jsonl, 'w', encoding='utf-8') as jsonl_file:
            for entry in cve_data:
                json.dump(entry, jsonl_file)
                jsonl_file.write('\n')
        print(f"Data successfully saved to {output_jsonl}")
    except Exception as e:
        print(f"Error saving data to {output_jsonl}")
        print("Error details:", e)
        print(traceback.format_exc())

    # Summary of the process
    print("Processing Summary:")
    print(f"Total files found: {total_files}")
    print(f"Files successfully processed: {processed_files}")
    print(f"Files skipped due to errors: {skipped_files}")

# Example usage: flatten the CVE JSON tree into a single JSONL file.
# NOTE(review): the input path uses Windows-style separators and presumably
# points at an already-extracted folder rather than inside the archive — confirm.
cve_directory = "2025-03-26_all_CVEs_at_midnight.zip\\cves\\cves"
output_jsonl = "cve_database.jsonl"
load_cve_database_and_save_jsonl(
    cve_directory=cve_directory,
    output_jsonl=output_jsonl,
)
