In [None]:
import json
import os

from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI, ChatOpenAI

# 1. Load environment variables (API key, endpoint, deployment name) from the .env file
load_dotenv()

# --- Configuration ---
# Use this for the standard OpenAI API:
# llm = ChatOpenAI(model="gpt-4o")

# Or configure for Azure OpenAI Service.
# NOTE: the azure_endpoint / azure_deployment / api_version arguments belong to
# AzureChatOpenAI. The plain ChatOpenAI client does not define them, so it
# cannot be used to reach an Azure OpenAI resource with this configuration.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-02-01",
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),  # Your GPT-4o deployment name
    model="gpt-4o",
)


# 2. Load the unstructured design document
# An explicit encoding keeps decoding independent of the platform's locale
# default (e.g. cp1252 on Windows); assumes the spec file is saved as UTF-8.
loader = TextLoader("./system-spec.txt", encoding="utf-8")
document = loader.load()

# 3. Build the prompt and the output parser used by the chain
# The parser turns the chat model's message into a plain string.
output_parser = StrOutputParser()

# Instruction text sent to the model; {document_text} is filled in at invoke time.
# Refining this wording is the main lever for improving extraction quality.
prompt_template = """
You are an expert cybersecurity architect specializing in threat modeling.
Your task is to read the provided system design document and extract the core components for a Data Flow Diagram (DFD) in JSON format.

The JSON output must contain lists for 'assets', 'processes', and 'data_flows'.
- An 'asset' is a data store where data rests (e.g., database, log file).
- A 'process' is a component that acts on data (e.g., API, service).
- A 'data_flow' describes data movement between a source and a destination process/asset.

System Design Document:
---
{document_text}
---

Based on the document, generate the JSON output.
"""

prompt = ChatPromptTemplate.from_template(template=prompt_template)

# 4. Compose the pipeline: prompt -> chat model -> string output
chain = prompt | llm | output_parser

# 5. Run the pipeline on the loaded document and show the raw reply
print("--- Invoking LLM Chain ---")
# Only the first loaded document is sent to the model.
chain_inputs = {"document_text": document[0].page_content}
response = chain.invoke(chain_inputs)
print("--- LLM Output ---")
print(response)

# 6. (Optional but recommended) Validate if the output is valid JSON
# Chat models frequently wrap JSON in a Markdown code fence (```json ... ```),
# which json.loads rejects. Strip a surrounding fence before parsing so that
# otherwise valid output is not reported as a failure.
json_text = response.strip()
if json_text.startswith("```"):
    # Drop the opening fence line, including any language tag such as "json".
    _, _, json_text = json_text.partition("\n")
    # Drop the closing fence and anything that follows it.
    json_text = json_text.rsplit("```", 1)[0]

try:
    parsed_json = json.loads(json_text)
except json.JSONDecodeError as e:
    print(f"\n--- JSON Validation: Failed --- \nError: {e}")
else:
    print("\n--- JSON Validation: Success ---")
    # You can now work with the parsed_json object