In [22]:

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [4]:
# System prompt
system_prompt = "You are an expert in cybersecurity and threat modeling \
Your task is to generate structured Data Flow Diagrams (DFDs) with a focus on identifying potential threats"

system_prompt += """\nFollow these standards: 
    - Identify all external entities (e.g., user, third-party services, attackers). 
    - Identify all processes (e.g., application components, APIs, services). 
    - Identify all data stores (e.g., databases, file storage, logs). 
    - Identify all data flows (connections between entities, processes, and stores). 
    - Clearly specify trust boundaries (separating external actors and internal systems). 
    - Use STRIDE methodology (Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege) to highlight threats for each data flow or process.
"""

system_prompt += """\nOutput must contain: 
    1. A valid Mermaid or PlantUML diagram code block. 
    2. A threat analysis table in Markdown. \n
Example Output (Mermaid) \n
flowchart TD;
  subgraph Internet;
    User[External Entity: Customer];
  end;

  subgraph App[Web Application];
    WebApp[Process: E-commerce Website];
    DB[(Data Store: MySQL Database)];
  end;

  subgraph ExternalServices;
    PayPal[External Entity: PayPal API];
  end;

  User -->|Login Credentials| WebApp;
  WebApp -->|User Details Query| DB;
  WebApp -->|Payment Request| PayPal;
"""
  


In [50]:
print(system_prompt)

You are an expert in cybersecurity and threat modeling Your task is to generate structured Data Flow Diagrams (DFDs) with a focus on identifying potential threats
Follow these standards: 
    - Identify all external entities (e.g., user, third-party services, attackers). 
    - Identify all processes (e.g., application components, APIs, services). 
    - Identify all data stores (e.g., databases, file storage, logs). 
    - Identify all data flows (connections between entities, processes, and stores). 
    - Clearly specify trust boundaries (separating external actors and internal systems). 
    - Use STRIDE methodology (Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege) to highlight threats for each data flow or process.

Output must contain: 
    1. A valid Mermaid or PlantUML diagram code block. 
    2. A threat analysis table in Markdown. 

Example Output (Mermaid) 

flowchart TD;
  subgraph Internet;
    User[External Entity: Custo

In [5]:
def user_prompt_dfd():
    usecase = input("Enter the use case for the DFD:")
    user_prompt = f"""
    Generate a Data Flow Diagram (DFD) in Mermaid syntax for the following system use case:

    {usecase}

    Requirements:
    1. Identify external entities, processes, and data stores.
    2. Show all data flows with appropriate labels.
    3. Group elements under trust boundaries (e.g., Internet, Application, External Services).
    4. After the diagram, provide a Threat Analysis (STRIDE) in a Markdown table with:
    - Element
    - STRIDE category
    - Threat description
    - Mitigation

    The output must contain:
    - A runnable Mermaid code block (```mermaid ... ```).
    - A Markdown table for the STRIDE threat analysis.
                        """
    return user_prompt

In [58]:
print(user_prompt_dfd())


    Generate a Data Flow Diagram (DFD) in Mermaid syntax for the following system use case:

    We want a warehouse system to track incoming and outgoing goods. Staff should be able to update stock levels, and managers can view reports. The system should also connect to suppliers for reordering. Security considerations should include preventing unauthorized changes to inventory records, data leaks, or service disruption.

    Requirements:
    1. Identify external entities, processes, and data stores.
    2. Show all data flows with appropriate labels.
    3. Group elements under trust boundaries (e.g., Internet, Application, External Services).
    4. After the diagram, provide a Threat Analysis (STRIDE) in a Markdown table with:
    - Element
    - STRIDE category
    - Threat description
    - Mitigation

    The output must contain:
    - A runnable Mermaid code block (```mermaid ... ```).
    - A Markdown table for the STRIDE threat analysis.
                        


In [6]:
def messages_for():
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_dfd()}
    ]

In [7]:
messages_for()

[{'role': 'system',
  'content': 'You are an expert in cybersecurity and threat modeling Your task is to generate structured Data Flow Diagrams (DFDs) with a focus on identifying potential threats\nFollow these standards: \n    - Identify all external entities (e.g., user, third-party services, attackers). \n    - Identify all processes (e.g., application components, APIs, services). \n    - Identify all data stores (e.g., databases, file storage, logs). \n    - Identify all data flows (connections between entities, processes, and stores). \n    - Clearly specify trust boundaries (separating external actors and internal systems). \n    - Use STRIDE methodology (Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege) to highlight threats for each data flow or process.\n\nOutput must contain: \n    1. A valid Mermaid or PlantUML diagram code block. \n    2. A threat analysis table in Markdown. \n\nExample Output (Mermaid) \n\nflowchart TD;\n  

In [8]:
def generate_dfd_and_threats():
    response = openai.chat.completions.create(
        model=MODEL,
        messages=messages_for(),
        temperature=0.2,
        max_tokens=1500,
    )
    return response.choices[0].message.content

In [12]:
result = generate_dfd_and_threats()

In [10]:
print(result)

```mermaid
flowchart TD;
  subgraph Internet;
    Patient[External Entity: Patient];
    Doctor[External Entity: Doctor];
  end;

  subgraph App[Healthcare Booking System];
    WebApp[Process: Booking Application];
    DB[(Data Store: Patient Records)];
    NotificationService[Process: Notification Service];
  end;

  Patient -->|Login Credentials| WebApp;
  Patient -->|View Available Doctors| WebApp;
  WebApp -->|Doctor List| Patient;
  Patient -->|Schedule Appointment| WebApp;
  WebApp -->|Store Patient Record| DB;
  WebApp -->|Send Notification| NotificationService;
  NotificationService -->|Notify Doctor| Doctor;

  %% Trust boundaries
  classDef external fill:#f9f,stroke:#333,stroke-width:2px;
  classDef internal fill:#bbf,stroke:#333,stroke-width:2px;
  class Internet external;
  class App internal;
```

### Threat Analysis (STRIDE)

| Element                     | STRIDE Category         | Threat Description                                      | Mitigation                      

In [None]:
parts = result.split("### Threat Analysis (STRIDE)")
print(parts[0])
display(Markdown(parts[1]))

```mermaid
flowchart TD;
  subgraph Internet;
    Customer[External Entity: Customer];
    Attacker[External Entity: Attacker];
  end;

  subgraph App[Mobile Application];
    MobileApp[Process: Food Delivery App];
    OrderProcessing[Process: Order Processing];
    NotificationService[Process: Notification Service];
    DB[(Data Store: Menu Database)];
    OrderDB[(Data Store: Order Database)];
  end;

  subgraph ExternalServices;
    PaymentGateway[External Entity: Third-Party Payment Service];
  end;

  Customer -->|Browse Menu| MobileApp;
  Customer -->|Place Order| MobileApp;
  MobileApp -->|Menu Query| DB;
  MobileApp -->|Order Details| OrderProcessing;
  OrderProcessing -->|Payment Request| PaymentGateway;
  PaymentGateway -->|Payment Confirmation| OrderProcessing;
  OrderProcessing -->|Notify Restaurant| NotificationService;
  NotificationService -->|Order Notification| Restaurant[External Entity: Restaurant];
  OrderProcessing -->|Store Order Details| OrderDB;
  
  Attacker --



| Element                     | STRIDE Category           | Threat Description                                           | Mitigation                                      |
|----------------------------|--------------------------|------------------------------------------------------------|------------------------------------------------|
| Customer                    | Spoofing                 | Fake users can create accounts to place fraudulent orders.  | Implement strong authentication (e.g., 2FA).  |
| MobileApp                   | Tampering                | An attacker may modify requests to manipulate orders.      | Use HTTPS and validate inputs on the server.   |
| PaymentGateway              | Information Disclosure    | Sensitive payment details could be intercepted.            | Use encryption for data in transit (TLS).      |
| OrderProcessing             | Repudiation              | Users may deny placing an order after the fact.            | Maintain logs of all transactions and actions.  |
| MobileApp                   | Denial of Service        | Attackers may overwhelm the app with requests.             | Implement rate limiting and DDoS protection.   |
| OrderProcessing             | Elevation of Privilege   | Unauthorized access to order processing functions.         | Enforce role-based access control (RBAC).       |

use this for DFD viz : https://www.eraser.io/make/free-mermaid-diagram-maker

or in Draw.io