In [33]:
%%capture
!pip install nvdlib streamlit google-generativeai datetime



In [34]:
from nvdlib import searchCVE
import streamlit as st
import openai
import datetime
from google.colab import userdata
import time

used AI to generate a more comprehensive keyword list to use it for the filter to help the company reduce the costs **;)**

In [35]:
KEYWORDS = [
    # --- General Terms ---
    "SCADA", "ICS", "Industrial Control", "HMI", "PLC", "RTU", "DCS",
    "SIS", "Process Control", "Operational Technology",

    # --- Protocols (The languages machines speak) ---
    "Modbus", "DNP3", "Profinet", "Profibus", "EtherNet/IP", "BACnet",
    "OPC UA", "IEC 61850", "EtherCAT", "CIP", "MMS",

    # --- Major Vendors (The big players) ---
    "Siemens", "Rockwell", "Schneider", "ABB", "Honeywell",
    "Emerson", "Mitsubishi", "Omron", "Yokogawa", "General Electric", "Fanuc",

    # --- Specific Product Lines (High probability of being OT) ---
    "Simatic", "WinCC", "Tia Portal",  # Siemens
    "Logix", "FactoryTalk", "Rslinx",  # Rockwell/Allen-Bradley
    "DeltaV", "Ovation",               # Emerson
    "Triconex", "Foxboro",             # Schneider
    "Centum", "ProSafe",               # Yokogawa
    "Wonderware", "Citect"             # AVEVA/Schneider
]

## Function to detect potential threats
this also filters out unrelated threats before sending to LLM to check

In [36]:
def is_potential_ot(description):
  if not description:
    return False
  return any(keyword.lower() in description.lower() for keyword in KEYWORDS)

## using Gemini to analyze the descriptions of the CVEs

In [None]:
import json
import os
from google import genai

genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

def analyze_with_gemini(description):
  model = genai.GenerativeModel(
      model_name='gemini-1.5-flash',
      generation_config={'response_mime_type':'application/json'} # set the output format
  )

  prompt=f"""
  You are an expert OT threat analyst with 160 IQ.
  Throughly analyze the following CVE description.
  Return ONLY a JSON object with exactly these keys:
  1. "ot_related" : boolean (True if OT/ICS/SCADA related, False otherwise).
  2. "reason" : string (an expert-level, detailed explanation of why. If "ot_related" is True, explain why this vulnerability is dangerous).

  ---------------------
  Description:
  ---------------------
  {description}
  """
  try:
    response = model.generate_content(prompt)
    return json.loads(response.text) #used this to convert from JSON object to python dict

  except Exception as e:
    print(f"AI error: {e}")
    return {"ot_related":False, "reason":"Error processing request"} # safe output

## Main Program

In [47]:
seen_cves = set()
approved_cves=dict()
dashboard_data=dict()
NVD_API = userdata.get('NVD_API')
while True:
  end = datetime.datetime.now()
  start = end - datetime.timedelta(days=1)
  r = searchCVE(pubStartDate=start, pubEndDate=end, key=NVD_API)
  print(f"Fetched {len(r)} records.")

  for cve in r[:5]: # Print first 5 to check
      print(f"{cve.id}: {cve.descriptions[0].value}")
  for cve in r:
    if cve.id not in seen_cves and cve.vulnStatus != "Rejected": #check if this is a new cve and wasn't rejected as a vulnerability
      seen_cves.add(cve.id) #to avoid reprocessing the same cve

      try:
        description = cve.descriptions[0].value #to get the description
        if is_potential_ot(description):  # check if the CVE can be OT related
            print(f"New potential threat is detected: {cve.id}")
            print(f"sending {cve.id} description to LLM for analysis...")
            response = analyze_with_gemini(description)

            if response['ot_related'] == True: # check if the response is OT related to keep/ignore it
              print(f"""
              ###################################
              APPROVED: {cve.id} is an OT threat!
              ###################################
              """)

              try:
                cvss = cve.metrics.cvssMetricV31[0].cvssData.baseScore #new CVEs might not have a severity score. Implemented a safety check
              except:
                cvss = 'N/A' # if no severity score. set the severity value to 'N/A'

              #initialized this to save every new approved CVE
              approved_cves[cve.id] = {'cvss':cvss,
                                       'description':f"{description}",
                                       'ai_insight':f'{response['reason']}'}
              new_entry = {cve.id}|approved_cves[cve.id] #concatenating the dict to output a separate new output to pass to the dashboard
        else:
          continue

      except:
        continue
  time.sleep(600)

Fetched 360 records.
CVE-2023-36331: Incorrect access control in the /member/orderList API of xmall v1.1 allows attackers to arbitrarily access other users' order details via manipulation of the query parameter userId.
CVE-2025-51567: A SQL Injection was found in the /exam/user/profile.php page of kashipara Online Exam System V1.0, which allows remote attackers to execute arbitrary SQL command to get unauthorized database access via the rname, rcollage, rnumber, rgender and rpassword parameters in a POST HTTP request.
CVE-2025-66802: Sourcecodester Covid-19 Contact Tracing System 1.0 is vulnerable to RCE (Remote Code Execution). The application receives a reverse shell (php) into imagem of the user enabling RCE.
CVE-2021-41074: A CSRF issue in index.php in QloApps hotel eCommerce 1.5.1 allows an attacker to change the admin's email address via a crafted HTML document.
CVE-2025-67147: Multiple SQL Injection vulnerabilities exist in amansuryawanshi Gym-Management-System-PHP 1.0 via the '

KeyboardInterrupt: 

##checking the data

In [69]:
r[0].metrics.cvssMetricV31[0].cvssData.baseScore
r[0].metrics.cvssMetricV31[0].cvssData.baseSeverity

'HIGH'

In [48]:
cves_to_llm

{'CVE-2024-14021': 'LlamaIndex (run-llama/llama_index) versions up to and including 0.11.6 contain an unsafe deserialization vulnerability in BGEM3Index.load_from_disk() in llama_index/indices/managed/bge_m3/base.py. The function uses pickle.load() to deserialize multi_embed_store.pkl from a user-supplied persist_dir without validation. An attacker who can provide a crafted persist directory containing a malicious pickle file can trigger arbitrary code execution when the victim loads the index from disk.',
 'CVE-2026-22695': 'LIBPNG is a reference library for use in applications that read, create, and manipulate PNG (Portable Network Graphics) raster image files. From 1.6.51 to 1.6.53, there is a heap buffer over-read in the libpng simplified API function png_image_finish_read when processing interlaced 16-bit PNGs with 8-bit output format and non-minimal row stride. This is a regression introduced by the fix for CVE-2025-65018. This vulnerability is fixed in 1.6.54.',
 'CVE-2026-22805