In [None]:
!pip install gradio transformers torch



In [None]:
import requests
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import torch
import pandas as pd

# =========================
# Load Model
# =========================
# Model identifier handed to `from_pretrained` in load_model() for both the
# token-classification model and its tokenizer.
model_name = "UnknownNemesis/cyner_ner_model"

def load_model():
    """Build the token-classification pipeline for ``model_name``.

    Loads the model weights and the matching tokenizer, then wraps both in a
    ``transformers`` pipeline that merges sub-word pieces into entity spans
    (``aggregation_strategy="simple"``).

    Returns:
        The configured pipeline; calling it on a string yields the detected
        entities.
    """
    classifier = AutoModelForTokenClassification.from_pretrained(model_name)
    tok = AutoTokenizer.from_pretrained(model_name)

    # First CUDA device when one is available, otherwise -1 (CPU).
    if torch.cuda.is_available():
        target_device = 0
    else:
        target_device = -1

    return pipeline(
        "token-classification",
        model=classifier,
        tokenizer=tok,
        aggregation_strategy="simple",
        device=target_device,
    )

# Build the pipeline once at module level; extract_entities() reuses this
# single instance for every call instead of reloading the model.
ner_pipeline = load_model()

# =========================
# Fetch CVE data from NIST NVD API
# =========================
def fetch_nvd_data(keyword, max_results=15):
    """Search the NIST NVD CVE API (v2.0) by keyword.

    Args:
        keyword: Free-text search term, sent as ``keywordSearch``.
        max_results: Page size, sent as ``resultsPerPage``.

    Returns:
        A list of ``{"cve_id": str, "description": str}`` dicts, one for each
        returned CVE that has a non-empty English description. An empty list
        is returned when the request fails, the HTTP status is not 200, or
        the body is not the expected JSON object.
    """
    url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
    # Let requests build the query string so characters such as spaces, '&'
    # or '#' in the keyword are percent-encoded instead of corrupting the URL
    # or injecting extra query parameters.
    params = {"keywordSearch": keyword, "resultsPerPage": max_results}

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException:
        # Network failures are reported the same way as "no results".
        return []

    if response.status_code != 200:
        return []

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page).
        return []
    if not isinstance(data, dict):
        return []

    results = []
    for item in data.get("vulnerabilities", []):
        cve = item.get("cve", {})
        cve_id = cve.get("id")
        # First English description, or "" when none is present.
        desc = next(
            (
                d.get("value", "")
                for d in cve.get("descriptions", [])
                if d.get("lang") == "en"
            ),
            "",
        )
        # Skip malformed entries rather than failing the whole search.
        if cve_id and desc:
            results.append({"cve_id": cve_id, "description": desc})
    return results

# =========================
# Extract Entities
# =========================
def extract_entities(text):
    """Run the NER pipeline on *text* and bucket the hits by label.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each ``entity_group`` label to the set of distinct,
        whitespace-stripped ``word`` values the pipeline reported for it.
    """
    by_label = {}
    for hit in ner_pipeline(text):
        group = hit["entity_group"]
        word = hit["word"].strip()
        if group not in by_label:
            by_label[group] = set()
        # Sets de-duplicate repeated mentions of the same entity.
        by_label[group].add(word)
    return by_label

# =========================
# Main Processing Function
# =========================
def analyze_vulnerabilities(keyword):
    """Fetch CVEs for *keyword* and tabulate the entities found in each.

    Args:
        keyword: Search term typed by the user. ``None`` and
            whitespace-only values are treated as empty.

    Returns:
        A ``(message, table)`` tuple. ``message`` is a Markdown status
        string. ``table`` is a ``pandas.DataFrame`` with the columns
        ``"CVE ID"`` and ``"Description"`` plus one column per entity label
        seen in any description (cell value: the sorted, comma-separated
        words for that label). ``table`` is ``None`` when the keyword is
        empty or no CVEs were returned.
    """
    # Normalise once so the emptiness check, the search and the messages all
    # see the same value; `or ""` guards against a None input.
    keyword = (keyword or "").strip()
    if not keyword:
        return "⚠️ Please enter a keyword.", None

    records = fetch_nvd_data(keyword)
    if not records:
        return f"⚠️ No CVEs found for keyword '{keyword}'.", None

    # Prepare table data
    table_rows = []
    for rec in records:
        entities = extract_entities(rec["description"])
        row = {"CVE ID": rec["cve_id"], "Description": rec["description"]}
        row.update(
            (label, ", ".join(sorted(words))) for label, words in entities.items()
        )
        table_rows.append(row)

    # Rows lack the labels their description did not contain; blank those
    # cells so the table shows empty strings instead of NaN.
    df = pd.DataFrame(table_rows).fillna("")
    return f"✅ Found {len(records)} CVE(s) related to **{keyword}**.", df

# =========================
# Gradio Interface
# =========================
with gr.Blocks(title="NIST Vulnerability Info Extractor") as demo:
    # Page heading followed by a short usage hint.
    gr.Markdown(value="# 🔍 NIST Vulnerability Info Extractor")
    gr.Markdown(
        value="Enter a product or technology name to fetch CVEs from the NIST NVD database and extract key information using an NLP NER model."
    )

    # Search box and trigger button share one row.
    with gr.Row():
        keyword = gr.Textbox(
            lines=1,
            placeholder="Cisco",
            label="🔑 Enter Keyword (e.g., Cisco, Windows, Apache, Linux)",
        )
        btn = gr.Button(value="Extract Information")

    # Outputs: a status message and the results table.
    output_msg = gr.Markdown()
    output_table = gr.Dataframe(
        wrap=True,
        headers=[
            "CVE ID",
            "Description",
            "System",
            "Indicator",
            "Threat_group",
            "OS",
            "Attack_Vector",
            "Prerequisite",
            "Output",
        ],
    )

    # Clicking the button runs the analysis and fills both outputs.
    btn.click(
        fn=analyze_vulnerabilities,
        inputs=keyword,
        outputs=[output_msg, output_table],
    )

    gr.Markdown(value="---")
    gr.Markdown(
        value="**Note:** Data retrieved from [NIST NVD](https://nvd.nist.gov/). Model fine-tuned for extracting environments, OS, attack vectors, prerequisites, and outcomes."
    )

demo.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/735M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/970 [00:00<?, ?B/s]

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://01e1e96a5262769556.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


