[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Jerryblessed/cybersecurityai/blob/main/project.ipynb)

In [None]:
# Install necessary libraries
!pip install networkx nx_arangodb cugraph-cu12 cudf-cu12 openai langchain_community python-telegram-bot matplotlib langgraph langchain_openai openai  nx-arangodb[llm]

In [None]:
import os
import networkx as nx
import nx_arangodb as nxadb
import torch
import cudf
import cugraph
from networkx.algorithms import community

from arango import ArangoClient
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import re

from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_community.graphs import ArangoGraph
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_core.tools import tool

import openai
from openai import AzureOpenAI



In [None]:
# Connection settings. Each value can be overridden through the environment;
# when a variable is unset, the fallback literal is written into os.environ so
# that later code can read it from there.
# NOTE(review): the fallbacks below are credentials (including an API key)
# committed to source. They should be rotated and supplied from outside the
# notebook; they are flagged here, not changed, to keep behavior identical.
os.environ.setdefault("DATABASE_HOST", "https://tutorials.arangodb.cloud:8529")
os.environ.setdefault("DATABASE_USERNAME", "TUT8ipacx2t02ebj95v26ioo")
os.environ.setdefault("DATABASE_PASSWORD", "TUTb69rmmvlqygt7dceaxf8d")
os.environ.setdefault("DATABASE_NAME", "TUTnhcypojdm6nocid8h4x3v")
os.environ.setdefault("OPENAI_API_KEY", "9I4UEJweVUdih04Uv8AXcAxs5H8jSQRfwaugcSQYHcI882wSpFvqJQQJ99BAACL93NaXJ3w3AAABACOGkv4f")

# Azure OpenAI API details

# ------------------------------------------------------------------------------

AZURE_API_KEY = os.environ["OPENAI_API_KEY"]  # Same variable as set above
AZURE_MODEL = "gpt-4o"
# NOTE(review): this URL ends with "/", and azure_chat() appends "/openai/..."
# to it, which yields a double slash in the request URL.
api_base = "https://thisisoajo.openai.azure.com/"  # Replace with your Azure OpenAI resource URL
api_version = "2023-06-01-preview"



In [None]:
def azure_chat(prompt):
    """Send ``prompt`` to the Azure OpenAI chat deployment and return the reply.

    A new ``AzureOpenAI`` client is built on every call from the module-level
    ``AZURE_API_KEY``, ``api_version``, ``api_base`` and ``AZURE_MODEL``.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the response carries no content.
    """
    # api_base may end with "/"; strip it so the deployment URL does not
    # contain a double slash.
    deployment_url = f"{api_base.rstrip('/')}/openai/deployments/{AZURE_MODEL}"
    client = AzureOpenAI(
        api_key=AZURE_API_KEY,
        api_version=api_version,
        base_url=deployment_url,
    )
    response = client.chat.completions.create(
        model=AZURE_MODEL,
        messages=[
            {"role": "system", "content": "A network engineer"},
            {"role": "user", "content": prompt},
        ],
        max_tokens=500,
        temperature=0.7,
    )
    # The SDK types message.content as optional; guard so .strip() cannot
    # fail with AttributeError on None.
    content = response.choices[0].message.content
    return (content or "").strip()

# ------------------------------------------------------------------------------
# Step 1: Create a Cybersecurity Vulnerability Graph in NetworkX

def load_vulnerabilities_from_csv(file_path="cve.csv"):
    """Read the CSV at ``file_path`` and return its rows as a list of dicts.

    Each dict maps a column name to that row's value.
    """
    return pd.read_csv(file_path).to_dict(orient="records")

# Create a Cybersecurity Vulnerability Graph in NetworkX
def create_vulnerability_graph(file_path="cve.csv"):
    """Build an undirected NetworkX graph from the CVE records in a CSV file.

    For every record a CVE node is added (attributes ``severity`` and
    ``summary``). When the corresponding field is not missing, the CVE is
    linked to:

    * a ``CWE-<code>`` node (edge relation ``has_cwe``),
    * its publication date node (``published_on``),
    * its modification date node (``modified_on``),
    * a ``CVSS-<score>`` node (``has_severity``).

    Args:
        file_path: CSV to load; defaults to ``"cve.csv"``. The records are
            read for the keys ``cve_id``, ``pub_date``, ``mod_date``,
            ``cwe_code``, ``cwe_name``, ``cvss`` and ``summary``.

    Returns:
        The populated ``nx.Graph``.
    """
    G = nx.Graph()

    for vuln in load_vulnerabilities_from_csv(file_path):
        cve_id = vuln["cve_id"]
        pub_date = vuln["pub_date"]
        mod_date = vuln["mod_date"]
        cwe_code = vuln["cwe_code"]
        severity = vuln["cvss"]

        # Add CVE Node
        G.add_node(cve_id, severity=severity, summary=vuln["summary"])

        # Add CWE Node & Edge
        if pd.notna(cwe_code):  # Ensure CWE exists
            # A numeric column that contains blanks is parsed as float, which
            # would label the node "CWE-79.0"; use the integer form instead.
            if isinstance(cwe_code, float) and cwe_code.is_integer():
                cwe_code = int(cwe_code)
            cwe_node = f"CWE-{cwe_code}"
            G.add_node(cwe_node, name=vuln["cwe_name"])
            G.add_edge(cve_id, cwe_node, relation="has_cwe")

        # Add Date Nodes & Edges
        if pd.notna(pub_date):
            G.add_node(pub_date, type="date")
            G.add_edge(cve_id, pub_date, relation="published_on")

        # NOTE(review): nx.Graph keeps a single edge per node pair, so when
        # mod_date equals pub_date this relation replaces "published_on".
        if pd.notna(mod_date):
            G.add_node(mod_date, type="date")
            G.add_edge(cve_id, mod_date, relation="modified_on")

        # Add CVSS Score Edge
        if pd.notna(severity):
            severity_node = f"CVSS-{severity}"
            G.add_node(severity_node, type="severity")
            G.add_edge(cve_id, severity_node, relation="has_severity")

    return G
# Build the in-memory graph once; the following steps reuse G_nx.
G_nx = create_vulnerability_graph()


# ------------------------------------------------------------------------------
# Step 2: Persist the graph to ArangoDB through nx_arangodb.
# NOTE(review): presumably nxadb picks up the DATABASE_* environment variables
# set earlier in this file for the connection — confirm.
G_adb = nxadb.Graph(name="VulnerabilityGraph", incoming_graph_data=G_nx)

# Optional edit through the ArangoDB-backed graph: attach a description to one
# CVE node, but only when that node is present.
if "CVE-2024-1234" in G_adb.nodes:
    G_adb.nodes["CVE-2024-1234"]["description"] = "Critical SQL injection vulnerability."
# ------------------------------------------------------------------------------
# Step 3: GPU-Accelerated Graph Analytics with CPU Fallback.
def analyze_graph(graph):
    """Return a ``{node: PageRank score}`` mapping for ``graph``.

    cuGraph is used when ``torch.cuda.is_available()`` reports a GPU;
    otherwise NetworkX computes the scores on the CPU.
    """
    if not torch.cuda.is_available():
        print("Using CPU with NetworkX")
        return nx.pagerank(graph)

    print("Using GPU with cuGraph")
    # cuGraph is fed the edge list through a cuDF DataFrame.
    # NOTE(review): only edges are passed, so nodes without any edge are
    # presumably absent from the GPU result — confirm if that matters.
    edge_frame = cudf.DataFrame(list(graph.edges()), columns=["source", "destination"])
    gpu_graph = cugraph.Graph()
    gpu_graph.from_cudf_edgelist(edge_frame, source="source", destination="destination")
    # Bring the result back to pandas and pair each vertex with its score.
    ranks = cugraph.pagerank(gpu_graph).to_pandas()
    return dict(zip(ranks["vertex"], ranks["pagerank"]))

# Score the in-memory graph and show the raw mapping.
pagerank_scores = analyze_graph(G_nx)
print("Pagerank Scores:", pagerank_scores)

# Copy every score onto the matching node of the ArangoDB-backed graph;
# ids that are not present in G_adb are skipped.
for node, score in pagerank_scores.items():
    if node not in G_adb.nodes:
        continue
    G_adb.nodes[node]["pagerank"] = score

# ------------------------------------------------------------------------------
# Step 4: Ask Azure OpenAI about the graph.
# azure_chat() only sees the prompt text, so the highest-ranked entries are
# appended to the question; without them the model has no graph data to
# answer from. CVE nodes (ids starting with "CVE-") are preferred because the
# question is about vulnerabilities; if none match, all nodes are used.
_cve_scores = {n: s for n, s in pagerank_scores.items() if str(n).startswith("CVE-")}
_top_ranked = sorted(
    (_cve_scores or pagerank_scores).items(),
    key=lambda item: item[1],
    reverse=True,
)[:10]
_pagerank_context = "\n".join(f"{node_id}: {rank}" for node_id, rank in _top_ranked)
azure_response = azure_chat(
    "Hello, which vulnerability has the highest page rank value?\n\n"
    "PageRank scores of the top-ranked nodes:\n" + _pagerank_context
)
print("Azure OpenAI Response:", azure_response)

# ------------------------------------------------------------------------------
# Step 5: Define a tool to convert text to AQL and then back to text using Azure OpenAI.
@tool
def text_to_aql_to_text(query: str):
    """
    This tool translates a natural language query into AQL,
    executes the query, and translates the result back into natural language.
    Uses Azure OpenAI for language processing.
    """
    # The docstring above is kept verbatim because @tool uses it as the tool
    # description.
    # NOTE(review): the body only forwards `query` to azure_chat(); no AQL is
    # generated or executed here — confirm the intended behavior.
    return str(azure_chat(query))

# Optionally, test the tool.
# @tool wraps the function in a LangChain tool object, so it is run through
# .invoke() with its arguments keyed by parameter name; calling the tool
# object directly is deprecated in langchain-core.
print(text_to_aql_to_text.invoke({"query": "What vulnerabilities are present in the graph?"}))


In [None]:
# Load vulnerabilities dynamically from CSV
def load_vulnerabilities_from_csv(file_path="cve.csv"):
    """Load ``file_path`` with pandas and return one dict per CSV row.

    This re-declares the function of the same name defined earlier in the
    file; the later definition is the one in effect once this cell runs.
    """
    frame = pd.read_csv(file_path)
    return frame.to_dict(orient="records")

# Create a Cybersecurity Vulnerability Graph in NetworkX
def create_vulnerability_graph():
    """Build an undirected graph linking each CVE to its publication date.

    Every record from ``load_vulnerabilities_from_csv()`` becomes a node keyed
    by ``CVE_ID`` with ``severity`` and ``exploit`` attributes, plus an edge
    to the record's ``date_published`` value.

    NOTE(review): this reads the columns "CVE_ID", "severity", "exploit" and
    "date_published", while the earlier definition of this function in the
    file reads "cve_id", "cvss", "pub_date", ... from the same default
    "cve.csv" — confirm which schema the file actually has.
    """
    graph = nx.Graph()
    for record in load_vulnerabilities_from_csv():
        cve = record["CVE_ID"]
        graph.add_node(cve, severity=record["severity"], exploit=record["exploit"])
        # add_edge creates the date node implicitly if it does not exist yet.
        graph.add_edge(cve, record["date_published"])
    return graph

# Rebuild the in-memory graph; this rebinds G_nx.
G_nx = create_vulnerability_graph()

# Persist it to ArangoDB under a separate graph name via nx_arangodb.
G_adb = nxadb.Graph(name="VulnerabilityGraph3rd", incoming_graph_data=G_nx)

# Attach a description to one CVE node, but only when that node is present.
if "CVE-2024-1234" in G_adb.nodes:
    G_adb.nodes["CVE-2024-1234"]["description"] = "Critical SQL injection vulnerability."

# GPU-Accelerated Graph Analytics with CPU Fallback.
def analyze_graph(graph):
    """Compute PageRank for ``graph`` and return it as ``{node: score}``.

    Runs on cuGraph when ``torch.cuda.is_available()`` is true and on
    NetworkX otherwise. This re-declares the function of the same name
    defined earlier in the file.
    """
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print("Using GPU with cuGraph")
        # Hand the edge list to cuGraph through a cuDF DataFrame.
        edges = cudf.DataFrame(list(graph.edges()), columns=["source", "destination"])
        cu_graph = cugraph.Graph()
        cu_graph.from_cudf_edgelist(edges, source="source", destination="destination")
        # One record per vertex, each holding "vertex" and "pagerank".
        records = cugraph.pagerank(cu_graph).to_pandas().to_dict("records")
        return {rec["vertex"]: rec["pagerank"] for rec in records}

    print("Using CPU with NetworkX")
    return nx.pagerank(graph)

# Score the rebuilt graph and show the raw mapping.
pagerank_scores = analyze_graph(G_nx)
print("Pagerank Scores:", pagerank_scores)

# Write each score to the matching ArangoDB node, skipping ids that are not
# present in G_adb.
for node, score in pagerank_scores.items():
    if node not in G_adb.nodes:
        continue
    G_adb.nodes[node]["pagerank"] = score



In [None]:
# Ask Azure OpenAI about the graph.
# azure_chat() only sees the prompt text, so the highest-ranked entries are
# appended to the question; without them the model has no graph data to
# answer from. CVE nodes (ids starting with "CVE-") are preferred because the
# question is about vulnerabilities; if none match, all nodes are used.
_cve_scores = {n: s for n, s in pagerank_scores.items() if str(n).startswith("CVE-")}
_top_ranked = sorted(
    (_cve_scores or pagerank_scores).items(),
    key=lambda item: item[1],
    reverse=True,
)[:10]
_pagerank_context = "\n".join(f"{node_id}: {rank}" for node_id, rank in _top_ranked)
azure_response = azure_chat(
    "Hello, which vulnerability has the highest page rank value?\n\n"
    "PageRank scores of the top-ranked nodes:\n" + _pagerank_context
)
print("Azure OpenAI Response:", azure_response)

@tool
def text_to_aql_to_text(query: str):
    """Send a natural-language question to Azure OpenAI and return the reply as text."""
    # @tool builds the tool description from the docstring; a function with
    # neither a docstring nor an explicit description makes the decorator
    # raise ValueError, so the docstring above is required.
    # NOTE(review): despite the name, no AQL is generated or executed here;
    # the query is only forwarded to azure_chat().
    result = azure_chat(query)
    return str(result)
