In [15]:
from crewai import Agent, Task, Crew, Process
from crewai_tools import JSONSearchTool
from crewai_tools import WebsiteSearchTool
#from crewai_tools import SerperDevTool

from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from typing import List
import asyncio

from dotenv import dotenv_values
from dotenv import load_dotenv
import os
import requests
import json
from typing import List, Dict

In [16]:
    # Vulnerability description to analyze; it is interpolated into the
    # key-phrase extraction task's prompt and passed to analyze_vulnerability().
    # The commented-out lines are alternative sample inputs: keep exactly one
    # assignment active.
    #vulnerability_description = "A buffer overflow vulnerability in the XYZ software allows remote attackers to execute arbitrary code."
    #vulnerability_description = "The Cisco Discovery Protocol implementation in Cisco IOS XR Software does not do improper validation of string input from certain fields which could allow an unauthenticated, adjacent attacker to execute arbitrary code or cause a reload on an affected device. The vulnerability is due to improper validation of string input from certain fields in Cisco Discovery Protocol messages. An attacker could exploit this vulnerability by sending a malicious Cisco Discovery Protocol packet to an affected device."
    #vulnerability_description = "The Widget Connector macro in Atlassian Confluence Server before version 6.6.12 (the fixed version for 6.6.x), from version 6.7.0 before 6.12.3 (the fixed version for 6.12.x), from version 6.13.0 before 6.13.3 (the fixed version for 6.13.x), and from version 6.14.0 before 6.14.2 (the fixed version for 6.14.x), allows remote attackers to achieve path traversal and remote code execution on a Confluence Server or Data Center instance via server-side template injection."
    #vulnerability_description = "The Popup Manager WordPress plugin through 1.6.6 does not have authorisation and CSRF check when creating/updating popups, and is missing sanitisation as well as escaping, which could allow unauthenticated attackers to create arbitrary popups and add Stored XSS payloads as well"
    vulnerability_description = "ProductX contains a default SSH public key in the authorized_keys file. A remote attacker could use this key to gain root privileges."

In [17]:
# Base directory that every Task's output_file path is joined onto.
OUTPUT_DIR = "./data_out/"

In [18]:
# Load a .env file found by python-dotenv's default lookup into os.environ.
load_dotenv()

# Also parse the project-level .env explicitly; `config` is a mapping of its entries.
config = dotenv_values("../.env")

# Export the API keys into the process environment (presumably where the
# LLM clients and tools look them up).
# Prefer the value from ../.env; otherwise keep what load_dotenv() or the shell
# already provided. Fail early with a clear message when neither source has it,
# instead of an anonymous KeyError/TypeError from the dictionary lookup.
for _key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
    _key_value = config.get(_key_name) or os.environ.get(_key_name)
    if not _key_value:
        raise KeyError(
            f"{_key_name} is not set: add it to ../.env or export it in the environment"
        )
    os.environ[_key_name] = _key_value

# Optional keys; enable together with the tools that need them.
#os.environ["GOOGLE_API_KEY"] = config['GOOGLE_API_KEY']
#os.environ["SERPER_API_KEY"] = config['SERPER_API_KEY']

# Initialize the language model shared by all agents
#llm = ChatOpenAI(temperature=0)
llm = ChatAnthropic(model="claude-3-5-sonnet-20240620")

In [19]:
#==================================================
# Define the tools
#==================================================

# Configure WebsiteSearchTool to search NVD.
# The keyword documented for pinning the tool to a single site is `website`;
# `site_url` is not part of that documented interface, so the URL was presumably
# never bound to the tool.
# NOTE(review): confirm against the installed crewai_tools version. With
# `website=` set, the tool may fetch and index the page when it is constructed.
#nvd_search_tool = WebsiteSearchTool(website="https://nvd.nist.gov/vuln/")
nvd_search_tool = WebsiteSearchTool(website="https://nvd.nist.gov/vuln/search/results?form_type=Basic&results_type=overview&query")





In [20]:
#==================================================
# Prompts
#==================================================

# Function to read the system prompt from a file
def read_system_prompt(file_path: str) -> str:
    """Return the full text of the prompt file at ``file_path``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default encoding.

    Args:
        file_path: Path of the prompt file to read.

    Returns:
        The complete file content as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


# Load both prompt files up front, in the order the agents consume them:
# first the key-entity extraction prompt, then the CWE assignment prompt.
keyphrases_extractor_prompt, assign_cwe_prompt = (
    read_system_prompt('../prompts/extract_key_entities/system.md'),
    read_system_prompt('../prompts/assign_cwes/system.md'),
)


In [21]:

#==================================================
# Define the agents
#==================================================

# Turns the raw vulnerability description into tagged key phrases.
# NOTE(review): crewai's Agent documents `system_template` / `prompt_template`
# rather than a `system_prompt` keyword; confirm that the prompts passed as
# `system_prompt=` in this cell are actually applied and not silently dropped.
keyphrases_extractor = Agent(
    role='Keyphrases Extractor',
    goal='Extract key phrases from vulnerability descriptions',
    backstory='You are an expert in cybersecurity language and can identify crucial elements in vulnerability descriptions.',
    allow_delegation=False,
    llm=llm,
    verbose=True,  # Enable verbose mode
    memory=True,   # Enable memory if needed
    system_prompt=keyphrases_extractor_prompt
)

# Looks for similar CVEs among the CWE "Observed Examples".
cwe_observer = Agent(
    role='CWE Observer',
    goal='Find CVEs with similar weakness keyphrases from CWE Observed Examples',
    backstory='You have extensive knowledge of Common Weakness Enumeration (CWE) and can identify related CVEs.',
    allow_delegation=False,
    llm=llm,
    verbose=True,  # Enable verbose mode
    memory=True,   # Enable memory if needed
)

# Looks for similar CVEs among the CWE Top 25 mappings.
# The Top 25 list is published by the CWE program (MITRE), not by OWASP, so the
# backstory names it accordingly.
top25_cwe_analyst = Agent(
    role='Top 25 CWE Analyst',
    goal='Identify CVEs with similar weakness keyphrases from Top 25 CWE Mappings',
    backstory='You specialize in the CWE Top 25 Most Dangerous Software Weaknesses and can find relevant CVEs.',
    allow_delegation=False,
    llm=llm,
    verbose=True,  # Enable verbose mode
    memory=True,   # Enable memory if needed
)

# Searches NVD through the WebsiteSearchTool assigned below; no NVD API client is used.
nvd_researcher = Agent(
    role='NVD Researcher',
    goal='For each [ROOTCAUSE] and [WEAKNESS] keyphrase, search the National Vulnerability Database for CVEs with similar [ROOTCAUSE] and [WEAKNESS] keyphrases.',
    backstory='You are an expert in navigating the National Vulnerability Database and can find pertinent CVEs with similar [ROOTCAUSE] and [WEAKNESS] keyphrases.',
    system_prompt="""You are an expert in navigating the National Vulnerability Database and can find pertinent CVEs with similar [ROOTCAUSE] and [WEAKNESS] keyphrases.
    You will be provided with a list of [ROOTCAUSE] and [WEAKNESS] keyphrases and you need to search the National Vulnerability Database for CVEs with similar [ROOTCAUSE] and [WEAKNESS] keyphrases.
    It does not matter if the [ROOTCAUSE] and [WEAKNESS] keyphrases are not exactly the same, as long as they are similar.
    It does not matter about the other keyphrases, they are not relevant.
    You will need to provide a list of CVEs that are similar to the [ROOTCAUSE] and [WEAKNESS] keyphrases and their CWE-IDs. 
    """,
    allow_delegation=False,
    llm=llm,
    verbose=True,
    memory=True,
    tools=[nvd_search_tool],  # Give the agent the NVD website search tool

)

# Drafts the report; receives the CWE-assignment prompt.
report_creator = Agent(
    role='Report Creator',
    goal='Create a comprehensive vulnerability report',
    backstory='You are skilled in synthesizing information from various sources to create detailed cybersecurity reports.',
    allow_delegation=False,
    llm=llm,
    system_prompt=assign_cwe_prompt,
    verbose=True,  # Enable verbose mode
    memory=True,   # Enable memory if needed

)

# Reviews and polishes the drafted report.
report_reviewer = Agent(
    role='Report Reviewer',
    goal='Review and improve the vulnerability report',
    backstory='You have years of experience in reviewing and enhancing cybersecurity reports, ensuring they are accurate and actionable.',
    allow_delegation=False,
    llm=llm,
    verbose=True,  # Enable verbose mode
    memory=True,   # Enable memory if needed
)




#==================================================
# Define the tasks
#==================================================     

# Step 1: tag the key phrases of the description.
# The f-string is evaluated once, when this cell runs, from the module-level
# `vulnerability_description`.
extract_key_phrases = Task(
    description=f"Extract key phrases from the following vulnerability description: {vulnerability_description} and provide the details in a structured format.",
    expected_output="The output should contain: [WEAKNESS], [PRODUCT], [VERSION], [ATTACKER], [IMPACT], [VECTOR], [ROOTCAUSE].",
    output_file=os.path.join(OUTPUT_DIR, "key_phrases.txt"),
    agent=keyphrases_extractor,
)

# Step 2: three lookups, each fed by the extracted key phrases and flagged
# for asynchronous execution.
find_similar_cves_from_cwes_observed = Task(
    description="Find CVEs with similar weakness keyphrases from CWE Observed Examples using the extracted key phrases.",
    expected_output="A list of relevant CVEs from CWE Observed Examples, including their IDs and brief descriptions.",
    async_execution=True,
    output_file=os.path.join(OUTPUT_DIR, "similar_cves_from_cwes_observed.txt"),
    agent=cwe_observer,
    context=[extract_key_phrases]
)

find_similar_cves_from_top25 = Task(
    description="Identify CVEs with similar weakness keyphrases from Top 25 CWE Mappings using the extracted key phrases.",
    expected_output="A list of relevant CVEs from Top 25 CWE Mappings, including their Weakness Enumeration CWE-IDs, and brief descriptions.",
    async_execution=True,
    output_file=os.path.join(OUTPUT_DIR, "similar_cves_from_top25.txt"),
    agent=top25_cwe_analyst,
    context=[extract_key_phrases]
)

find_similar_cves_from_nvd = Task(
    description="Discover CVEs with similar weakness keyphrases from the National Vulnerability Database using the extracted key phrases.",
    expected_output="A comprehensive list of relevant CVEs from the NVD, including their Weakness Enumeration CWE-IDs, and brief descriptions.",
    async_execution=True,
    output_file=os.path.join(OUTPUT_DIR, "similar_cves_from_nvd.txt"),
    agent=nvd_researcher,
    context=[extract_key_phrases]
)

# Step 3: draft the report.
# The description promises a report based on the vulnerability description and
# the extracted key phrases, so the extraction task is listed in the context
# explicitly alongside the CVE lookups.
# NOTE(review): the crew assembled in analyze_vulnerability() currently runs only
# the NVD lookup; confirm how the two other context tasks behave when they are
# not part of the crew.
create_vulnerability_report = Task(
    description="Create a comprehensive report based on the vulnerability description, extracted key phrases, and identified CVEs from all sources.",
    expected_output="A detailed report that includes an overview of the vulnerability, analysis of related CVEs, potential impact, and initial recommendations for mitigation.",
    output_file=os.path.join(OUTPUT_DIR, "vulnerability_report_draft.txt"),
    agent=report_creator,
    context=[extract_key_phrases, find_similar_cves_from_cwes_observed, find_similar_cves_from_top25, find_similar_cves_from_nvd]
)

# Step 4: review and polish the draft (no explicit context is set).
review_vulnerability_report = Task(
    description="Review the created report, provide feedback, and improve it if necessary.",
    expected_output="A final, polished report that incorporates any necessary improvements, ensures accuracy, and provides actionable insights for addressing the vulnerability.",
    output_file=os.path.join(OUTPUT_DIR, "vulnerability_report_final.txt"),
    agent=report_reviewer,
)



In [22]:

# Main function to run the vulnerability analysis
def analyze_vulnerability(vulnerability_description: str) -> str:
    """Run the vulnerability-analysis crew on one description and return its result.

    The crew runs three tasks sequentially: key-phrase extraction, the NVD
    lookup, and report drafting. The full six-task pipeline is kept below as a
    commented-out alternative.

    Args:
        vulnerability_description: Free-text description of the vulnerability
            to analyze.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the last task.
        NOTE(review): annotated as ``str``; newer crewai releases may return a
        result object whose ``str()`` is the report text. Confirm.
    """
    # The extraction task's prompt was rendered when the task was defined, from
    # the module-level description. Re-render it here so that the argument
    # passed to this function is what actually gets analyzed.
    extract_key_phrases.description = f"Extract key phrases from the following vulnerability description: {vulnerability_description} and provide the details in a structured format."

    vulnerability_analysis_crew = Crew(
        # Full pipeline (disabled):
        #agents=[keyphrases_extractor, cwe_observer, top25_cwe_analyst, nvd_researcher, report_creator, report_reviewer],
        #tasks=[extract_key_phrases, find_similar_cves_from_cwes_observed, find_similar_cves_from_top25, find_similar_cves_from_nvd, create_vulnerability_report,review_vulnerability_report],
        agents=[keyphrases_extractor, nvd_researcher, report_creator],
        tasks=[extract_key_phrases, find_similar_cves_from_nvd, create_vulnerability_report],
        process=Process.sequential
    )
    result = vulnerability_analysis_crew.kickoff()
    return result

# Example usage
if __name__ == "__main__":
    # Analyze the module-level description, then print a heading line followed
    # by the crew's result.
    final_report = analyze_vulnerability(vulnerability_description)
    print("Final Vulnerability Analysis Report:", final_report, sep="\n")
    
    



> Entering new CrewAgentExecutor chain...
Thought: I now can give a great answer

Final Answer: 
[WEAKNESS]: Default SSH public key present in authorized_keys file
[PRODUCT]: ProductX
[VERSION]: Not specified
[ATTACKER]: Remote attacker
[IMPACT]: Gain root privileges
[VECTOR]: SSH access using the default public key
[ROOTCAUSE]: Presence of a default SSH public key in the authorized_keys file

This structured format extracts the key phrases from the given vulnerability description, highlighting the critical elements related to the security issue. The weakness is identified as the presence of a default SSH public key in the authorized_keys file. The affected product is ProductX, though no specific version is mentioned. The potential attacker is described as a remote attacker who could exploit this vulnerability. The impact of successful exploitation would be gaining root privileges on the affected system. The attack vector involves using SSH access with the defaul