In [None]:
# Reload imported project modules automatically before each cell runs, so edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import json
import logging

# Configure the root logger to emit WARNING and above only.
logging.basicConfig(level=logging.WARNING)

from data.VulnerabilityReport import create_from_flama_json # this is the function that creates the VulnerabilityReport object from the json data
from ai.LLM.LLMServiceStrategy import LLMServiceStrategy 

In [3]:
from pathlib import Path

# Folder that holds the input dataset (and later the saved report), relative to the working directory.
data_folder = Path('../../data/')
# Load the raw findings. The encoding is given explicitly so the result does not
# depend on the platform's locale default (the dataset contains non-ASCII text).
with open(data_folder / 'Dataset.json', encoding='utf-8') as f:
    data = json.load(f)

# Choose your Strategy
Right now, we support the three strategies listed below.
Execute the cell of the one you prefer :)

In [4]:
# Start with no strategy selected; each option cell below assigns my_strategy when executed.
my_strategy = None

## Option 1) OpenAI - Remote
Uses OpenAI's GPT-4o by default.

Relatively cheap and quite good.


In [None]:
from ai.LLM.Stretegies.OpenAIService import OpenAIService

# Select the OpenAI-backed strategy, constructed without arguments.
my_strategy = OpenAIService()

## Option 2) Anthropic - Remote
Uses Anthropic's Claude 3 Opus by default.

Almost perfect but quite expensive.


In [None]:
# Select the Anthropic-backed strategy for this option. Instantiating
# OpenAIService here would silently run the OpenAI backend instead.
# NOTE(review): assumes AnthropicService lives next to the other strategy
# modules in ai.LLM.Stretegies — confirm the module and class name.
from ai.LLM.Stretegies.AnthropicService import AnthropicService

my_strategy = AnthropicService()

## Option 3) OLLAMA - Local
Here, you can choose the model you want to use. If you don't know which one to choose, you can just leave `model_name` as `None` and the default model will be used. Available models can be found [here](https://ollama.com/library).

The performance (both in time and quality) depends heavily on the model used.

In [5]:
# Strategy that generates solutions through Ollama.
from ai.LLM.Stretegies.OLLAMAService import OLLAMAService

# Keep None to use the default model, or set any model name listed at
# https://ollama.com/library
model_name = None
my_strategy = OLLAMAService(model_name=model_name)

# LLM Service
This is the service that uses the strategy you chose. It is the top abstraction layer that will be used to generate solutions.

In [6]:
# Wrap the chosen strategy in the service object used by the rest of the notebook.
# NOTE(review): my_strategy is still None if no option cell above was executed — confirm LLMServiceStrategy handles that.
llm_service = LLMServiceStrategy(my_strategy)

# Data Setup
We instantiate a vulnerability report object and add findings to it. We also sort the findings by severity.
This object takes the LLM service and acts as the main abstraction layer for the data.


In [7]:
# Number of findings to import from the dataset.
n = 5

# Build the VulnerabilityReport: it consists of a list of Finding objects,
# each of which has Solution objects. Passing llm_service is optional; when it
# is omitted, it will be created in each Finding object instead.
vulnerability_report = create_from_flama_json(data, n=n, llm_service=llm_service)

# Sort the findings by severity. As the last expression of the cell, the
# call's return value is displayed.
vulnerability_report.sort()

<data.VulnerabilityReport.VulnerabilityReport at 0x113654940>

# Let's get going with AI - 🚀🌕

## First: We add categories to the Findings
This will help us later to improve prompts (hopefully)

In [8]:
# Assign a category to each finding; as the last expression, the call's return value is displayed.
vulnerability_report.add_category()

100%|██████████| 5/5 [00:04<00:00,  1.09it/s]


<data.VulnerabilityReport.VulnerabilityReport at 0x113654940>

## Then we add solutions 🎉
The function below adds long and short solutions to the findings. It also adds keywords for future research.

For this function, we have multiple layers that may catch errors. So if you see a warning but no error afterwards, the warning was caught. Only if you see an error was the data not generated correctly.

In [9]:
# Generate, for every finding, a long solution, a short solution and search terms.
vulnerability_report.add_solution(
    long=True,
    short=True,
    search_term=True,
)

100%|██████████| 5/5 [00:49<00:00,  9.89s/it]


<data.VulnerabilityReport.VulnerabilityReport at 0x113654940>

# Last but not least: Results
We save and display results in the coming cells.

In [10]:
# Build the output file name from the number of imported findings and the model name.
# n == -1 is labelled "all" in the file name.
findings_label = n if n != -1 else "all"
# Model names may contain ':' (e.g. a tag separator); it is replaced with '_' for the file name.
model_label = my_strategy.get_model_name().replace(":", "_")
report_path = data_folder / f'VulnerabilityReport_{findings_label}_{model_label}.json'

# Serialize the report straight into the file, with an explicit encoding so the
# result does not depend on the platform's locale default.
with open(report_path, 'w', encoding='utf-8') as f:
    json.dump(vulnerability_report.to_dict(), f)

In [11]:
# HTML is imported here as well; it is used by the next cell.
from IPython.display import HTML, Markdown
# Display the report's HTML rendering (table layout requested via table=True) as rich output.
Markdown(vulnerability_report.to_html(table=True))


<h3>CVE-2019-19449</h3><table><tr><th>Name</th><th>Value</th></tr><tr><td>Title</td><td>kernel: mounting a crafted f2fs filesystem image can lead to slab-out-of-bounds read access in f2fs_build_segment_manager in fs/f2fs/segment.c</td></tr><tr><td>Source</td><td>Trivy</td></tr><tr><td>Description</td><td>In the Linux kernel 5.0.21, mounting a crafted f2fs filesystem image can lead to slab-out-of-bounds read access in f2fs_build_segment_manager in fs/f2fs/segment.c, related to init_min_max_mtime in fs/f2fs/segment.c (because the second argument to get_seg_entry is not validated).</td></tr><tr><td>Location List</td><td>linux-libc-dev:, </td></tr><tr><td>CWE IDs</td><td>CWE-125</td></tr><tr><td>CVE IDs</td><td>CVE-2019-19449</td></tr><tr><td>Severity</td><td>80</td></tr><tr><td>Priority</td><td>80</td></tr><tr><td>Category</td><td>SYSTEM</td></tr></table><h3>Solution</h3><h4>Short Description</h4> <p>Update the Linux kernel to a version that addresses the issue (CVE-2019-19449) or apply the patch provided by the kernel developers.</p><h4>Long Description</h4><p>Comprehensive Step-by-Step Solution for Updating the Linux Kernel</p><h4>Search Terms</h4> <p>kernel f2fs security vulnerability; f2fs filesystem image mount exploit; slab-out-of-bounds read access f2fs_build_segment_manager; fs/f2fs/segment.c init_min_max_mtime; unvalidated get_seg_entry argument; CVE-2019-19449; CWE-125; Linux kernel 5.0.21 update; patch for kernel developers</p><h3>CVE-2023-3772</h3><table><tr><th>Name</th><th>Value</th></tr><tr><td>Title</td><td>NULL pointer dereference in xfrm_update_ae_params()</td></tr><tr><td>Source</td><td>Trivy</td></tr><tr><td>Description</td><td>A flaw was found in the Linux kernel’s IP framework for transforming packets (XFRM subsystem). 
This issue may allow a malicious user with CAP_NET_ADMIN privileges to directly dereference a NULL pointer in xfrm_update_ae_params(), leading to a possible kernel crash and denial of service.</td></tr><tr><td>Location List</td><td>linux-libc-dev:, </td></tr><tr><td>CWE IDs</td><td>CWE-476</td></tr><tr><td>CVE IDs</td><td>CVE-2023-3772</td></tr><tr><td>Severity</td><td>60</td></tr><tr><td>Priority</td><td>60</td></tr><tr><td>Category</td><td>SYSTEM</td></tr></table><h3>Solution</h3><h4>Short Description</h4> <p>Upgrade to a vulnerable version of the Linux kernel or apply the patch provided by the vendor, as specified in the CVE-2023-3772 advisory.</p><h4>Long Description</h4><p>Comprehensive Solution for Upgrading to a Vulnerable Version of the Linux Kernel or Applying the Patch</p><h4>Search Terms</h4> <p>Linux kernel xfrm_update_ae_params NULL pointer dereference; Trivy vulnerability report; CWE-476; CVE-2023-3772; IP packet transformation security flaw; kernel crash denial of service; CAP_NET_ADMIN privilege escalation; Linux kernel upgrade patch application; kernel vulnerability mitigation strategies; secure network protocols</p><h3>CVE-2023-0216</h3><table><tr><th>Name</th><th>Value</th></tr><tr><td>Title</td><td>An invalid pointer dereference on read can be triggered when an applic ..., [openssl: Invalid pointer dereference in d2i_PKCS7 functions]</td></tr><tr><td>Source</td><td>Trivy</td></tr><tr><td>Description</td><td>An invalid pointer dereference on read can be triggered when an application tries to load malformed PKCS7 data with the d2i_PKCS7(), d2i_PKCS7_bio() or d2i_PKCS7_fp() functions. The result of the dereference is an application crash which could lead to a denial of service attack. 
The TLS implementation in OpenSSL does not call this function however third party applications might call these functions on untrusted data., Invalid pointer dereference in d2i_PKCS7 functions</td></tr><tr><td>Location List</td><td>libcrypto3:,  & libssl3:, </td></tr><tr><td>CWE IDs</td><td></td></tr><tr><td>CVE IDs</td><td>CVE-2023-0216</td></tr><tr><td>Severity</td><td>60</td></tr><tr><td>Priority</td><td>60</td></tr><tr><td>Category</td><td>CODE</td></tr></table><h3>Solution</h3><h4>Short Description</h4> <p>Upgrade OpenSSL to the latest version that includes the fix for CVE-2023-0216.</p><h4>Long Description</h4><p>Comprehensive Step-by-Step Solution</p><h4>Search Terms</h4> <p>openssl invalid pointer dereference; d2i_PKCS7 functions; PKCS7 data; malformed data; denial of service attack; application crash; untrusted data; third party applications; TLS implementation; CVE-2023-0216</p><h3>CVE-2023-27534</h3><table><tr><th>Name</th><th>Value</th></tr><tr><td>Title</td><td>[SFTP path ~ resolving discrepancy], curl: SFTP path ~ resolving discrepancy</td></tr><tr><td>Source</td><td>Trivy</td></tr><tr><td>Description</td><td>No description is available for this CVE., SFTP path ~ resolving discrepancy</td></tr><tr><td>Location List</td><td>curl:,  & libcurl:, </td></tr><tr><td>CWE IDs</td><td></td></tr><tr><td>CVE IDs</td><td>CVE-2023-27534</td></tr><tr><td>Severity</td><td>30</td></tr><tr><td>Priority</td><td>30</td></tr><tr><td>Category</td><td>PROGRAM</td></tr></table><h3>Solution</h3><h4>Short Description</h4> <p>Update the 'libcurl' library to a version that addresses the CVE-2023-27534 vulnerability.</p><h4>Long Description</h4><p>**Updating libcurl to address CVE-2023-27534 vulnerability**</p><h4>Search Terms</h4> <p>SFTP path resolution; SFTP path discrepancy; curl SFTP path; libcurl update; CVE-2023-27534; security vulnerability; programming library; software update; SFTP protocol; secure file transfer; path traversal; remote code execution; cybersecurity 
research</p><h3>CVE-2022-48303</h3><table><tr><th>Name</th><th>Value</th></tr><tr><td>Title</td><td>heap buffer overflow at from_header() in list.c via specially crafted checksum</td></tr><tr><td>Source</td><td>Trivy</td></tr><tr><td>Description</td><td>GNU Tar through 1.34 has a one-byte out-of-bounds read that results in use of uninitialized memory for a conditional jump. Exploitation to change the flow of control has not been demonstrated. The issue occurs in from_header in list.c via a V7 archive in which mtime has approximately 11 whitespace characters.</td></tr><tr><td>Location List</td><td>tar:, </td></tr><tr><td>CWE IDs</td><td>CWE-125</td></tr><tr><td>CVE IDs</td><td>CVE-2022-48303</td></tr><tr><td>Severity</td><td>30</td></tr><tr><td>Priority</td><td>30</td></tr><tr><td>Category</td><td>CODE</td></tr></table><h3>Solution</h3><h4>Short Description</h4> <p>Update GNU Tar to version 1.35 or later, which addresses this vulnerability.</p><h4>Long Description</h4><p>Update GNU Tar to version 1.35 or later, which addresses this vulnerability.</p><h4>Search Terms</h4> <p>heap buffer overflow; from_header() in list.c; specially crafted checksum; GNU Tar; CVE-2022-48303; CWE-125; tar vulnerability; out-of-bounds read; uninitialized memory; conditional jump; V7 archive; mtime; whitespace characters</p>

In [12]:
# Show the report's string form, turning newlines into HTML line breaks.
# NOTE(review): the text is inserted into HTML unescaped — presumably it contains no markup characters; confirm or escape it.
HTML(str(vulnerability_report).replace('\n', '<br />'))
