# Submit CWE assignments to GPT-4o for review in batch mode

The OpenAI API is very limited in terms of request counts and token allowance for lower-tier users.

However, OpenAI supports a batch mode that is cheaper than the regular API and still quick. It is also limited in terms of batch size, but multiple batches can be used.

In [79]:
import csv
import os
import json
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import time
from openai import OpenAI
import requests
import re
import glob  # For file pattern matching

In [56]:
# Input CSV that is read into adp_df.
cwe_only_cisa_adp_input_file_path = 'data_out/extracted_cwe_info_cisa_adp_linux_only.csv'  # Entries with CWEs only
# Final CSV written at the end of the notebook (input rows combined with the GPT-4o verdicts).
chatgpt_batch_file_path = './data_out/chatgpt_batch.csv'
# Directory scanned for batch task files by the automatic submission cell.
directory = "./data_out/vulnrichment/"
# JSONL file that receives every task in one file; this name is reassigned by the manual submission cell.
file_name = "./data_out/vulnrichment/batch_tasks_ALL.jsonl"

In [237]:
# Read the CSV file of CVEs and their assigned CWEs into adp_df.
try:
    adp_df = pd.read_csv(cwe_only_cisa_adp_input_file_path, quoting=csv.QUOTE_ALL, escapechar='\\') # safe CSV
except FileNotFoundError as err:
    # Raise instead of calling exit(1): exit() is meant for scripts and ends the
    # notebook session, whereas an exception stops "Run All" at this cell and
    # shows which path was missing.
    raise FileNotFoundError(
        f"Error: CSV file not found: {cwe_only_cisa_adp_input_file_path}"
    ) from err

adp_df

Unnamed: 0,cve_id,Container,adp_providerMetadata_orgId,adp_providerMetadata_shortName,adp_providerMetadata_dateUpdated,CVE_Description,cna_providerMetadata_orgId,cna_providerMetadata_shortName,cna_providerMetadata_dateUpdated,CWE_ID,CWE_Description,description_length
0,CVE-2021-35559,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-25T16:05:50.566Z,Vulnerability in the Java SE Oracle GraalVM En...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,972
1,CVE-2021-26928,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:18:46.280Z,BIRD through does not provide functionality fo...,,,,CWE-306,CWE-306 Missing Authentication for Critical Fu...,443
2,CVE-2021-26918,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:09:54.735Z,The ProBot bot through for Discord might allow...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,720
3,CVE-2021-34983,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-08T15:08:02.757Z,NETGEAR Multiple Routers httpd Missing Authent...,,,,CWE-120,CWE-120 Buffer Copy without Checking Size of I...,621
4,CVE-2021-33990,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-12T15:32:38.330Z,Liferay Portal allows Command=FileUpload&Type=...,,,,CWE-78,CWE-78 Improper Neutralization of Special Elem...,281
...,...,...,...,...,...,...,...,...,...,...,...,...
1879,CVE-2024-37762,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-08T15:13:35.601Z,MachForm up to version is affected by an authe...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,118
1880,CVE-2024-37634,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-13T20:21:11.912Z,TOTOLINK was discovered to contain a stack ove...,,,,CWE-121,CWE-121 Stack-based Buffer Overflow,91
1881,CVE-2024-37535,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-10T12:55:15.708Z,GNOME VTE before allows an attacker to cause a...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,138
1882,CVE-2024-37019,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-04T14:49:54.923Z,Northern.tech Mender Enterprise before and bef...,,,,CWE-287,CWE-287 Improper Authentication,73


# Now Ask an LLM to Validate 

1. Assign a CWE based on Description
2. Say whether it agrees with the assigned CWE


In [59]:
# Load the variables from the .env file into the process environment.
load_dotenv()

# Read the key from the environment. The previous version called
# dotenv_values(), which is never imported in this notebook and therefore
# raised NameError on a fresh kernel.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; add it to .env or the environment.")

# Shared OpenAI client used by every API call below.
client = OpenAI(api_key=openai_api_key)

In [67]:
#https://platform.openai.com/settings/organization/billing/overview
#https://cookbook.openai.com/examples/batch_processing
# System prompt used both by the interactive helper and by every batch task body.
# NOTE(review): the prompt speaks of "multiple CVEs" and asks for a JSON array,
# but each request built in this notebook carries one CVE description plus its
# CWE ID and sets response_format to json_object. The sample schema is also
# missing a comma after the "Confidence" line. The text is left unchanged here
# because editing it would change the model's answers.
caption_system_prompt = '''
You are a cybersecurity expert specializing in identifying Common Weakness Enumeration (CWE) IDs from CVE descriptions. Your goal is to analyze if you agree with the assigned CWE ID or not for multiple CVEs.
Analyze the following CVEs and their assigned CWE IDs:
For each CVE, output a JSON object containing the following information:
{
    "Agree": string, // "Yes" or "No"
    "Confidence": float // a confidence score between 0 and 1
    "Rationale": string, // Only if you do not Agree, provide a rationale why not
}
    Respond with a JSON array containing an object for each CVE, in the same order as provided.
'''

def get_chatgpt4_cwe_opinion_batch(cve_description, cwe_id, model="gpt-4o"):
    """Ask the chat model whether it agrees with the CWE assigned to one CVE.

    Sends a single synchronous Chat Completions request (not a batch job) with
    ``caption_system_prompt`` as the system message and the CVE description
    and CWE ID as two separate text parts of the user message.

    Args:
        cve_description: Text of the CVE description.
        cwe_id: CWE identifier assigned to the CVE, e.g. "CWE-79".
        model: Chat model name. Defaults to "gpt-4o"; other models previously
            tried here were "gpt-4-turbo" and "gpt-3.5-turbo-0125".

    Returns:
        The content of the first choice's message. JSON-object response mode
        is requested, so this is expected to be a JSON string.
    """
    user_parts = [
        {"type": "text", "text": cve_description},
        {"type": "text", "text": cwe_id},
    ]

    response = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": caption_system_prompt},
            {"role": "user", "content": user_parts},
        ],
    )

    return response.choices[0].message.content


In [64]:
# Smoke test: send the first five CVEs through the synchronous helper and print each raw reply.
sample_rows = adp_df[:5]
for description, assigned_cwe in zip(sample_rows['CVE_Description'], sample_rows['CWE_ID']):
    response = get_chatgpt4_cwe_opinion_batch(description, assigned_cwe)
    print(f"RESPONSE: {response}\n\n")

RESPONSE: {
    "Agree": "Yes",
    "Confidence": 0.85,
    "Rationale": ""
}


RESPONSE: {
    "Agree": "Yes",
    "Confidence": 0.9,
    "Rationale": ""
}



RESPONSE: {
    "Agree": "No",
    "Confidence": 0.7,
    "Rationale": "CWE-434 corresponds to Unrestricted Upload of File with Dangerous Type. While the description does mention the allowance of double extensions like .html.jpg with the text/html content type, which could suggest potential unrestricted or improper file validation, the vendor's statement indicates it might not lead to an actual vulnerability. This case seems more complex, potentially implicating issues related to CWE-20 (Improper Input Validation) rather than CWE-434. The risk lies in the misconfiguration rather than a direct vulnerability in unrestricted file type uploads."
}


RESPONSE: {
    "Agree": "No",
    "Confidence": 0.95,
    "Rationale": "The description of the vulnerability indicates it is due to a lack of authentication for a critical function, spe

In [68]:
def _make_batch_task(row_index, cve_id, cve_description, cwe_id):
    """Build one batch request line for a single CVE.

    The custom_id combines the CVE ID and the adp_df row label so that each
    result can be traced back to its source row.
    """
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": cve_description},
            {"type": "text", "text": cwe_id},
        ],
    }
    system_message = {"role": "system", "content": caption_system_prompt}

    return {
        "custom_id": f"{cve_id}.{row_index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        # The body mirrors the arguments of a regular Chat Completions call.
        "body": {
            "model": "gpt-4o",
            "temperature": 0.0,
            "max_tokens": 300,
            "response_format": {"type": "json_object"},
            "messages": [system_message, user_message],
        },
    }


# One task per row of adp_df, in row order.
tasks = [
    _make_batch_task(row_index, cve_id, cve_description, cwe_id)
    for row_index, cve_id, cve_description, cwe_id in zip(
        adp_df.index, adp_df['cve_id'], adp_df['CVE_Description'], adp_df['CWE_ID']
    )
]

In [69]:
# Preview the first two tasks only; echoing the whole list prints every request body.
tasks[:2]

[{'custom_id': 'CVE-2021-35559.0',
  'method': 'POST',
  'url': '/v1/chat/completions',
  'body': {'model': 'gpt-4o',
   'temperature': 0.0,
   'max_tokens': 300,
   'response_format': {'type': 'json_object'},
   'messages': [{'role': 'system',
     'content': '\nYou are a cybersecurity expert specializing in identifying Common Weakness Enumeration (CWE) IDs from CVE descriptions. Your goal is to analyze if you agree with the assigned CWE ID or not for multiple CVEs.\nAnalyze the following CVEs and their assigned CWE IDs:\nFor each CVE, output a JSON object containing the following information:\n{\n    "Agree": string, // "Yes" or "No"\n    "Confidence": float // a confidence score between 0 and 1\n    "Rationale": string, // Only if you do not Agree, provide a rationale why not\n}\n    Respond with a JSON array containing an object for each CVE, in the same order as provided.\n'},
    {'role': 'user',
     'content': [{'type': 'text',
       'text': 'Vulnerability in the Java SE Oracl

In [70]:
# Quick check of the container type of tasks.
type(tasks)

list

In [71]:
# Number of tasks built; one task is appended per row of adp_df.
len(tasks)

1884

In [73]:
# Serialise every task as one JSON document per line (JSONL) into file_name.
with open(file_name, 'w') as file:
    file.writelines(json.dumps(obj) + '\n' for obj in tasks)

In [74]:
# Function to chunk a list into smaller lists of a specified size
def chunk_list(data_list, chunk_size):
    """Yield consecutive slices of ``data_list`` of at most ``chunk_size`` items.

    Args:
        data_list: Sequence supporting ``len()`` and slicing.
        chunk_size: Maximum number of items per chunk; must be at least 1.

    Yields:
        Slices of ``data_list`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Raised when iteration
            starts, because this is a generator. A negative size used to
            produce no chunks at all, silently dropping every item.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    for start in range(0, len(data_list), chunk_size):
        yield data_list[start:start + chunk_size]

# Write each chunk to a separate file
def write_chunks_to_files(tasks, chunk_size, base_file_name):
    """Split ``tasks`` into chunks and write each chunk to its own JSONL file.

    Files are named ``{base_file_name}_{n}.jsonl`` with ``n`` starting at 1,
    and hold one JSON document per line.

    Args:
        tasks: List of JSON-serialisable task objects.
        chunk_size: Maximum number of tasks per file.
        base_file_name: Path prefix for the output files, without extension.
    """
    # Only create a directory when the prefix has one: os.makedirs('') raises
    # for a bare file name such as "tasks".
    target_dir = os.path.dirname(base_file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    for chunk_number, chunk in enumerate(chunk_list(tasks, chunk_size), start=1):
        # Local name chosen so it does not shadow the notebook-level file_name.
        chunk_path = f"{base_file_name}_{chunk_number}.jsonl"
        with open(chunk_path, 'w') as file:
            for obj in chunk:
                file.write(json.dumps(obj) + '\n')
        print(f"Written {len(chunk)} items to {chunk_path}")

# Example usage
# Number of tasks written to each batch file.
chunk_size = 100
# Path prefix of the chunk files; "_<n>.jsonl" is appended per chunk.
base_file_name = "./data_out/vulnrichment/adp_batch_tasks"

write_chunks_to_files(tasks, chunk_size, base_file_name)

Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_1.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_2.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_3.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_4.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_5.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_6.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_7.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_8.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_9.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_10.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_11.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_12.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_13.jsonl
Written 100 items to ./data_out/vulnrichment/adp_batch_tasks_14.jsonl
Written 100 items to ./data_o

# Submit to Batch

The batches can be submitted manually, or automatically.
* Both options are coded below.

I did try submitting multiple batches in parallel, but the serial approach is simpler and more robust.

### Manual file by file

Change file_name and run the batch manually

In [216]:
# Task file to submit; change the number to submit a different chunk.
file_name = "./data_out/vulnrichment/adp_batch_tasks_19.jsonl"

# Derive the result file names from the task file name.
base_name, ext = os.path.splitext(file_name)
result_file_name = f"{base_name}_result.jsonl"
csv_result_file_name = f"{base_name}_result.csv"

# Upload the task file. The context manager closes the handle once the upload
# returns; the previous bare open() left it open.
with open(file_name, "rb") as task_file:
    batch_file = client.files.create(
        file=task_file,
        purpose="batch"
    )

# Create the batch job for the uploaded file.
batch_job = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)

# To cancel the job:
#client.batches.cancel(batch_job.id)

result_file_name

'./data_out/vulnrichment/adp_batch_tasks_19_result.jsonl'

In [218]:
# Re-fetch the batch job to refresh its status; re-run this cell until the
# printed status is 'completed'.
batch_job = client.batches.retrieve(batch_job.id)
print(batch_job)

Batch(id='batch_JMP8sDpj0jAzoWO0N7MOrk1F', completion_window='24h', created_at=1721597354, endpoint='/v1/chat/completions', input_file_id='file-BTyn7LbWQrtzN8TEdpqHhoX8', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1721597367, error_file_id=None, errors=None, expired_at=None, expires_at=1721683754, failed_at=None, finalizing_at=1721597363, in_progress_at=1721597354, metadata=None, output_file_id='file-cHrWOkHKf81cAJoyE5JAR5lh', request_counts=BatchRequestCounts(completed=84, failed=0, total=84))


In [219]:
# Download the output of the finished batch and save it next to the task file.
result_file_id = batch_job.output_file_id
if result_file_id is None:
    # No output file is attached yet (or the batch produced none). Stop with a
    # clear message instead of passing None to the download call.
    raise RuntimeError(
        f"Batch {batch_job.id} has no output file (status: {batch_job.status}); "
        "re-run the status cell and try again once it has completed."
    )

results = client.files.content(result_file_id).content

with open(result_file_name, 'wb') as file:
    file.write(results)

## Automatically Submit Batch and Wait

In [80]:

def process_file(file_path):
    """Upload one batch task file and create a batch job for it.

    Args:
        file_path: Path of the JSONL task file to submit.

    Returns:
        ``(batch_job_id, result_file_name)`` on success, where the result file
        name is ``file_path`` with ``_result`` inserted before the extension.
        ``(None, None)`` if the upload or job creation raised; the error is
        printed rather than propagated.
    """
    stem, suffix = os.path.splitext(file_path)
    result_file_name = stem + "_result" + suffix
    csv_result_file_name = stem + "_result.csv"

    for announcement in (
        f"Processing file: {file_path}",
        f"Result file will be: {result_file_name}",
        f"CSV result file will be: {csv_result_file_name}",
    ):
        print(announcement)

    try:
        # Upload the task file; the handle is closed as soon as the call returns.
        with open(file_path, "rb") as handle:
            uploaded = client.files.create(file=handle, purpose="batch")

        # Create the batch job that will process the uploaded file.
        job = client.batches.create(
            input_file_id=uploaded.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
        )

        print(f"Batch job created for {file_path}: {job.id}")
        return job.id, result_file_name

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None, None

def wait_for_job_completion(batch_job_id, poll_interval=60, timeout=None):
    """Poll a batch job until it reaches a terminal state.

    Args:
        batch_job_id: ID of the batch job to watch.
        poll_interval: Seconds to sleep between status checks (default 60).
        timeout: Optional maximum number of seconds to wait. ``None`` (the
            default) waits indefinitely for a terminal status.

    Returns:
        True if the job completed; False if it failed, was cancelled, expired,
        or the timeout elapsed.
    """
    # "expired" is terminal too: without it a batch that ran out of its
    # completion window would be polled forever.
    unsuccessful_states = ("failed", "cancelled", "expired")
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        status = client.batches.retrieve(batch_job_id).status
        if status == "completed":
            return True
        if status in unsuccessful_states:
            print(f"Job {batch_job_id} {status}.")
            return False
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Timed out waiting for job {batch_job_id}. Last status: {status}")
            return False
        print(f"Waiting for job {batch_job_id} to complete. Current status: {status}")
        time.sleep(poll_interval)

def retrieve_results(batch_job_id, result_file_name):
    """Download the output file of a batch job and save it locally.

    Errors are printed rather than raised, so a failed download does not stop
    the surrounding submission loop.

    Args:
        batch_job_id: ID of the batch job whose output should be fetched.
        result_file_name: Path the raw result bytes are written to.
    """
    try:
        batch_job = client.batches.retrieve(batch_job_id)
        result_file_id = batch_job.output_file_id

        # Without an output file id there is nothing to download; report it
        # instead of passing None on to the file API.
        if not result_file_id:
            error_file_id = getattr(batch_job, "error_file_id", None)
            print(
                f"No output file for job {batch_job_id} "
                f"(error file: {error_file_id}); nothing saved to {result_file_name}"
            )
            return

        results = client.files.content(result_file_id).content

        with open(result_file_name, 'wb') as file:
            file.write(results)

        print(f"Results saved to {result_file_name}")

    except Exception as e:
        print(f"Error retrieving results for job {batch_job_id}: {str(e)}")

In [None]:

    
# Submit the chunked task files one after another, waiting for each batch to
# finish before starting the next.
try:
    # Select only the chunk files written as adp_batch_tasks_<n>.jsonl. A plain
    # ".jsonl" suffix check would also pick up the combined batch_tasks_ALL.jsonl
    # and the *_result.jsonl downloads stored in the same directory, and
    # re-submit them as new batches.
    task_files = sorted(
        path
        for path in glob.glob(os.path.join(directory, "adp_batch_tasks_*.jsonl"))
        if not path.endswith("_result.jsonl")
    )
    if not task_files:
        print(f"No batch task files found in {directory}")

    for file_path in task_files:
        # Skip chunks whose results are already on disk so that re-running the
        # cell does not submit (and pay for) the same batch twice.
        base_name, ext = os.path.splitext(file_path)
        if os.path.exists(f"{base_name}_result{ext}"):
            print(f"Skipping {file_path}: results already downloaded\n")
            continue

        # Process file and create batch job
        job_id, result_file = process_file(file_path)

        if job_id and result_file:
            # Wait for job completion
            if wait_for_job_completion(job_id):
                # Retrieve and save results
                retrieve_results(job_id, result_file)
            else:
                print(f"Skipping result retrieval for failed job {job_id}")

        print(f"Completed processing {file_path}\n")

except Exception as e:
    # Anything raised above lands here (API errors while polling included), so
    # the message no longer claims the directory was the problem.
    print(f"Error while processing batch files in {directory}: {str(e)}")


## Process Batch Files

In [233]:


# Define the data directory and output file
data_directory = "./data_out/vulnrichment/"
csv_result_file_name = "./data_out/vulnrichment/aggregated_results.csv"


def _batch_number(path):
    """Return the chunk number embedded in an adp_batch_tasks_<n>_result.jsonl path."""
    return int(re.search(r'adp_batch_tasks_(\d+)_result\.jsonl$', path).group(1))


def _parse_batch_result(res):
    """Convert one parsed line of a batch result file into a flat record.

    Returns a dict with row_index, cve_id, Agree, Rationale and Confidence, or
    None (after printing the reason) when the line holds no usable answer.
    """
    task_id = res['custom_id']
    # custom_id was built as "<cve_id>.<row index>"; recover both parts.
    cve_id, separator, row_label = task_id.rpartition('.')
    if not separator:
        cve_id, row_label = task_id, ''
    row_index = int(row_label) if row_label.isdigit() else None

    # A request that failed has no usable response body; skip it instead of
    # crashing on the nested lookups.
    body = (res.get('response') or {}).get('body') or {}
    choices = body.get('choices')
    if not choices:
        print(f"No completion for CVE ID {cve_id}: {res.get('error')}")
        return None
    content = choices[0]['message'].get('content') or ''

    # Extract JSON part from the string
    match = re.search(r'\{.*\}', content, re.DOTALL)
    if match is None:
        print(f"Error extracting JSON for CVE ID {cve_id}: {content}")
        return None

    # Ensure JSON parsing
    try:
        result = json.loads(match.group())
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for CVE ID {cve_id}:")
        print(f"Raw JSON: {content}")
        print(f"Error details: {e}")
        return None

    return {
        'row_index': row_index,
        'cve_id': cve_id,
        'Agree': result.get('Agree'),
        'Rationale': result.get('Rationale', ''),
        'Confidence': result.get('Confidence'),
    }


# Discover the result files instead of hard-coding how many there are, and
# read them in chunk-number order.
result_paths = sorted(
    glob.glob(data_directory + "adp_batch_tasks_*_result.jsonl"),
    key=_batch_number,
)
if not result_paths:
    raise FileNotFoundError(f"No batch result files found in {data_directory}")

# Initialize the overall list of results (for all files)
all_data = []
for result_file_name in result_paths:
    with open(result_file_name, 'r') as file:
        for line in file:
            if not line.strip():
                continue
            record = _parse_batch_result(json.loads(line))
            if record is not None:
                all_data.append(record)

# Create a DataFrame from the aggregated data (after all files are processed)
merged_df = pd.DataFrame(
    all_data, columns=['row_index', 'cve_id', 'Agree', 'Rationale', 'Confidence']
)

# Index the results by the source row recovered from custom_id. The later
# index-based merge with adp_df then stays aligned even when a result was
# skipped above or the output lines arrive in a different order than the input.
if merged_df['row_index'].notna().all() and merged_df['row_index'].is_unique:
    merged_df = merged_df.set_index('row_index').rename_axis(None).sort_index()
else:
    print("Warning: could not recover a unique row index from custom_id; using positional index.")
    merged_df = merged_df.drop(columns='row_index')

# Save to CSV once
merged_df.to_csv(csv_result_file_name, index=False)

print("Results saved to", csv_result_file_name)

merged_df

Results saved to ./data_out/vulnrichment/aggregated_results.csv


Unnamed: 0,cve_id,Agree,Rationale,Confidence
0,CVE-2021-35559,Yes,,0.90
1,CVE-2021-26928,Yes,,0.90
2,CVE-2021-26918,Yes,,0.90
3,CVE-2021-34983,No,CWE-120 refers to 'Buffer Copy without Checkin...,0.90
4,CVE-2021-33990,No,CWE-78 refers to Improper Neutralization of Sp...,0.30
...,...,...,...,...
1879,CVE-2024-37762,Yes,,0.95
1880,CVE-2024-37634,Yes,,0.95
1881,CVE-2024-37535,Yes,,0.90
1882,CVE-2024-37019,No,The description 'Weak Authentication' is quite...,0.80


In [244]:
# Distribution of the confidence scores returned by the model.
merged_df.Confidence.value_counts()

0.90    758
0.95    534
0.80    248
0.70    154
1.00    105
0.30     47
0.20     37
0.10      1
Name: Confidence, dtype: int64

In [234]:
# Sanity check: rows of adp_df whose cve_id does not appear in merged_df.
# An empty frame means every cve_id in adp_df has at least one row in merged_df.
# (Despite its name, the variable holds adp_df rows that are missing from merged_df.)
cve_ids_not_in_adp_df = adp_df[~adp_df['cve_id'].isin(merged_df['cve_id'])]
cve_ids_not_in_adp_df

Unnamed: 0,cve_id,Container,adp_providerMetadata_orgId,adp_providerMetadata_shortName,adp_providerMetadata_dateUpdated,CVE_Description,cna_providerMetadata_orgId,cna_providerMetadata_shortName,cna_providerMetadata_dateUpdated,CWE_ID,CWE_Description,description_length


In [235]:
# Count of each value in the model's Agree column.
merged_df.Agree.value_counts()

Yes    1178
No      706
Name: Agree, dtype: int64

In [239]:
# Drop cve_id from adp_df; merged_df already carries a cve_id column, so the
# index-based merge that follows would otherwise produce suffixed duplicates.
# errors='ignore' makes the cell safe to re-run: the first run has already
# removed the column from adp_df, so a second run would raise KeyError.
adp_df = adp_df.drop(columns=['cve_id'], errors='ignore')
adp_df

Unnamed: 0,Container,adp_providerMetadata_orgId,adp_providerMetadata_shortName,adp_providerMetadata_dateUpdated,CVE_Description,cna_providerMetadata_orgId,cna_providerMetadata_shortName,cna_providerMetadata_dateUpdated,CWE_ID,CWE_Description,description_length
0,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-25T16:05:50.566Z,Vulnerability in the Java SE Oracle GraalVM En...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,972
1,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:18:46.280Z,BIRD through does not provide functionality fo...,,,,CWE-306,CWE-306 Missing Authentication for Critical Fu...,443
2,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:09:54.735Z,The ProBot bot through for Discord might allow...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,720
3,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-08T15:08:02.757Z,NETGEAR Multiple Routers httpd Missing Authent...,,,,CWE-120,CWE-120 Buffer Copy without Checking Size of I...,621
4,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-12T15:32:38.330Z,Liferay Portal allows Command=FileUpload&Type=...,,,,CWE-78,CWE-78 Improper Neutralization of Special Elem...,281
...,...,...,...,...,...,...,...,...,...,...,...
1879,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-08T15:13:35.601Z,MachForm up to version is affected by an authe...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,118
1880,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-13T20:21:11.912Z,TOTOLINK was discovered to contain a stack ove...,,,,CWE-121,CWE-121 Stack-based Buffer Overflow,91
1881,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-10T12:55:15.708Z,GNOME VTE before allows an attacker to cause a...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,138
1882,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-04T14:49:54.923Z,Northern.tech Mender Enterprise before and bef...,,,,CWE-287,CWE-287 Improper Authentication,73


In [240]:
# Combine the model verdicts with the original ADP rows, keeping all rows from both.
# NOTE(review): the join key is the row index of each frame, not cve_id, so it
# is only correct when row i of merged_df describes row i of adp_df — confirm
# that no result was skipped or re-ordered while merged_df was being built.
new_df = pd.merge(merged_df, adp_df, left_index=True, right_index=True, how='outer')
new_df

Unnamed: 0,cve_id,Agree,Rationale,Confidence,Container,adp_providerMetadata_orgId,adp_providerMetadata_shortName,adp_providerMetadata_dateUpdated,CVE_Description,cna_providerMetadata_orgId,cna_providerMetadata_shortName,cna_providerMetadata_dateUpdated,CWE_ID,CWE_Description,description_length
0,CVE-2021-35559,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-25T16:05:50.566Z,Vulnerability in the Java SE Oracle GraalVM En...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,972
1,CVE-2021-26928,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:18:46.280Z,BIRD through does not provide functionality fo...,,,,CWE-306,CWE-306 Missing Authentication for Critical Fu...,443
2,CVE-2021-26918,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:09:54.735Z,The ProBot bot through for Discord might allow...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,720
3,CVE-2021-34983,No,CWE-120 refers to 'Buffer Copy without Checkin...,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-08T15:08:02.757Z,NETGEAR Multiple Routers httpd Missing Authent...,,,,CWE-120,CWE-120 Buffer Copy without Checking Size of I...,621
4,CVE-2021-33990,No,CWE-78 refers to Improper Neutralization of Sp...,0.30,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-12T15:32:38.330Z,Liferay Portal allows Command=FileUpload&Type=...,,,,CWE-78,CWE-78 Improper Neutralization of Special Elem...,281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1879,CVE-2024-37762,Yes,,0.95,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-08T15:13:35.601Z,MachForm up to version is affected by an authe...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,118
1880,CVE-2024-37634,Yes,,0.95,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-13T20:21:11.912Z,TOTOLINK was discovered to contain a stack ove...,,,,CWE-121,CWE-121 Stack-based Buffer Overflow,91
1881,CVE-2024-37535,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-10T12:55:15.708Z,GNOME VTE before allows an attacker to cause a...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,138
1882,CVE-2024-37019,No,The description 'Weak Authentication' is quite...,0.80,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-04T14:49:54.923Z,Northern.tech Mender Enterprise before and bef...,,,,CWE-287,CWE-287 Improper Authentication,73


In [241]:
# Prefix the verdict columns with the model name.
new_df = new_df.rename(columns={'Agree': 'gpt-4o_Agree', 'Rationale': 'gpt-4o_Rationale', 'Confidence': 'gpt-4o_Confidence',})
new_df


Unnamed: 0,cve_id,gpt-4o_Agree,gpt-4o_Rationale,gpt-4o_Confidence,Container,adp_providerMetadata_orgId,adp_providerMetadata_shortName,adp_providerMetadata_dateUpdated,CVE_Description,cna_providerMetadata_orgId,cna_providerMetadata_shortName,cna_providerMetadata_dateUpdated,CWE_ID,CWE_Description,description_length
0,CVE-2021-35559,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-25T16:05:50.566Z,Vulnerability in the Java SE Oracle GraalVM En...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,972
1,CVE-2021-26928,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:18:46.280Z,BIRD through does not provide functionality fo...,,,,CWE-306,CWE-306 Missing Authentication for Critical Fu...,443
2,CVE-2021-26918,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-01T15:09:54.735Z,The ProBot bot through for Discord might allow...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,720
3,CVE-2021-34983,No,CWE-120 refers to 'Buffer Copy without Checkin...,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-05-08T15:08:02.757Z,NETGEAR Multiple Routers httpd Missing Authent...,,,,CWE-120,CWE-120 Buffer Copy without Checking Size of I...,621
4,CVE-2021-33990,No,CWE-78 refers to Improper Neutralization of Sp...,0.30,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-12T15:32:38.330Z,Liferay Portal allows Command=FileUpload&Type=...,,,,CWE-78,CWE-78 Improper Neutralization of Special Elem...,281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1879,CVE-2024-37762,Yes,,0.95,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-07-08T15:13:35.601Z,MachForm up to version is affected by an authe...,,,,CWE-434,CWE-434 Unrestricted Upload of File with Dange...,118
1880,CVE-2024-37634,Yes,,0.95,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-13T20:21:11.912Z,TOTOLINK was discovered to contain a stack ove...,,,,CWE-121,CWE-121 Stack-based Buffer Overflow,91
1881,CVE-2024-37535,Yes,,0.90,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-10T12:55:15.708Z,GNOME VTE before allows an attacker to cause a...,,,,CWE-400,CWE-400 Uncontrolled Resource Consumption,138
1882,CVE-2024-37019,No,The description 'Weak Authentication' is quite...,0.80,adp,134c704f-9b21-4f2e-91b3-4a467353bcc0,CISA-ADP,2024-06-04T14:49:54.923Z,Northern.tech Mender Enterprise before and bef...,,,,CWE-287,CWE-287 Improper Authentication,73


In [247]:
# Write the combined frame with every field quoted and backslash as the escape
# character, matching the options used to read the input CSV.
new_df.to_csv(chatgpt_batch_file_path, index=False, quoting=csv.QUOTE_ALL, escapechar='\\') # safe CSV