# **Evaluation of CTI-Bench dataset**

In [None]:
!pip install google-generativeai



In [None]:
import google.generativeai as genai
from google.colab import userdata
# Fetch the Gemini API key from the Colab user-data entry named 'key1'
# so the key itself never appears in the notebook.
gemini_key = userdata.get('key1')
# Hand the key to the SDK; the model created later relies on this configuration.
genai.configure(api_key=gemini_key)

In [None]:
# Model generation settings passed to genai.GenerativeModel.
# NOTE(review): temperature 0.9 presumably makes repeated runs of the same
# prompt vary; confirm this is intended for a benchmark evaluation.
generation_config = {
  "temperature": 0.9,
  "top_p": 1,
  "top_k": 5,
  "max_output_tokens": 170,
}

In [None]:
# Instantiate the generative model with the generation settings defined above.
# Every evaluation function in this notebook reads this module-level `model`.
model = genai.GenerativeModel(model_name= "gemini-1.0-pro",
                              generation_config = generation_config)

In [None]:
!pip install pandas



In [None]:
import pandas as pd
def read_prompts(file):
    """Load an MCQ batch from a TSV file and build one prompt per row.

    Parameters
    ----------
    file : str or path-like
        Tab-separated file with the columns ``Question``, ``Option A``,
        ``Option B``, ``Option C`` and ``Option D``.

    Returns
    -------
    tuple
        ``(df, prompts)`` where ``df`` is the loaded DataFrame and ``prompts``
        is a list of prompt strings in the same row order.

    Notes
    -----
    The file is decoded as UTF-8 first, with ISO-8859-1 as a fallback.
    The prompt is assembled as a single line: the previous triple-quoted
    template ended each line with a literal ``/`` (where a ``\\`` line
    continuation was evidently intended), so stray slashes, newlines and
    indentation were being sent to the model.
    """
    try:
        df = pd.read_csv(file, sep='\t', encoding='utf-8')
    except UnicodeDecodeError:
        df = pd.read_csv(file, sep='\t', encoding='ISO-8859-1')

    prompts = []
    for _, row in df.iterrows():
        # Adjacent literals are concatenated, giving the same one-line layout
        # as the hand-written single-question prompts elsewhere in the notebook.
        prompt = (
            "You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. "
            "Your task is to choose the best option among the four provided. "
            "Return your answer as a single uppercase letter: A, B, C, or D. "
            f"**Question:** {row['Question']} "
            f"**Options:** A) {row['Option A']} B) {row['Option B']} C) {row['Option C']} D) {row['Option D']} "
            "**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text."
        )
        prompts.append(prompt)
    return df, prompts

In [None]:
import time
import requests
from http.client import RemoteDisconnected
from google.api_core.exceptions import TooManyRequests, InternalServerError
import pandas as pd
import os
def _extract_mcq_choice(text):
    """Return the option letter (A-D) found on the last non-empty line of `text`.

    Markdown or punctuation wrapped around the letter (``**B**``, ``B.``) is
    ignored. When the last line is not a bare letter, the stripped reply is
    returned unchanged so the caller falls back to a whole-reply comparison.
    """
    stripped = text.strip()
    lines = [line for line in stripped.splitlines() if line.strip()]
    if lines:
        candidate = lines[-1].strip().strip('*`.()[]: ').upper()
        if candidate in ('A', 'B', 'C', 'D'):
            return candidate
    return stripped


def execute_prompts(df, prompts, output_file="/content/gemini_mcq.tsv"):
    """Send each MCQ prompt to the module-level `model`, score it and save the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Batch returned by ``read_prompts``; must contain a ``GT`` column with
        the expected option letter. A ``Gemini Response`` column is added to
        it in place.
    prompts : list of str
        One prompt per row of ``df``, in the same order.
    output_file : str, optional
        TSV file the batch is appended to. Defaults to the path that was
        previously hard-coded.

    Returns
    -------
    None
        The number of correct answers and the accuracy are printed.

    Notes
    -----
    * A reply counts as correct when the letter on its last non-empty line
      matches ``GT`` (case-insensitive). The prompt only constrains the last
      line, so comparing the whole reply would mark longer answers as wrong.
    * ``response.text`` raises ``ValueError`` when the response has no valid
      part (for example when it was blocked); such prompts are recorded as
      ``"ERROR"`` and skipped.
    * Rate-limit waits now count towards ``max_retries`` so the loop always
      terminates.
    """
    results = []
    correct_answers = df['GT'].tolist()
    total_correct = 0
    max_retries = 2500  # Maximum number of retries per prompt

    for i, prompt in enumerate(prompts):
        retries = 0
        while retries < max_retries:
            try:
                response = model.generate_content(prompt)
                # Reading .text is what raises ValueError for blocked/empty
                # responses, so it must stay inside the try block.
                raw_text = response.text
            except TooManyRequests:
                print(f"Too many requests. Retrying prompt {i+1} after a delay...")
                retries += 1
                time.sleep(60)  # Wait for 60 seconds before retrying
            except (ConnectionError, RemoteDisconnected) as e:
                print(f"Connection error: {e}. Retrying prompt {i+1}...")
                retries += 1
                time.sleep(2)  # Adjust the sleep duration as needed
            except ValueError as e:
                print(f"ValueError: {e}. Skipping prompt number {i+1}.")
                results.append("ERROR")
                break  # Skip this prompt
            except InternalServerError as e:
                print(f"InternalServerError: {e}. Skipping prompt number {i+1}.")
                results.append("ERROR")
                break  # Skip this prompt
            else:
                print(raw_text)
                result = raw_text.strip()
                results.append(result)
                expected = str(correct_answers[i]).strip().upper()
                if _extract_mcq_choice(result).upper() == expected:
                    total_correct += 1
                # Add a delay between requests to avoid hitting the rate limit
                time.sleep(2)  # Adjust the sleep duration as needed
                break  # Exit the retry loop if successful
        else:
            # Reached only when the retry budget ran out without a break.
            print(f"Failed to process prompt {i+1} after {max_retries} retries.")
            results.append("ERROR")

    df['Gemini Response'] = results

    # Guard against an empty batch, which would otherwise divide by zero.
    total_answers = len(correct_answers)
    accuracy = total_correct / total_answers if total_answers else 0.0

    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        try:
            existing_df = pd.read_csv(output_file, sep='\t', encoding='utf-8')
        except UnicodeDecodeError:
            existing_df = pd.read_csv(output_file, sep='\t', encoding='latin1')
        combined_df = pd.concat([existing_df, df], ignore_index=True)
    else:
        combined_df = df

    # Save the updated file with Gemini's responses
    combined_df.to_csv(output_file, sep='\t', index=False)

    # Print the total correct answers
    print(f'Total correct responses by Gemini: {total_correct}/{total_answers}')
    # Print the final accuracy
    print(f'Final accuracy: {accuracy:.2%}')



In [None]:
# Evaluate the batch stored in mcq1.tsv; execute_prompts prints the score and
# appends the responses to its TSV output file.
df, prompts = read_prompts("/content/mcq1.tsv")
execute_prompts(df, prompts)


Total correct responses by Gemini: 115/199
Final accuracy: 56.78%

In [None]:
# Manual single-question probe: send one hand-written MCQ prompt straight to
# the model and print the raw reply.
# NOTE(review): the name suggests this is question 77 of a batch; confirm.
prompt77= f"""You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. \
Your task is to choose the best option among the four provided. \
Return your answer as a single uppercase letter: A, B, C, or D. \
**Question:** Which of the following groups used spearphishing emails to lure targets into downloading a Cobalt Strike beacon? (MITRE ATT&CK T1204.001: User Execution: Malicious Link)  \
**Options:** A) APT3 B) APT32 C) APT33 D) APT28  \
**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text. """
response = model.generate_content(prompt77)
print(response.text)

B


In [None]:
# Manual single-question probe: send one hand-written MCQ prompt straight to
# the model and print the raw reply.
# NOTE(review): the name suggests this is question 101 of a batch; confirm.
prompt101= f"""You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. \
Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  \
**Question:** Which malware from the provided examples can access a connected webcam and capture pictures?  \
**Options:** A) InvisiMole B) SDBbot C) Derusbi D) Pupy  \
**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  """
response = model.generate_content(prompt101)
print(response.text)

B


In [None]:
# Manual single-question probe: send one hand-written MCQ prompt straight to
# the model and print the raw reply.
# NOTE(review): the name suggests this is question 160 of a batch; confirm.
prompt160= f"""You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. \
Your task is to choose the best option among the four provided. \
Return your answer as a single uppercase letter: A, B, C, or D.  \
**Question:** Which group used a virus that propagates by infecting executables stored on shared drives according to the provided document?  \
**Options:** A) Darkhotel B) Miner-C C) Conti D) H1N1  \
**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text. """
response = model.generate_content(prompt160)
print(response.text)

B


In [None]:
# Evaluate the batch stored in mcq2.tsv; execute_prompts prints the score and
# appends the responses to its TSV output file.
df, prompts = read_prompts("/content/mcq2.tsv")
execute_prompts(df, prompts)



Too many requests. Retrying prompt 17 after a delay...




Too many requests. Retrying prompt 35 after a delay...
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 42.




Too many requests. Retrying prompt 53 after a delay...




Too many requests. Retrying prompt 72 after a delay...




Too many requests. Retrying prompt 91 after a delay...




Too many requests. Retrying prompt 110 after a delay...
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 121.




Too many requests. Retrying prompt 128 after a delay...




Too many requests. Retrying prompt 146 after a delay...




Too many requests. Retrying prompt 163 after a delay...




Too many requests. Retrying prompt 180 after a delay...




Too many requests. Retrying prompt 225 after a delay...




Too many requests. Retrying prompt 242 after a delay...




Too many requests. Retrying prompt 260 after a delay...




Too many requests. Retrying prompt 278 after a delay...




Too many requests. Retrying prompt 296 after a delay...
Total correct responses by Gemini: 170/300
Final accuracy: 56.67%



Total correct responses by Gemini: 170/300
Final accuracy: 56.67%

In [None]:
# Manual single-question probe: send one hand-written MCQ prompt straight to
# the model and print the raw reply.
# NOTE(review): the name suggests this is question 242 of a batch; confirm.
prompt242 = f""" You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. \
Your task is to choose the best option among the four provided. \
Return your answer as a single uppercase letter: A, B, C, or D.  \
**Question:** Which of the following malware examples triggers on a magic packet in TCP or UDP packets?  \
**Options:** A) BUSHWALK B) Ryuk C) SYNful Knock D) Penquin  \
**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text."""
response = model.generate_content(prompt242)
print(response.text)

A


In [None]:
# Manual single-question probe: send one hand-written MCQ prompt straight to
# the model and print the raw reply.
# NOTE(review): the name suggests this is question 321 of a batch; confirm.
prompt321 = f""" You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. \
Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  \
**Question:** In the context of MITRE ATT&CK, which group is known for injecting malicious code into legitimate, signed files in production environments? (Platform: Enterprise)  \
**Options:** A) Threat Group-3390 B) Dragonfly C) APT41 D) FIN7  \
**Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text. """
response = model.generate_content(prompt321)
print(response.text)

A


Combined result for mcq1 and mcq2: 285/499 correct responses, final accuracy 57.11%.

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

C
B
A
C
B
B
C
A
C
B
C
C
A
B
C
A




Too many requests. Retrying prompt 17 after a delay...
B
B
B
C
B
B
A
A
A
C
A
B
A
A
B
A
B
D
B
A
D
A
A
B
A
A
A
D
C
A
A
A
B




Too many requests. Retrying prompt 50 after a delay...
C
D
B
D
C
A
D
B
C
B
B
A
C
B
A
C
A




Too many requests. Retrying prompt 67 after a delay...
A
A
A
B
A
A
A
B
A
B
B
A
C
A
A
C
B




Too many requests. Retrying prompt 84 after a delay...
A
B
B
C
C
A
C
A
D
C
D
C
C
C
C
C
A




Too many requests. Retrying prompt 101 after a delay...
A
B
B
A
A
A
B
C
C
A
B
B
B
B
D
B




Too many requests. Retrying prompt 117 after a delay...
B
B
A
D
B
B
A
B
B
A
D
A
A
B
B
C




Too many requests. Retrying prompt 133 after a delay...
A
B
B
A
C
B
B
C
A
B
A
B
A
A
C
C
A




Too many requests. Retrying prompt 150 after a delay...
C
A
B
A
B
B
C
A
D
A
A
D
A
B
A
D
A




Too many requests. Retrying prompt 167 after a delay...
A
A
C
C
B
C
C
C
D
B
C
B
C
A
C
A
A
C




Too many requests. Retrying prompt 185 after a delay...
A
B
A
B
A
A
B
D
B
B
B
B
B
B
A
Total correct responses by Gemini: 109/199
Final accuracy: 54.77%


In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

A
A
A
B
A
C
A
B
A
B
D
B
A
D
A
A
C
C
B
B
B
B
D
B
A
C




Too many requests. Retrying prompt 27 after a delay...
C
A
C
B
C
A
B
C
D
A
C
C
A
B
B
D
C
A




Too many requests. Retrying prompt 45 after a delay...
B
C
B
B
B
C
A
B
A
B
A
A
B
A
B
A
A




Too many requests. Retrying prompt 62 after a delay...
C
A
C
B
A
B
A
B
C
C
C
C
B
B
B
B
B




Too many requests. Retrying prompt 79 after a delay...
B
D
D
A
B
A
D
B
C
C
A
A
A
C
C
D
D
B




Too many requests. Retrying prompt 97 after a delay...
B
D
A
A
C
A
A
A
C
D
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 108.
B
B
B
A
C




Too many requests. Retrying prompt 114 after a delay...
B
A
C
B
A
D
B
A
D
B
D
C
B
A
A
C
B
A




Too many requests. Retrying prompt 132 after a delay...
A
A
C
C
A
A
A
A
B
A
B
A
A
A
B
A
C




Too many requests. Retrying prompt 149 after a delay...
C
B
A
A
B
C
C
B
C
B
C
A
B
C
B
C




Too many requests. Retrying prompt 165 after a delay...
D
B
A
C
A
B
A
A
B
A
A
B
A
C
C
B
D
C




Too many requests. Retrying prompt 183 after a delay...
A
D
B
B
B
D
A
B
A
C
A
D
C
A
B
C




Too many requests. Retrying prompt 199 after a delay...
A
C
B
A
C
A
C
D
A
D
C
B
C
A
A
A
C
B




Too many requests. Retrying prompt 217 after a delay...
B
B
A
C
B
C
D
A
B
C
B
B
B
D
B
A
C




Too many requests. Retrying prompt 234 after a delay...
C
C
D
B
A
B
B
A
A
B
B
A
C
B
C
B
A




Too many requests. Retrying prompt 251 after a delay...
A
Total correct responses by Gemini: 141/251
Final accuracy: 56.18%


Total correct responses by Gemini: 141/251
Final accuracy: 56.18%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

A
B
A
C
B
B
B
B
B
A
B
C
C
C
A




Too many requests. Retrying prompt 16 after a delay...
A
C
B
B
B
C
C
B
A
D
B
B
B
C
D
A
A
A
A




Too many requests. Retrying prompt 35 after a delay...
B
B
C
C
B
B
A
D
D
D
C
A
D
B
A
B
C
A




Too many requests. Retrying prompt 53 after a delay...
B
A
C
B
A
C
C
A
C
B
C
B
B
A
B
A
D
C




Too many requests. Retrying prompt 71 after a delay...
D
B
B
B
A
D
A
B
C
A
B
A
A
C
D
C
B
A




Too many requests. Retrying prompt 89 after a delay...
A
B
C
A
A
B
A
A
D
A
A
A
B
C
C
B
C




Too many requests. Retrying prompt 106 after a delay...
A
A
B
B
D
B
A
A
C
B
D
B
B
C
C
B
A
B




Too many requests. Retrying prompt 124 after a delay...
A
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 126.
A
B
A
B
D
C
B
A
A
C
C
B
B
A




Too many requests. Retrying prompt 141 after a delay...
C
B
A
A
D
B
B
C
B
C
A
C
B
A
D
A
A
C
C
A




Too many requests. Retrying prompt 161 after a delay...
B
D
B
B
D
D
C
B
B
B
D
C
B
D
C
B
A
C




Too many requests. Retrying prompt 179 after a delay...
A
D
C
A
A
D
A
B
A
C
B
C
C
C
C
A
D
B




Too many requests. Retrying prompt 197 after a delay...
A
A
A
C
B
A
D
B
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 206.
C
A
B
D
B
B




Too many requests. Retrying prompt 213 after a delay...
A
A
B
D
A
C
A
B
C
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 222.
A
B
D
D
D
C
B
A
C




Too many requests. Retrying prompt 232 after a delay...
A
B
A
C
D
B
C
A
D
B
B
B
A
A
A
C
B
B




Too many requests. Retrying prompt 250 after a delay...
A
Total correct responses by Gemini: 141/250
Final accuracy: 56.40%


Total correct responses by Gemini: 141/250
Final accuracy: 56.40%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

D
C
A
D
D
B
A
C
A
C
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 11.
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 13.
B
B




Too many requests. Retrying prompt 16 after a delay...
A
D
A
B
A
C
A
A
B
A
B
A
A
B
C
A
B
A
C
C
A
B




Too many requests. Retrying prompt 38 after a delay...
A
C
C
C
C
A
B
B
A
D
C
B
A
B
A
C




Too many requests. Retrying prompt 54 after a delay...
C
D
C
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 57.
A
C
A
B
B
A
A
D
A
A
D
A
B




Too many requests. Retrying prompt 71 after a delay...
A
A
C
B
B
A
A
B
D
B
A
C
D
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 85.
A
B
A




Too many requests. Retrying prompt 89 after a delay...
C
B
B
A
C
A
D
A
A
A
A
B
B
C
B
A
B




Too many requests. Retrying prompt 106 after a delay...
A
B
B
A
A
B
B
B
B
B
A
B
C
B
A
D
B
C




Too many requests. Retrying prompt 124 after a delay...
A
A
D
B
B
B
C
A
A
A
A
A
B
A
C
A




Too many requests. Retrying prompt 140 after a delay...
C
C
D
B
A
A
A
C
A
A
C
Total correct responses by Gemini: 66/150
Final accuracy: 44.00%


Total correct responses by Gemini: 66/150
Final accuracy: 44.00%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

C
C
D
B
C
B
C
C
B
B
B
A
D
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 15.
A




Too many requests. Retrying prompt 17 after a delay...
A
D
B
B
D
B
B
A
B
C
B
D
C
A
D
C
B
A




Too many requests. Retrying prompt 35 after a delay...
C
B
A
C
A
A
A
B
B
C
A
B
A
C
A
C
B




Too many requests. Retrying prompt 52 after a delay...
C
B
A
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 56.
B
A
A
D
B
B
C
A
B
A
C
D
A




Too many requests. Retrying prompt 70 after a delay...
A
A
B
C
B
C
C
B
A
B
C
B
A
B
A
C
C
B




Too many requests. Retrying prompt 88 after a delay...
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 89.
C
A
A
A
A
A
A
D
A
A
B
A
A
C
A
A




Too many requests. Retrying prompt 106 after a delay...
A
A
A
A
A
A
C
B
B
A
A
A
A
C
C
B
B
B




Too many requests. Retrying prompt 124 after a delay...
A
A
B
B
D
C
B
D
C
C
A
C
A
C
A
C




Too many requests. Retrying prompt 140 after a delay...
C
B
C
D
B
C
C
A
D
D
B
A
C
A
C
C
C
B




Too many requests. Retrying prompt 158 after a delay...
A
A
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 161.
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 162.
A
B
D
A
A
A
B
A
B
A
B
D
B




Too many requests. Retrying prompt 176 after a delay...
A
C
A
C
A
B
B
C
D
A
B
B
A
A
B
C
B
A




Too many requests. Retrying prompt 194 after a delay...
D
A
A
B
B
D
C
B
B
B
B
D
C
A
B
B
A
C




Too many requests. Retrying prompt 212 after a delay...
A
A
A
D
A
A
C
A
A
A
B
C
A
B
A
A
A
A
A




Too many requests. Retrying prompt 231 after a delay...
B
B
B
B
B
B
C
A
C
C
C
B
D
B
C
B




Too many requests. Retrying prompt 247 after a delay...
A
A
A
C
Total correct responses by Gemini: 143/250
Final accuracy: 57.20%


Total correct responses by Gemini: 143/250
Final accuracy: 57.20%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

A
B
C
D
D
D
B
A
C
B
D
C
D
A
B
D




Too many requests. Retrying prompt 17 after a delay...
B
B
C
D
C
B
C
C
D
C
C
B
C
D
C
A
C
A
C




Too many requests. Retrying prompt 36 after a delay...
B
D
C
B
C
B
C
C
D
C
C
C
D
C
C
B
C
A
D
B
C
C
C
D
C
B
C
C
B
A
C
A
C




Too many requests. Retrying prompt 69 after a delay...
C
A
B
B
B
B
B
B
B
C
A
C
D
A
C
C




Too many requests. Retrying prompt 85 after a delay...
C
C
C
B
B
B
C
A
A
B
D
D
B
C
D
B
C
D
B




Too many requests. Retrying prompt 104 after a delay...
A
B
B
B
B
A
C
B
A
C
B
C
B
A
C
C
D
D




Too many requests. Retrying prompt 122 after a delay...
B
C
B
C
A
A
C
A
C
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 131.
D
A
B
A
D
C
B
D
A




Too many requests. Retrying prompt 141 after a delay...
B
B
D
C
A
A
C
D
C
D
B
D
A
A
D
B
B




Too many requests. Retrying prompt 158 after a delay...
C
B
B
A
C
B
A
C
D
C
C
C
A
C
A
B
A
C




Too many requests. Retrying prompt 176 after a delay...
C
B
A
A
C
D
C
A
D
B
D
A
B
C
A
A
A
B




Too many requests. Retrying prompt 194 after a delay...
B
C
A
A
D
B
B
Total correct responses by Gemini: 135/200
Final accuracy: 67.50%


Total correct responses by Gemini: 135/200
Final accuracy: 67.50%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

C
B
C
B
C
A
A
A
A
A
C
D
D
A
A
A
A




Too many requests. Retrying prompt 18 after a delay...
D
A
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 21.
B
B
C
D
B
C
C
B
C
D
B
A
A




Too many requests. Retrying prompt 35 after a delay...
B
A
B
C
C
D
A
A
A
D
A
A
B
B
A
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 50.
C
C
C




Too many requests. Retrying prompt 54 after a delay...
A
A
A
A
A
D
A
B
C
B
C
C
C
C
A
A
A
B
A
A




Too many requests. Retrying prompt 74 after a delay...
B
B
A
A
A
C
A
B
A
D
A
B
B
D
C
B
C
B
C




Too many requests. Retrying prompt 93 after a delay...
B
A
C
B
A
C
B
C
C
A
C
A
A
A
C
A
C
C




Too many requests. Retrying prompt 111 after a delay...
B
A
A
D
A
B
B
B
C
C
C
C
C
A
C
A
B
C




Too many requests. Retrying prompt 129 after a delay...
D
A
C
B
C
C
C
C
A
A
B
C
A
C
A
C
A
D




Too many requests. Retrying prompt 147 after a delay...
C
A
B
C
A
B
A
C
A
A
C
D
B
C
C
B
C
A
A
C




Too many requests. Retrying prompt 167 after a delay...
C
C
C
A
B
A
A
A
A
B
B
B
D
C
B
A
B




Too many requests. Retrying prompt 184 after a delay...
C
A
B
B
B
A
D
C
C
A
C
B
A
C
B
A
D




Too many requests. Retrying prompt 201 after a delay...
B
A
A
C
A
A
B
A
C
B
C
B
C
B
A
B
C
A
A




Too many requests. Retrying prompt 220 after a delay...
B
A
B
B
D
A
A
B
A
B
A
B
A
A
B
C
C
D
C




Too many requests. Retrying prompt 239 after a delay...
C
C
C
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 243.
A
C
D
A
C
C
B
A
C
B
B
A
A
D
D




Too many requests. Retrying prompt 259 after a delay...
A
C
D
A
B
B
A
C
B
B
A
B
B
B
B
B
A
A




Too many requests. Retrying prompt 277 after a delay...
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 277.
D
B
C
C


ERROR:tornado.access:500 POST /v1beta/models/gemini-1.0-pro:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 761.01ms


InternalServerError: 500 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.0-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting. Skipping prompt number 282.
B
C
C
A
A
A
B
A
C
D
C
C
D




Too many requests. Retrying prompt 296 after a delay...
A
A
D
A
B
B
D
B
A
A
A
A
A
A
B
C
A
A
B




Too many requests. Retrying prompt 315 after a delay...
B
B
A
C
B
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 321.
C
A
A
A
C
C
A
A
B
B
B
C




Too many requests. Retrying prompt 334 after a delay...
A
D
A
A
B
A
A
A
C
C
A
C
A
D
B
B
B
Total correct responses by Gemini: 200/350
Final accuracy: 57.14%


Total correct responses by Gemini: 200/350
Final accuracy: 57.14%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

A
A
B
C
C
C
B
B
C
B
C
A
B
A
C
A
A
B
B
B
B
B
A
D




Too many requests. Retrying prompt 25 after a delay...
C
B
C
B
C
C
D
B
A
C
C
D
D
A
A
D
B
B




Too many requests. Retrying prompt 43 after a delay...
C
B
B
B
C
A
A
B
C
C
D
B
D
D
A
B
A
B




Too many requests. Retrying prompt 61 after a delay...
C
A
D
A
C
B
B
B
C
B
C
C
C
A
A
C
D
B




Too many requests. Retrying prompt 79 after a delay...
B
B
B
B
C
C
C
A
A
D
B
A
A
C
C
B
C
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 96.
B




Too many requests. Retrying prompt 98 after a delay...
B
D
C
A
A
B
C
C
B
C
A
C
A
A
A
A
A
C
C




Too many requests. Retrying prompt 117 after a delay...
C
C
B
B
A
D
B
B
B
C
B
B
C
D
B
D
C
A




Too many requests. Retrying prompt 135 after a delay...
A
A
C
A
C
A
A
B
A
B
A
A
B
A
D
C




Too many requests. Retrying prompt 151 after a delay...
B
B
A
A
C
A
D
D
C
B
C
C
B
B
A
A
D
B




Too many requests. Retrying prompt 169 after a delay...
B
A
B
D
B
A
C
D
C
B
C
C
B
A
C
B




Too many requests. Retrying prompt 185 after a delay...
A
A
C
A
A
A
D
C
B
A
A
C
A
A
C
C
A
D
C
A
C
C
B
C
A
A
B
B
A
B
B
ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.. Skipping prompt number 216.
A
A
B
A
C
C
A
B
C
C
A
B
A
D
B
C
B




Too many requests. Retrying prompt 234 after a delay...
C
A
A
A
A
A
B
B
A
A
A
B
B
A
B
B
B
A
B
C
C
A
A
A
A
A
B
B
A
B
B
A
B
C
A
A
D
D
A
C
A
A
B
A
B
B
A
D
A
C
A
A
B
D
C
A
A
B
A
B
B
B
A
D
A
C
C
Total correct responses by Gemini: 161/300
Final accuracy: 53.67%


Total correct responses by Gemini: 161/300
Final accuracy: 53.67%

In [None]:
# Evaluate the batch currently stored in mcq3.tsv; execute_prompts prints the
# score and appends the responses to its TSV output file.
# NOTE(review): several cells reuse this path with different batch sizes in
# their outputs; presumably the file was replaced between runs. Confirm.
df, prompts = read_prompts("/content/mcq3.tsv")
execute_prompts(df, prompts)

C
C
B
B
C
B
A
B
C
D
B
C
C
D
A
B
D
D
C
C
A
C
A
A
A
D
A
A
B
B




Too many requests. Retrying prompt 31 after a delay...
C
A
B
A
C
D
A
C
C
A
C
D
C
B
C
A
A
A
A
B
B
Total correct responses by Gemini: 29/51
Final accuracy: 56.86%


Total correct responses by Gemini: 29/51
Final accuracy: 56.86%

In [None]:
import pandas as pd
import time
import requests
from http.client import RemoteDisconnected
from google.api_core.exceptions import TooManyRequests, InternalServerError
import os

def read_prompts_rcm(file):
    """Load a CVE-to-CWE mapping batch from a TSV file and build its prompts.

    Parameters
    ----------
    file : str or path-like
        Tab-separated file containing a ``Description`` column.

    Returns
    -------
    tuple
        ``(df, prompts)``: the loaded DataFrame and one prompt string per row,
        in row order.

    The file is decoded as UTF-8 first; ISO-8859-1 is used if that fails.
    """
    try:
        frame = pd.read_csv(file, sep='\t', encoding='utf-8')
    except UnicodeDecodeError:
        frame = pd.read_csv(file, sep='\t', encoding='ISO-8859-1')

    def _render(record):
        # Fill the fixed instruction template with this row's CVE description.
        return f""" you are a cybersecurity expert you have to analyze and map the following CVE description to the appropriate CWE , you response should contains only cwe id. \
        CVE description: {record['Description']}
        """

    return frame, [_render(record) for _, record in frame.iterrows()]

def _extract_cwe_id(text):
    """Return the last ``CWE-<number>`` token in `text`, upper-cased.

    Falls back to the stripped, upper-cased reply when no such token exists,
    so an exact-match reply is still compared as before.
    """
    import re  # local import: keeps this block self-contained

    matches = re.findall(r'CWE-\d+', text.upper())
    return matches[-1] if matches else text.strip().upper()


def execute_prompts_rcm(df, prompts, output_file="/content/output_file_rcm.tsv"):
    """Send each CVE-to-CWE prompt to the module-level `model`, score it and save the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Batch returned by ``read_prompts_rcm``; must contain a ``GT`` column
        with the expected CWE ID. A ``Gemini Response`` column is added to it
        in place.
    prompts : list of str
        One prompt per row of ``df``, in the same order.
    output_file : str, optional
        TSV file the batch is appended to. Defaults to the path that was
        previously hard-coded.

    Returns
    -------
    None
        The number of correct answers and the accuracy are printed.

    Notes
    -----
    * A reply counts as correct when the CWE ID extracted from it equals
      ``GT`` (case-insensitive), so decorated replies such as ``**CWE-79**``
      are no longer marked wrong. ``GT`` is converted with ``str()`` first so
      a missing value cannot raise.
    * ``response.text`` raises ``ValueError`` when the response has no valid
      part; here that error is retried like the other transient failures.
    * Rate-limit waits now count towards ``max_retries`` so the loop always
      terminates.
    """
    results = []
    correct_answers = df['GT'].tolist()
    total_correct = 0
    max_retries = 5  # Maximum number of retries per prompt

    for i, prompt in enumerate(prompts):
        retries = 0
        while retries < max_retries:
            try:
                response = model.generate_content(prompt)
                # Reading .text is what raises ValueError for blocked/empty
                # responses, so it must stay inside the try block.
                result = response.text.strip()
            except TooManyRequests:
                print(f"Too many requests. Retrying prompt {i+1} after a delay...")
                retries += 1
                time.sleep(60)  # Wait for 60 seconds before retrying
            except (ConnectionError, RemoteDisconnected) as e:
                print(f"Connection error: {e}. Retrying prompt {i+1}...")
                retries += 1
                time.sleep(2)  # Adjust the sleep duration as needed
            except ValueError as e:
                print(f"ValueError: {e}. Retrying prompt {i+1}...")
                retries += 1
                time.sleep(2)  # Adjust the sleep duration as needed
            except InternalServerError as e:
                print(f"InternalServerError: {e}. Retrying prompt {i+1}...")
                retries += 1
                time.sleep(2)  # Adjust the sleep duration as needed
            else:
                print(result)
                results.append(result)
                expected = str(correct_answers[i]).strip().upper()
                if _extract_cwe_id(result) == expected:
                    total_correct += 1
                # Add a delay between requests to avoid hitting the rate limit
                time.sleep(2)  # Adjust the sleep duration as needed
                break  # Exit the retry loop if successful
        else:
            # Reached only when the retry budget ran out without a break.
            print(f"Failed to process prompt {i+1} after {max_retries} retries.")
            results.append("ERROR")

    df['Gemini Response'] = results

    # Guard against an empty batch, which would otherwise divide by zero.
    total_answers = len(correct_answers)
    accuracy = total_correct / total_answers if total_answers else 0.0

    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        # Same decoding fallback as execute_prompts uses for its output file.
        try:
            existing_df = pd.read_csv(output_file, sep='\t', encoding='utf-8')
        except UnicodeDecodeError:
            existing_df = pd.read_csv(output_file, sep='\t', encoding='latin1')
        combined_df = pd.concat([existing_df, df], ignore_index=True)
    else:
        combined_df = df

    # Save the updated file with Gemini's responses
    combined_df.to_csv(output_file, sep='\t', index=False)

    # Print the total correct answers
    print(f'Total correct responses by Gemini: {total_correct}/{total_answers}')
    # Print the final accuracy
    print(f'Final accuracy: {accuracy:.2%}')

# Evaluate the CVE-to-CWE batch stored in crm1.tsv.
# execute_prompts_rcm uses the module-level `model`, so the model-creation cell
# must have been run first; the recorded NameError shows it had not been.
df, prompts = read_prompts_rcm("/content/crm1.tsv")
execute_prompts_rcm(df, prompts)


NameError: name 'model' is not defined

In [None]:
# Ad-hoc CWE-mapping probe with a single hand-written CVE description.
# `response.text` raises ValueError when the response contains no valid part
# (see the recorded output), so this cell fails instead of printing.
prompti = """ Based on the following description, what is the CWE ID? \
CVE Description: In the Linux kernel through 6.7.1, there is a use-after-free in cec_queue_msg_fh, related to drivers/media/cec/core/cec-adap.c and drivers/media/cec/core/cec-api.c. \
"""
response = model.generate_content(prompti)
print(response.text)


ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.

In [None]:
!pip install cvss

Collecting cvss
  Downloading cvss-3.1-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading cvss-3.1-py2.py3-none-any.whl (30 kB)
Installing collected packages: cvss
Successfully installed cvss-3.1


In [None]:
import pandas as pd

def read_prompts_vsp(file):
    """Load a tab-separated file and build one CVSS-vector prompt per row.

    The file is decoded as UTF-8 first; if that raises UnicodeDecodeError it
    is read again as ISO-8859-1. Every row's 'Description' value is inserted
    into a fixed instruction template asking for a CVSS v3.1 vector string.

    Args:
        file: path (or file-like object) accepted by `pd.read_csv`.

    Returns:
        A tuple `(frame, prompt_list)`: the loaded DataFrame and a list of
        prompt strings in row order.
    """
    try:
        frame = pd.read_csv(file, sep='\t', encoding='utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: retry with a single-byte encoding.
        frame = pd.read_csv(file, sep='\t', encoding='ISO-8859-1')

    def _render(record):
        # The template is sent to the model verbatim, so its wording and
        # whitespace are kept exactly as they are.
        return f"""
        Analyze the following CVE description, calculate the CVSS v3.1 Base Score and determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. \

        Valid options for each metric are as follows: \
        - Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P) \
        - Attack Complexity (AC): Low (L), High (H) \
        - Privileges Required (PR): None (N), Low (L), High (H) \
        - User Interaction (UI): None (N), Required (R) \
        - Scope (S): Unchanged (U), Changed (C) \
        - Confidentiality (C): None (N), Low (L), High (H) \
        - Integrity (I): None (N), Low (L), High (H) \
        - Availability (A): None (N), Low (L), High (H) \

        provide only response in format below don't writr paragraphs or introduction just give me the CVSS v3 Vector String. \
        ** important ** your answer should be CVSS v3 Vector String in the following format: \
        Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H \

        CVE Description: {record['Description']}
        """

    prompt_list = [_render(record) for _, record in frame.iterrows()]
    return frame, prompt_list


In [None]:
from cvss import CVSS3
def get_cvss_score(cvss_vector):
    """Parse a CVSS v3 vector string and return its first reported score.

    Args:
        cvss_vector: vector string such as
            "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H".

    Returns:
        Element 0 of `CVSS3(...).scores()` — the base score according to the
        `cvss` package documentation.

    Any exception raised by `CVSS3` for an unparsable vector propagates to
    the caller.
    """
    return CVSS3(cvss_vector).scores()[0]

In [None]:
def execute_prompts_vsp(df, prompts):
    """Send each CVSS prompt to the model, score the replies, and save them.

    Each reply is upper-cased and scored with `get_cvss_score`; the absolute
    difference to the score of the ground-truth vector (column 'GT') is
    accumulated so the Mean Absolute Deviation can be printed at the end.
    Rows whose reply could be scored are appended to
    /content/output_file_vsp.tsv.

    Relies on the notebook-global `model` and on `get_cvss_score` being
    defined by earlier cells.

    Args:
        df: DataFrame with a 'GT' column of ground-truth vector strings,
            row-aligned with `prompts`.
        prompts: list of prompt strings, one per row of `df`.

    Returns:
        None. Progress and the final metric are printed; results are written
        to disk.
    """
    # Function-scope imports keep this cell runnable on a fresh kernel even if
    # the earlier import cell was skipped (the recorded run failed with
    # "NameError: name 'TooManyRequests' is not defined").
    import os
    import time
    from http.client import RemoteDisconnected
    from google.api_core.exceptions import InternalServerError, TooManyRequests

    results = []
    valid_indices = []  # Positions in `df` of the rows that produced a scorable reply
    correct_answers = df['GT'].tolist()
    error = 0  # Running sum of |predicted score - ground-truth score|
    total = 0  # Number of replies that contributed to `error`
    max_retries = 5  # Limit the number of attempts per prompt

    for i, prompt in enumerate(prompts):
        retries = 0
        success = False
        while retries < max_retries:
            try:
                response = model.generate_content(prompt)

                # Accessing `.text` on a reply without a valid Part raises
                # ValueError on its own; either way the prompt ends up in the
                # `except ValueError` branch below and is skipped.
                if not response or not hasattr(response, 'text') or not response.text:
                    raise ValueError(f"No valid response for prompt number {i+1}. Response might be blocked or empty.")

                result = response.text.strip()
                print(f"Response for prompt {i+1}: {result}")
                pred = result.upper()
                gt = correct_answers[i].upper()

                try:
                    pred_score = get_cvss_score(pred)
                    gt_score = get_cvss_score(gt)
                    if pred_score is not None and gt_score is not None:
                        error += abs(pred_score - gt_score)
                        total += 1
                except Exception as e:
                    print(f"Invalid response at row {i + 1}: {e}")
                    # Count the unparsable reply as an attempt and pause before
                    # asking again. Without this the loop would re-query the
                    # API forever when the vector can never be parsed.
                    retries += 1
                    time.sleep(2)
                    continue

                results.append(result)
                valid_indices.append(i)  # Track the index of valid responses
                success = True
                # Add a delay between requests to avoid hitting the rate limit
                time.sleep(2)  # Adjust the sleep duration as needed
                break  # Exit the retry loop if successful
            except TooManyRequests:
                print(f"Too many requests. Retrying prompt {i+1} after a delay...")
                time.sleep(60)  # Wait for 60 seconds before retrying
                retries += 1
            except (ConnectionError, RemoteDisconnected) as e:
                print(f"Connection error: {e}. Retrying prompt {i+1}...")
                retries += 1
                time.sleep(2)  # Adjust the sleep duration as needed
            except ValueError as e:
                print(f"ValueError: {e}. Skipping prompt number {i+1}.")
                break  # Skip this prompt
            except InternalServerError as e:
                print(f"InternalServerError: {e}. Skipping prompt number {i+1}.")
                break  # Skip this prompt

        if not success:
            print(f"Failed to process prompt {i+1} after {max_retries} retries.")

    # Keep only the rows that produced a scorable reply; `results` is in the
    # same order as `valid_indices`.
    filtered_df = df.iloc[valid_indices].copy()
    # NOTE(review): the column is labelled 'Llama3-70b Response' although the
    # replies come from `model`; kept unchanged so files already written with
    # this header stay compatible — confirm the intended name.
    filtered_df['Llama3-70b Response'] = results

    output_file = "/content/output_file_vsp.tsv"

    # Append to a previous run's file when one exists and is non-empty.
    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        existing_df = pd.read_csv(output_file, sep='\t', encoding='utf-8')
        combined_df = pd.concat([existing_df, filtered_df], ignore_index=True)
    else:
        combined_df = filtered_df

    # Save the updated file with the valid responses
    combined_df.to_csv(output_file, sep='\t', index=False)

    # Calculate and print the Mean Absolute Deviation (MAD)
    if total > 0:
        mad = error / total
        print(f'Mean Absolute Deviation: {mad}')
    else:
        print('No valid responses to calculate MAD.')

# Example usage: build the CVSS prompts from the TSV file, then query the
# model and report the Mean Absolute Deviation.
# NOTE(review): the earlier setup cells (model creation, imports) must have
# been run first; the recorded output of this cell is a NameError.
df, prompts = read_prompts_vsp("/content/cti-vsp11.tsv")
execute_prompts_vsp(df, prompts)


NameError: name 'TooManyRequests' is not defined