In [None]:
import google.generativeai as genai
import pandas as pd
import time

def generate_cvss_vectors(descriptions, api_key):
    """Ask Gemini for one CVSS v3.1 vector string per prompt.

    Every prompt is sent as its own request. Joining several prompts into a
    single request cannot work with the 50-token output cap used here: the
    reply is cut off after roughly one vector, so there would be nothing to
    pair with the remaining prompts.

    Args:
        descriptions: Iterable of prompt strings, one per CVE.
        api_key: API key handed to ``genai.configure``.

    Returns:
        A list with exactly one entry per prompt, in the same order. An entry
        is the first well-formed ``CVSS:3.1/...`` vector found in the reply
        (upper-cased), or ``None`` when the request failed, no candidate was
        returned, or the reply contained no valid vector.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    import re  # local import so the function does not depend on other cells

    if not api_key:
        raise ValueError("API key for Generative AI is not set.")

    prompts = list(descriptions)

    # Configure the API
    genai.configure(api_key=api_key)

    # Configuration for the model
    generation_config = {
        "temperature": 0.7,
        "max_output_tokens": 50,
    }

    # Search anywhere in the reply instead of requiring the line to start with
    # the vector: the model may wrap it in markdown (e.g. "**CVSS:3.1/...**").
    vector_pattern = re.compile(
        r"CVSS:3\.1/AV:[NALP]/AC:[LH]/PR:[NLH]/UI:[NR]/S:[UC]/C:[NLH]/I:[NLH]/A:[NLH]",
        re.IGNORECASE,
    )

    try:
        model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    except Exception as e:
        print(f"Error during API call: {e}")
        return [None] * len(prompts)

    vectors = []
    for prompt in prompts:
        vector = None
        try:
            response = model.generate_content(prompt)
            if response.candidates:
                reply = "".join(part.text for part in response.candidates[0].content.parts)
                match = vector_pattern.search(reply)
                if match:
                    vector = match.group(0).upper()
            else:
                print("No candidates returned in the API response.")
        except Exception as e:
            # Best effort: a failed request only costs its own prompt, the
            # remaining prompts of the batch are still attempted.
            print(f"Error during API call: {e}")
        vectors.append(vector)

    return vectors

# Main function to process the TSV file and calculate accuracy
def process_cvss_file(filename, api_key, batch_size=2, max_rows=200, sleep_seconds=4):
    """Score the model's CVSS v3.1 predictions against a ground-truth TSV file.

    Reads ``filename`` as a tab-separated file with ``Description`` and ``GT``
    columns, builds one prompt per row, sends the prompts to
    ``generate_cvss_vectors`` in batches and prints per-row results followed by
    the overall accuracy.

    Args:
        filename: Path of the TSV file.
        api_key: API key forwarded to ``generate_cvss_vectors``.
        batch_size: Number of prompts handed to ``generate_cvss_vectors`` per call.
        max_rows: Only the first ``max_rows`` rows of the file are considered;
            ``None`` means the whole file.
        sleep_seconds: Pause between two consecutive batches, to stay under the
            API rate limit. No pause is made after the final batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    correct_count = 0
    total_count = 0

    # Read the TSV file using pandas
    df = pd.read_csv(filename, delimiter='\t', encoding='latin-1')  # Adjust encoding if needed

    # Select rows by position so the batching does not depend on the index labels.
    selected = df if max_rows is None else df.head(max(max_rows, 0))

    # A missing cell is read as NaN (a float) and has no .strip(); skip such rows.
    usable = selected.dropna(subset=['Description', 'GT'])
    skipped = len(selected) - len(usable)
    if skipped:
        print(f"Skipped {skipped} row(s) with a missing Description or GT value.")

    descriptions = usable['Description'].astype(str).str.strip().tolist()
    gt_vectors = usable['GT'].astype(str).str.strip().str.upper().tolist()

    for start in range(0, len(descriptions), batch_size):
        batch_gt_vectors = gt_vectors[start:start + batch_size]

        batch_descriptions = []
        for description in descriptions[start:start + batch_size]:
            prompt = f"""Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric’s value and provide the final CVSS v3.1 vector string.
Valid options for each metric are as follows:
- Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)
- Attack Complexity (AC): Low (L), High (H)
- Privileges Required (PR): None (N), Low (L), High (H)
- User Interaction (UI): None (N), Required (R)
- Scope (S): Unchanged (U), Changed (C)
- Confidentiality (C): None (N), Low (L), High (H)
- Integrity (I): None (N), Low (L), High (H)
- Availability (A): None (N), Low (L), High (H)
Summarize each metric’s value and provide the final CVSS v3.1 vector string in the exact following format:
CVSS:3.1/AV:X/AC:X/PR:X/UI:X/S:X/C:X/I:X/A:X
CVE Description: {description}"""
            batch_descriptions.append(prompt)

        generated_vectors = list(generate_cvss_vectors(batch_descriptions, api_key) or [])
        # Pad with None so a short answer list counts as wrong answers instead
        # of silently dropping rows from the accuracy denominator.
        generated_vectors += [None] * (len(batch_gt_vectors) - len(generated_vectors))

        for generated_vector, correct_vector in zip(generated_vectors, batch_gt_vectors):
            if isinstance(generated_vector, str):
                # Ground truth is stripped and upper-cased; compare like with like.
                generated_vector = generated_vector.strip().upper()

            is_correct = generated_vector == correct_vector
            if is_correct:
                correct_count += 1
            total_count += 1

            print(f"Generated Vector: {generated_vector}")
            print(f"Correct Vector: {correct_vector}")
            print(f"{'Correct' if is_correct else 'Incorrect'}\n")

        # Sleep to respect rate limits, but only when another batch follows.
        if sleep_seconds and start + batch_size < len(descriptions):
            time.sleep(sleep_seconds)

    # Calculate accuracy
    accuracy = (correct_count / total_count) * 100 if total_count > 0 else 0
    print(f"Total Descriptions Processed: {total_count}")
    print(f"Correct Vectors: {correct_count}")
    print(f"Accuracy: {accuracy:.2f}%")

# Example usage
if __name__ == "__main__":
    import getpass
    import os

    # Never store the API key in the notebook. Read it from the environment and
    # fall back to a prompt that does not echo the input. The key that used to
    # be hard-coded here must be treated as leaked and revoked.
    api_key = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Gemini API key: ")
    process_cvss_file("/content/cti-vsp.tsv", api_key, batch_size=2, max_rows=200)  # Replace with your TSV file path


Full API Response: response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "**CVSS:3.1/AV:L/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:N**\n**Summary:**\n- **AV:L"
              }
            ],
            "role": "model"
          },
          "finish_reason": "MAX_TOKENS",
          "index": 0,
          "safety_ratings": [
            {
              "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
              "probability": "NEGLIGIBLE"
            },
            {
              "category": "HARM_CATEGORY_HATE_SPEECH",
              "probability": "NEGLIGIBLE"
            },
            {
              "category": "HARM_CATEGORY_HARASSMENT",
              "probability": "NEGLIGIBLE"
            },
            {
              "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
              "probability": "LOW"
            }
   

KeyboardInterrupt: 

In [None]:
import google.generativeai as genai
import pandas as pd
import time

def generate_cvss_vectors(descriptions, api_key):
    """
    Generate CVSS vectors using the Gemini API, one request per prompt.

    Prompts are not merged into a single request: with the 50-token output cap
    used here the reply is truncated after roughly one vector, which would
    leave the remaining prompts without an answer.

    Args:
        descriptions: Iterable of prompt strings, one per CVE.
        api_key: API key handed to ``genai.configure``.

    Returns:
        A list with exactly one entry per prompt, in the same order. An entry
        is the first well-formed ``CVSS:3.1/...`` vector found in the reply
        (upper-cased), or ``None`` when the request failed, no candidate was
        returned, or the reply contained no valid vector.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    import re  # local import so the function does not depend on other cells

    if not api_key:
        raise ValueError("API key for Generative AI is not set.")

    pending_prompts = list(descriptions)

    # Configure the API
    genai.configure(api_key=api_key)

    # Configuration for the model
    generation_config = {
        "temperature": 0.7,
        "max_output_tokens": 50,
    }

    # The vector may be wrapped in markdown (e.g. "**CVSS:3.1/...**"), so look
    # for it anywhere in the reply rather than only at the start of a line.
    vector_pattern = re.compile(
        r"CVSS:3\.1/AV:[NALP]/AC:[LH]/PR:[NLH]/UI:[NR]/S:[UC]/C:[NLH]/I:[NLH]/A:[NLH]",
        re.IGNORECASE,
    )

    try:
        # Initialize the generative model
        model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    except Exception as e:
        print(f"Error during API call: {e}")
        return [None] * len(pending_prompts)

    results = []
    for prompt in pending_prompts:
        extracted = None
        try:
            response = model.generate_content(prompt)
            if response.candidates:
                reply_text = "".join(part.text for part in response.candidates[0].content.parts)
                found = vector_pattern.search(reply_text)
                if found:
                    extracted = found.group(0).upper()
            else:
                print("No candidates returned in the API response.")
        except Exception as e:
            # Best effort: one failed request must not discard the answers
            # for the other prompts in the batch.
            print(f"Error during API call: {e}")
        results.append(extracted)

    return results

def process_cvss_file(filename, api_key, batch_size=2, max_rows=10, sleep_seconds=4):
    """
    Process the CVSS file and evaluate the model's accuracy on the first
    ``max_rows`` rows.

    Reads ``filename`` as a tab-separated file with ``Description`` and ``GT``
    columns, builds one prompt per row, sends the prompts to
    ``generate_cvss_vectors`` in batches and prints per-row results followed by
    the overall accuracy. If the file cannot be read, the error is printed and
    the function returns without evaluating anything.

    Args:
        filename: Path of the TSV file.
        api_key: API key forwarded to ``generate_cvss_vectors``.
        batch_size: Number of prompts handed to ``generate_cvss_vectors`` per call.
        max_rows: Only the first ``max_rows`` rows of the file are considered;
            ``None`` means the whole file.
        sleep_seconds: Pause between two consecutive batches, to stay under the
            API rate limit. No pause is made after the final batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    correct_count = 0
    total_count = 0

    # Read the TSV file
    try:
        df = pd.read_csv(filename, delimiter='\t', encoding='latin-1')  # Adjust encoding if needed
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    # Select rows by position so the batching does not depend on the index labels.
    selected = df if max_rows is None else df.head(max(max_rows, 0))

    # A missing cell is read as NaN (a float) and has no .strip(); skip such rows.
    usable = selected.dropna(subset=['Description', 'GT'])
    skipped = len(selected) - len(usable)
    if skipped:
        print(f"Skipped {skipped} row(s) with a missing Description or GT value.")

    descriptions = usable['Description'].astype(str).str.strip().tolist()
    gt_vectors = usable['GT'].astype(str).str.strip().str.upper().tolist()

    # Slicing in steps of batch_size always yields the trailing partial batch,
    # including when the file holds fewer than max_rows rows.
    for start in range(0, len(descriptions), batch_size):
        batch_gt_vectors = gt_vectors[start:start + batch_size]

        batch_descriptions = []
        for description in descriptions[start:start + batch_size]:
            # Prepare the prompt
            prompt = f"""Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric’s value and provide the final CVSS v3.1 vector string.
Valid options for each metric are as follows:
- Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)
- Attack Complexity (AC): Low (L), High (H)
- Privileges Required (PR): None (N), Low (L), High (H)
- User Interaction (UI): None (N), Required (R)
- Scope (S): Unchanged (U), Changed (C)
- Confidentiality (C): None (N), Low (L), High (H)
- Integrity (I): None (N), Low (L), High (H)
- Availability (A): None (N), Low (L), High (H)
Summarize each metric’s value and provide the final CVSS v3.1 vector string in the exact following format:
CVSS:3.1/AV:X/AC:X/PR:X/UI:X/S:X/C:X/I:X/A:X
CVE Description: {description}"""
            batch_descriptions.append(prompt)

        # Process the batch
        generated_vectors = list(generate_cvss_vectors(batch_descriptions, api_key) or [])
        # Pad with None so a short answer list counts as wrong answers instead
        # of silently dropping rows from the accuracy denominator.
        generated_vectors += [None] * (len(batch_gt_vectors) - len(generated_vectors))

        for generated_vector, correct_vector in zip(generated_vectors, batch_gt_vectors):
            if isinstance(generated_vector, str):
                # Ground truth is stripped and upper-cased; compare like with like.
                generated_vector = generated_vector.strip().upper()

            is_correct = generated_vector == correct_vector
            if is_correct:
                correct_count += 1
            total_count += 1

            print(f"Generated Vector: {generated_vector}")
            print(f"Correct Vector: {correct_vector}")
            print(f"{'Correct' if is_correct else 'Incorrect'}\n")

        # Sleep to respect API rate limits, but only when another batch follows.
        if sleep_seconds and start + batch_size < len(descriptions):
            time.sleep(sleep_seconds)

    # Calculate accuracy
    accuracy = (correct_count / total_count) * 100 if total_count > 0 else 0
    print(f"Total Descriptions Processed: {total_count}")
    print(f"Correct Vectors: {correct_count}")
    print(f"Accuracy: {accuracy:.2f}%")

# Example usage
if __name__ == "__main__":
    import getpass
    import os

    # Never store the API key in the notebook. Read it from the environment and
    # fall back to a prompt that does not echo the input. The key that used to
    # be hard-coded here must be treated as leaked and revoked.
    api_key = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Gemini API key: ")
    process_cvss_file("/content/cti-vsp.tsv", api_key, max_rows=10)  # Adjust the file path as needed


Error during API call: HTTPConnectionPool(host='localhost', port=39481): Read timed out. (read timeout=600.0)
Generated Vector: None
Correct Vector: CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H
Incorrect

Generated Vector: None
Correct Vector: CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:N
Incorrect

