# Notebook to run GPT, Gemini, Replicate, and Ollama models

In [None]:
!pip install openai replicate vertexai python-dotenv matplotlib google-generativeai
!pip install --upgrade google-cloud-aiplatform

Defaulting to user installation because normal site-packages is not writeable
Collecting vertexai
  Downloading vertexai-1.71.1-py3-none-any.whl.metadata (10 kB)
Collecting google-cloud-aiplatform==1.71.1 (from google-cloud-aiplatform[all]==1.71.1->vertexai)
  Downloading google_cloud_aiplatform-1.71.1-py2.py3-none-any.whl.metadata (32 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform==1.71.1->google-cloud-aiplatform[all]==1.71.1->vertexai)
  Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-auth<3.0.0dev,>=2.14.1 (from google-cloud-aiplatform==1.71.1->google-cloud-aiplatform[all]==1.71.1->vertexai)
  Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)
Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-cloud-aiplatform==1.71.1->google



Defaulting to user installation because normal site-packages is not writeable
Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.110.0-py2.py3-none-any.whl.metadata (38 kB)
Collecting google-genai<2.0.0,>=1.0.0 (from google-cloud-aiplatform)
  Downloading google_genai-1.31.0-py3-none-any.whl.metadata (43 kB)
Collecting websockets<15.1.0,>=13.0.0 (from google-genai<2.0.0,>=1.0.0->google-cloud-aiplatform)
  Downloading websockets-15.0.1-cp313-cp313-win_amd64.whl.metadata (7.0 kB)
Downloading google_cloud_aiplatform-1.110.0-py2.py3-none-any.whl (7.9 MB)
   ---------------------------------------- 0.0/7.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.9 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.9 MB 1.6 MB/s eta 0:00:05
   ----- ---------------------------------- 1.0/7.9 MB 1.8 MB/s eta 0:00:04
   ------- -------------------------------- 1.6/7.9 MB 2.1 MB/s eta 0:00:04
   --------- ------------------------------ 1.8/7.9 M

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
vertexai 1.71.1 requires google-cloud-aiplatform[all]==1.71.1, but you have google-cloud-aiplatform 1.110.0 which is incompatible.


In [1]:
from openai import OpenAI
import replicate
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig, HarmBlockThreshold, HarmCategory

import pandas as pd
import numpy as np
import os
import time
import re
import matplotlib.pyplot as plt

## Setup all APIs

In [2]:
# Load variables from a local .env file into the process environment.
# (presumably this is where the Replicate token and other API keys live — confirm against your .env)
import os
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
from openai  import OpenAI
# ChatGPT client. The key is read from the OPENAI_API_KEY environment variable
# (e.g. populated by load_dotenv()). When the variable is absent the original
# placeholder string is used, so the cell still runs without a key; requests
# made with the placeholder will be rejected by the API.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "OPENAI_API_KEY"))

In [3]:
# Smoke test: send a single user message to the 'llama3' model through the
# ollama client and print the text of the reply.
import ollama
response = ollama.chat(model='llama3', messages=[
    {'role': 'user', 'content': 'Hello from Python!'}
])
print(response['message']['content'])


Nice to meet you, Python!

How can I help you today? Do you have a question about programming in Python or something else entirely? I'm here to listen and assist if I can!


In [3]:
# Gemini setup via the google.generativeai client.
# NOTE(review): project_id is not referenced by any other code visible in this notebook — confirm it is still needed.
project_id = "Gemini API"   # add project ID and location
import google.generativeai as genai
    
# Configure the client with the key taken from the GOOGLE_API_KEY environment variable
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

## Prediction Params & Method

In [4]:
# Sampling parameters shared by every prediction helper below; chosen for more deterministic output
temperature = 0      # passed as the sampling temperature to each backend
top_p = 1            # passed as top_p to each backend
seed = 42            # forwarded to the Ollama, Replicate and OpenAI calls; the Gemini calls do not pass it
max_tokens = 2048    # generation cap (sent as num_predict / max_tokens / max_output_tokens depending on backend)

In [5]:
# System prompt: sent as the 'system' message to chat-style backends and
# prepended to the question for backends that take a single prompt string.
sys_prompt = 'You are a cybersecurity expert specializing in cyberthreat intelligence.'

In [6]:
# Maps the short model names used when calling the prediction helpers to the
# model identifier handed to the backend client.
model_mapping = {
    # Ollama models (local)
    'llama3': 'llama3',
    'llama3.1': 'llama3.1',
    'mistral': 'mistral',
    'codellama': 'codellama',
    'gemma': 'gemma',
    'phi': 'phi',
    
    # Google Gemini (free tier)
    'gemini': 'gemini-2.0-flash',  # Free model
    'gemini-pro': 'gemini-1.5-pro'  # Has free quota
}

In [7]:
import time
import ollama

def get_single_prediction_test(question, model_name):
    """
    Ask one model a question and return the text of its reply.

    Names listed in ``ollama_models`` are served by the local Ollama client
    (system prompt and question sent as separate chat messages). Names that
    start with ``'gemini'`` are served by ``genai`` (system prompt and
    question joined into one prompt string). The backend model id is looked
    up in ``model_mapping`` and the module-level sampling settings are used.

    Failures are never raised to the caller: any exception, including an
    unrecognised ``model_name``, is printed and returned as the string
    ``"Error: <message>"``.
    """
    ollama_models = ('llama3', 'llama3.1', 'mistral', 'codellama', 'gemma', 'phi')

    try:
        if model_name in ollama_models:
            chat_messages = [
                {'role': 'system', 'content': sys_prompt},
                {'role': 'user', 'content': question},
            ]
            sampling_options = {
                'temperature': temperature,
                'top_p': top_p,
                'seed': seed,
                'num_predict': max_tokens,
            }
            reply = ollama.chat(
                model=model_mapping[model_name],
                messages=chat_messages,
                options=sampling_options,
            )
            return reply['message']['content']

        if model_name.startswith('gemini'):
            gemini_model = genai.GenerativeModel(model_mapping[model_name])
            full_prompt = sys_prompt + ' ' + question
            gemini_config = genai.types.GenerationConfig(
                temperature=temperature,
                top_p=top_p,
                max_output_tokens=max_tokens,
            )
            reply = gemini_model.generate_content(
                full_prompt,
                generation_config=gemini_config,
            )
            reply_text = reply.text
            # Pause after each Gemini call (rate limiting for the free tier).
            time.sleep(1)
            return reply_text

        raise ValueError(f"Unknown model: {model_name}")

    except Exception as e:
        print(f"Error with model {model_name}: {e}")
        return f"Error: {str(e)}"


In [8]:
def get_single_prediction(question, model_name):
    """Query a hosted model and return the text of its reply.

    The backend is chosen from the prefix of ``model_name``:

    * ``llama``, ``mistral``, ``gemma``, ``01-ai`` -> ``replicate.run`` with
      the system prompt and question joined into a single prompt string.
    * ``gpt`` -> ``openai_client.chat.completions.create`` with separate
      system and user messages.
    * ``gemini`` -> ``GenerativeModel.generate_content`` with the system
      prompt and question joined into a single prompt string.

    Args:
        question: The user prompt text.
        model_name: Key into ``model_mapping``; its prefix selects the backend.

    Returns:
        The generated text as a string.

    Raises:
        ValueError: If ``model_name`` starts with none of the known prefixes
            (previously this surfaced as an UnboundLocalError on ``output``).
        KeyError: If ``model_name`` has a known prefix but no entry in
            ``model_mapping``.
    """
    replicate_prefixes = ('llama', 'mistral', 'gemma', '01-ai')
    known_prefixes = replicate_prefixes + ('gpt', 'gemini')

    # Fail early with a clear message instead of falling through every branch.
    if not model_name.startswith(known_prefixes):
        raise ValueError(f"Unknown model: {model_name}")

    model = model_mapping[model_name]

    if model_name.startswith(replicate_prefixes):
        # Replicate
        prompt = sys_prompt + ' ' + question
        # gemma is called with 0.01 instead of the shared temperature
        # (presumably because that endpoint does not accept 0 — confirm).
        replicate_temperature = 0.01 if model_name.startswith('gemma') else temperature
        request = {
            'prompt': prompt,
            'max_tokens': max_tokens,
            'temperature': replicate_temperature,
            'top_p': top_p,
            'seed': seed,
        }
        # replicate.run yields the reply in pieces; join them into one string.
        output = "".join(replicate.run(model, input=request))
    elif model_name.startswith('gpt'):
        # ChatGPT
        response = openai_client.chat.completions.create(
            model=model,
            messages=[
                {'role': 'system', 'content': sys_prompt},
                {'role': 'user', 'content': question}
            ],
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            seed=seed
        )
        output = response.choices[0].message.content
    else:
        # Gemini (the only remaining known prefix)
        gemini_model = GenerativeModel(model_name=model)
        prompt = sys_prompt + ' ' + question
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            max_output_tokens=max_tokens,
        )
        # Only block content rated high for each harm category.
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        }
        response = gemini_model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings
        )
        output = response.text
        time.sleep(1)   # so it doesn't throw error
    return output


#### Test

In [11]:
# Sample prompt used by the smoke-test cells below: asks for a CVE -> CWE
# mapping and instructs the model to end its answer with the CWE ID alone.
question = (
    "Analyze the following CVE description and map it to the appropriate CWE. "
    "Provide a brief justification. The last line of your answer should only contain the CWE ID.\n\n"
    "CVE Description:\n\n"
    "Dell EMC CloudLink 7.1 and all prior versions contain an Improper Input Validation Vulnerability. "
    "A remote low privileged attacker may potentially exploit this vulnerability, "
    "leading to execution of arbitrary files on the server."
)

##### Are all the APIs working?

In [12]:
# Smoke test: one request to the 'mistral' model through get_single_prediction_test
print(get_single_prediction_test(question, 'mistral'))

 The CVE description you provided indicates that there is a security vulnerability in Dell EMC CloudLink 7.1 and prior versions due to Improper Input Validation. This means that the software does not properly check or filter user inputs, allowing an attacker to supply malicious input that can lead to unexpected behavior, such as executing arbitrary files on the server.

In terms of Common Weakness Enumeration (CWE), this vulnerability falls under CWE-20: Improper Input Validation. This is because the software fails to ensure that input data conforms to expected format or behavior, which can lead to unintended functionality and potential security risks.

So, the CWE ID for this vulnerability is CWE-20.


In [10]:
# Smoke test: one request to the 'gemini' model (the cell defining `question` must be run first)
print(get_single_prediction_test(question, 'gemini'))

NameError: name 'question' is not defined

In [13]:
# Smoke test: one request to the 'llama3' model through get_single_prediction_test
print(get_single_prediction_test(question, 'llama3'))

The CVE description mentions "Improper Input Validation" which is a clear indication of a CWE-20: Input Validation, Sanitization and Escaping error.

Justification:
The vulnerability allows an attacker to inject malicious input that can be executed as code on the server, indicating a failure in validating or sanitizing user input. This is a classic example of CWE-20, where the system does not properly validate or sanitize user input, allowing an attacker to manipulate the system's behavior.

CWE: 20


# Run Evaluation for a Dataset

### All formatting comes here
While these helpers capture most of the output formats of the LLMs we studied, we still had to manually collect some answers from the generated response files.

In [9]:
import re
def format_rcm(text):
    """Pull the final CWE identifier out of a model response.

    Returns ``(cwe_id, True)`` where ``cwe_id`` is the last ``CWE-<digits>``
    occurrence in ``text``; when there is none, returns ``(text, False)``
    with the input unchanged.
    """
    last_cwe = None
    # Walk every occurrence and keep only the most recent one.
    for found in re.finditer(r'CWE-\d+', text):
        last_cwe = found.group(0)

    if last_cwe is None:
        return text, False
    return last_cwe, True

def format_vsp(text):
    """Pull the final CVSS v3.1 base-metric vector out of a model response.

    The vector is matched from ``AV:`` through ``A:`` with purely alphabetic
    metric values (the ``CVSS:3.1/`` prefix is not part of the match).
    Returns ``(vector, True)`` for the last match, or ``(text, False)`` with
    the input unchanged when nothing matches.
    """
    vector_re = re.compile(
        r'AV:[A-Za-z]+/AC:[A-Za-z]+/PR:[A-Za-z]+/UI:[A-Za-z]+/S:[A-Za-z]+/C:[A-Za-z]+/I:[A-Za-z]+/A:[A-Za-z]+'
    )
    vectors = vector_re.findall(text)
    return (vectors[-1], True) if vectors else (text, False)

def _extract_mcq_choice(line):
    """Return the option character found in ``line``, or ``None`` when no rule matches."""
    # "B) some option text" -> "B"
    if line.startswith(('A)', 'B)', 'C)', 'D)')):
        return line[0]
    # "... the answer is B" -> "B"
    if line.endswith(('A', 'B', 'C', 'D')):
        return line[-1]
    # Markdown bold such as "**B**": take the character just before the closing "**".
    # The length guard avoids an IndexError on a line that is only "**".
    if line.endswith('**') and len(line) >= 3:
        return line[-3]
    return None


def format_mcq(text):
    """Extract the chosen option from a multiple-choice response.

    Only the last line of ``text`` is inspected; when that line is blank
    (e.g. the response ends with a newline) the line before it is used
    instead, if there is one.

    Args:
        text: Full model response.

    Returns:
        The single character picked by the rules in ``_extract_mcq_choice``,
        or the whole response with newlines replaced by spaces when no rule
        matches (including empty input).
    """
    lines = text.split('\n')
    candidate = lines[-1].rstrip()
    # Fall back to the previous line only when one exists; an empty or
    # single-line blank response used to raise IndexError here.
    if not candidate and len(lines) >= 2:
        candidate = lines[-2].rstrip()

    choice = _extract_mcq_choice(candidate)
    if choice is not None:
        return choice
    return ' '.join(lines)

def format_taa(text):
    """Flatten a response onto one line by turning each newline into a space.

    The attribution itself still has to be extracted manually from the result.
    """
    return text.replace('\n', ' ')

In [10]:
import pandas as pd
import time

def _load_prompts(file_path):
    """Read the dataset at ``file_path`` into a DataFrame with whitespace-stripped column names."""
    extension = os.path.splitext(file_path)[1].lower()
    if extension in ('.xlsx', '.xls'):
        data = pd.read_excel(file_path)
    elif extension == '.csv':
        data = pd.read_csv(file_path, encoding='utf-8')
    else:
        # Anything else (e.g. .tsv) is read as tab-separated text.
        data = pd.read_csv(file_path, encoding='utf-8', sep='\t')
    data.columns = data.columns.str.strip()
    return data


def _format_answer(task, output):
    """Run the formatter for ``task``; return ``(answer, followed_instructions)``."""
    if task == 'rcm':
        return format_rcm(output)
    if task == 'vsp':
        return format_vsp(output)
    if task == 'mcq':
        return format_mcq(output), True
    if task == 'taa':
        return format_taa(output), True
    raise ValueError('Task unknown!')


def run_evaluation(file_path, task, model_name):
    """Run ``model_name`` on every prompt of a dataset and save the outputs.

    Each row's ``Prompt`` column is sent to ``get_single_prediction_test`` and
    the reply is reduced to an answer with the formatter for ``task``.
    Two files are written next to the dataset:

    * ``<stem>_<model_name>_response.txt`` - the raw replies, each under a
      ``#####<n>#####`` header;
    * ``<stem>_<model_name>_result.txt`` - one formatted answer per line.

    Both files always hold exactly one entry per dataset row, in row order.

    Args:
        file_path: Dataset to evaluate. ``.xlsx``/``.xls`` files are read with
            ``pd.read_excel``, ``.csv`` as comma-separated, anything else as
            tab-separated text.
        task: One of ``'rcm'``, ``'vsp'``, ``'mcq'``, ``'taa'``. An unknown
            task is reported per row and recorded as ``'Error'``.
        model_name: Model name understood by ``get_single_prediction_test``.

    Returns:
        None. If the dataset cannot be read, the error is printed and nothing
        is written.
    """
    # Keep track of time and total #chars generated
    start_time = time.time()
    count_chars = 0
    instructions_failed = 0

    try:
        data = _load_prompts(file_path)
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    # all_responses holds the raw replies, all_results the formatted answers
    all_responses = []
    all_results = []
    # Gemini requests are spaced 60 / 15 = 4 seconds apart (15 requests per minute).
    delay_per_request = 60 / 15

    for index, row in data.iterrows():
        prompt = row['Prompt']
        # Defaults used when the request or the formatting step fails.
        response_text = 'Error'
        answer = 'Error'
        try:
            response_text = get_single_prediction_test(prompt, model_name)
            count_chars += len(response_text)
            answer, success = _format_answer(task, response_text)
            if not success:
                instructions_failed += 1
        except Exception as e:
            answer = 'Error'
            print('Exception at row ', index+1)
            print(e)

        # Exactly one append per row keeps responses and results aligned,
        # and keeps the raw reply even when only the formatting step failed.
        all_responses.append(response_text)
        all_results.append(answer)
        print(index+1, answer)

        # Rate limiting: wait before the next request
        if model_name.startswith('gemini'):
            time.sleep(delay_per_request)

    time_taken = time.time() - start_time
    print('Time taken:', time_taken)
    print('#Characters generated:', count_chars)
    print('#Instructions failed:', instructions_failed)

    # Save all the responses & results. splitext drops only the final
    # extension, so dots elsewhere in the path are preserved.
    output_stem = os.path.splitext(file_path)[0]
    out_response = output_stem + '_' + model_name + '_response.txt'
    out_result = output_stem + '_' + model_name + '_result.txt'

    with open(out_response, 'w', encoding='utf-8') as f:
        for number, response in enumerate(all_responses, start=1):
            f.write('#####' + str(number) + '#####\n' + response + '\n\n')
    with open(out_result, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_results))

    print('------- Done --------')

In [43]:
# Evaluate the local llama3 model on the multiple-choice (MCQ) dataset
run_evaluation('Formatted_MCQ_Data_FINAL_FINAL_v3.tsv', 'mcq', 'llama3')

1 B
2 B
3 A
4 B
5 B
6 B
7 C
8 A
9 B
10 D
11 B
12 A
13 A
14 C
15 D
16 B
17 D
18 D
19 D
20 C
21 C
22 C
23 C
24 D
25 B
26 D
27 D
28 D
29 D
30 D
31 A
32 A
33 D
34 D
35 D
36 D
37 B
38 D
39 B
40 C
41 D
42 D
43 D
44 D
45 C
46 A
47 A
48 D
49 A
50 C
51 B
52 C
53 B
54 A
55 B
56 A
57 D
58 A
59 A
60 A
61 B
62 D
63 A
64 A
65 B
66 D
67 A
68 A
69 A
70 D
71 A
72 A
73 A
74 D
75 A
76 D
77 B
78 D
79 B
80 A
81 A
82 C
83 A
84 D
85 A
86 D
87 B
88 D
89 B
90 D
91 D
92 D
93 B
94 B
95 B
96 D
97 D
98 D
99 A
100 C
101 D
102 D
103 D
104 C
105 D
106 C
107 D
108 C
109 A
110 A
111 A
112 D
113 A
114 A
115 A
116 A
117 A
118 A
119 A
120 A
121 A
122 D
123 A
124 A
125 A
126 A
127 A
128 A
129 C
130 A
131 D
132 A
133 D
134 A
135 A
136 A
137 D
138 A
139 D
140 D
141 A
142 A
143 D
144 A
145 D
146 A
147 D
148 A
149 D
150 A
151 A
152 A
153 D
154 A
155 D
156 A
157 D
158 D
159 A
160 D
161 A
162 D
163 D
164 D
165 A
166 D
167 D
168 D
169 A
170 D
171 A
172 D
173 A
174 D
175 A
176 D
177 D
178 A
179 A
180 D
181 D
182 D
183 C
184 A
185 

In [40]:
# Evaluate the 'gemini' model on the multiple-choice (MCQ) dataset
run_evaluation('Formatted_MCQ_Data_FINAL_FINAL_v3.tsv', 'mcq', 'gemini')

1 B
2 B
3 A
4 B
5 B
6 B
7 C
8 A
9 C
10 D
11 B
12 A
13 A
14 C
15 D
16 B
17 D
18 D
19 D
20 C
21 C
22 C
23 D
24 D
25 B
26 D
27 D
28 D
29 D
30 D
31 A
32 A
33 D
34 D
35 D
36 D
37 B
38 D
39 B
40 C
41 D
42 D
43 D
44 D
45 C
46 A
47 A
48 D
49 A
50 C
51 B
52 C
53 B
54 A
55 B
56 D
57 D
58 A
59 A
60 A
61 B
62 D
63 A
64 A
65 B
66 D
67 A
68 A
69 A
70 D
71 A
72 A
73 A
74 D
75 A
76 D
77 B
78 D
79 B
80 B
81 A
82 C
83 A
84 D
85 A
86 D
87 B
88 D
89 B
90 D
91 A
92 D
93 B
94 B
95 B
96 D
97 D
98 D
99 A
100 C
101 D
102 D
103 D
104 C
105 B
106 C
107 B
108 C
109 A
110 A
111 A
112 D
113 B
114 B
115 A
116 A
117 A
118 A
119 A
120 A
121 A
122 D
123 A
124 A
125 A
126 A
127 A
128 A
129 C
130 A
131 D
132 A
133 D
134 A
135 A
136 A
137 D
138 A
139 D
140 D
141 A
142 A
143 D
144 A
145 D
146 A
147 D
148 A
149 D
150 A
151 B
152 A
153 D
154 B
155 D
156 A
157 D
158 D
159 A
160 D
161 A
162 D
163 D
164 D
165 D
166 D
167 D
168 D
169 A
170 D
171 A
172 D
173 A
174 C
175 A
176 D
177 D
178 D
179 A
180 D
181 D
182 D
183 C
184 A
185 

KeyboardInterrupt: 

In [13]:
# Evaluate the 'llama3' model on the 'rcm' task (answers are reduced to a CWE ID by format_rcm)
run_evaluation('final_rcm_data.tsv', 'rcm', 'llama3')

1 CWE-94
2 CWE-120
3 CWE-416
4 CWE-476
5 CWE-79
6 CWE-22
7 CWE-200
8 CWE-20
9 CWE-79
10 CWE-352
11 CWE-125
12 CWE-79
13 CWE-78
14 CWE-79
15 CWE-79
16 CWE-264
17 CWE-400
18 CWE-269
19 CWE-285
20 CWE-79
21 CWE-119
22 CWE-269
23 CWE-125
24 CWE-79
25 CWE-79
26 CWE-125
27 CWE-22
28 CWE-20
29 CWE-119
30 CWE-416
31 CWE-89
32 CWE-120
33 CWE-16
34 CWE-79
35 CWE-352
36 CWE-94
37 CWE-400
38 CWE-22
39 CWE-22
40 CWE-22
41 CWE-94
42 CWE-120
43 CWE-416
44 CWE-20
45 CWE-22
46 CWE-89
47 CWE-285
48 CWE-119
49 CWE-94
50 CWE-89
51 CWE-269
52 CWE-352
53 CWE-190
54 CWE-79
55 CWE-22
56 CWE-285
57 CWE-120
58 CWE-416
59 CWE-94
60 CWE-264
61 CWE-89
62 CWE-125
63 CWE-89
64 CWE-284
65 CWE-4
66 CWE-352
67 CWE-79
68 CWE-400
69 CWE-79
70 CWE-89
71 CWE-79
72 CWE-95
73 CWE-119
74 CWE-22
75 CWE-22
76 CWE-22
77 CWE-89
78 CWE-611
79 CWE-264
80 CWE-94
81 CWE-269
82 CWE-89
83 CWE-79
84 CWE-78
85 CWE-120
86 CWE-78
87 CWE-89
88 CWE-287
89 CWE-400
90 CWE-200
91 CWE-259
92 CWE-79
93 CWE-416
94 CWE-78
95 CWE-20
96 CWE-79
97 CWE

In [31]:
# Load the VSP dataset and preview a few prompts. Printing every row floods
# the notebook output, so only the first three are shown.
data = pd.read_excel("cti_vsp_dataset_Final.xlsx")
data.columns = data.columns.str.strip()
for prompt in data['Prompt'].head(3):
    print(prompt)

Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric's value and provide the final CVSS v3.1 vector string. Valid options for each metric are as follows: - **Attack Vector (AV)**: Network (N), Adjacent (A), Local (L), Physical (P) - **Attack Complexity (AC)**: Low (L), High (H) - **Privileges Required (PR)**: None (N), Low (L), High (H) - **User Interaction (UI)**: None (N), Required (R) - **Scope (S)**: Unchanged (U), Changed (C) - **Confidentiality (C)**: None (N), Low (L), High (H) - **Integrity (I)**: None (N), Low (L), High (H) - **Availability (A)**: None (N), Low (L), High (H) Summarize each metric's value and provide the final CVSS v3.1 vector string. Ensure the final line of your response contains only the CVSS v3 Vector String in the following format: Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVE Description: :description
Analyze the fol

In [32]:
# Strip surrounding whitespace from the column names, then show the first rows
data.columns = data.columns.str.strip()
data.head()

Unnamed: 0,cve_id,description,GT,Prompt
0,CVE-2024-26732,"In the Linux kernel, the following vulnerabili...",CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H,Analyze the following CVE description and calc...
1,CVE-2024-30046,Visual Studio Denial of Service Vulnerability,CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:H,Analyze the following CVE description and calc...
2,CVE-2024-6612,CSP violations generated links in the console ...,CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N,Analyze the following CVE description and calc...
3,CVE-2024-39435,"In Logmanager service, there is a possible mis...",CVSS:3.1/AV:L/AC:L/PR:H/UI:R/S:U/C:H/I:H/A:H,Analyze the following CVE description and calc...
4,CVE-2023-37608,An issue in Automatic Systems SOC FL9600 First...,CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N,Analyze the following CVE description and calc...


In [11]:
# Evaluate the 'llama3' model on the 'vsp' task (answers are reduced to a CVSS vector by format_vsp)
run_evaluation('cti_vsp_dataset_Final.xlsx', 'vsp', 'llama3')

1 AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H
2 AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:H
3 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
4 AV:L/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
5 AV:L/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
6 AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:H/A:H
7 AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:H
8 AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:H
9 AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:H
10 AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:H
11 AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:H
12 AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H
13 AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H
14 AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:N
15 AV:N/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:N
16 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:L/A:N
17 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:L/A:N
18 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N
19 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
20 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
21 AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:N
22 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
23 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
24 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
25 AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
26 AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H

In [32]:
# run_evaluation('datasets/cti-taa.tsv', 'taa', 'gpt3')