In [None]:
import torch
import os

# Set the API key
os.environ['HF_TOKEN'] = 'please use your key'

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

api_key = os.getenv('HF_TOKEN')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B",use_auth_token=api_key)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B",use_auth_token=api_key)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Downloading shards: 100%|██████████| 4/4 [01:16<00:00, 19.13s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.22it/s]


In [3]:
# Use a pipeline as a high-level helper
from transformers import pipeline

text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

E0000 00:00:1722701058.137447      13 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="local"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:479
E0803 16:04:18.169371263      13 oauth2_credentials.cc:238]            oauth_fetch: UNKNOWN:C-ares status is not ARES_SUCCESS qtype=A name=metadata.google.internal. is_balancer=0: Domain name not found {created_time:"2024-08-03T16:04:18.169354113+00:00", grpc_status:2}


In [4]:
import os
import pandas as pd
import re



In [5]:
# Load the dataset from the TSV file
df = pd.read_csv('/kaggle/input/cti-mcq/cti-mcq.tsv', delimiter='\t')
df = df.drop('GT', axis=1)

In [6]:
# Function to prepare the prompt for the model
def prepare_prompt(question, options, prompt):
    options_text = "\n".join([f"Option {chr(65+i)}: {opt}" for i, opt in enumerate(options)])
    formatted_prompt = f"{prompt}\nQuestion: {question}\n{options_text}\nAnswer:"
    print("Formatted Prompt:\n", formatted_prompt)  # Debug print to see the formatted prompt
    return formatted_prompt

# Function to tokenize input
def tokenize_input(formatted_prompt, tokenizer):
    return tokenizer(formatted_prompt, return_tensors='pt')['input_ids']

# Function to generate response from the model
def generate_response(input_ids, model, tokenizer, max_length=512):
    response = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return generated_text

# Function to extract the answer from generated text
def extract_answer(generated_text):
    print("Generated text:", generated_text)  # Debug print to see the actual generated text
    # Find the last line that contains only a single uppercase letter
    match = re.findall(r'\b[A-D]\b', generated_text)
    return match[-1] if match else "No valid answer found"


# Number of rows to sample
n_samples = 10  # or any number of rows you want to sample

# Randomly sample rows from the DataFrame
sampled_df = df.sample(n=n_samples, random_state=42)
results = []
for _, row in sampled_df.iterrows():
    question = row['Question']
    options = [row['Option A'], row['Option B'], row['Option C'], row['Option D']]
    prompt = row['Prompt']

    # Prepare the prompt
    formatted_prompt = prepare_prompt(question, options, prompt)

    # Tokenize the input
    input_ids = tokenize_input(formatted_prompt, tokenizer)

    # Generate a response
    generated_text = generate_response(input_ids, model, tokenizer)

    # Extract the answer
    predicted_answer = extract_answer(generated_text)

    # Append results
    results.append({
        'Question': question,
        'Formatted Prompt': formatted_prompt,
        'Generated Text': generated_text,
        'Predicted Answer': predicted_answer,
    })

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('predicted_answers.csv', index=False)

print("Predicted answers saved to 'predicted_answers.csv'")


The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Formatted Prompt:
 You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which threat group has utilized JuicyPotato to abuse the SeImpersonate token privilege for privilege escalation as documented in MITRE ATT&CK technique T1134?  **Options:** A) Blue Mockingbird B) C0135 C) APT41 D) BlackCat  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text. 
Question: Which threat group has utilized JuicyPotato to abuse the SeImpersonate token privilege for privilege escalation as documented in MITRE ATT&CK technique T1134?
Option A: Blue Mockingbird
Option B: C0135
Option C: APT41
Option D: BlackCat
Answer:
Generated text: You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) 

In [8]:
dff=pd.read_csv('predicted_answers.csv')
dff.head()

Unnamed: 0,Question,Formatted Prompt,Generated Text,Predicted Answer
0,Which threat group has utilized JuicyPotato to...,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,A
1,"During a security assessment, you found that a...",You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,D
2,What might be a suitable mitigation technique ...,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,B
3,When considering the potential consequences of...,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,B
4,"Under MITRE ATT&CK ID T1134.002, which malware...",You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,A


In [10]:
df2=pd.read_csv('/kaggle/input/cti-mcq/cti-mcq.tsv', delimiter='\t')
df2.tail()

Unnamed: 0,URL,Question,Option A,Option B,Option C,Option D,Prompt,GT
2495,https://cwe.mitre.org/data/definitions/444.html,Which of the following is a recommended mitiga...,Use TLS instead of SSL,Perform a comprehensive security audit,Terminate the client session after each request,Implement rate-limiting,You are given a multiple-choice question (MCQ)...,C
2496,https://cwe.mitre.org/data/definitions/31.html,Which Windows-specific characteristic makes CW...,Prevalence of path traversal vulnerabilities i...,Windows uses a different directory structure c...,Windows has a higher frequency of external inp...,Windows' handling of directory traversal isn't...,You are given a multiple-choice question (MCQ)...,B
2497,http://capec.mitre.org/data/definitions/662.html,What is the primary method an adversary uses t...,Exploiting software bugs in the browser directly,Installing a Trojan on the user's machine,Send phishing emails with malicious links,Bypassing strict security policies without pri...,You are given a multiple-choice question (MCQ)...,B
2498,https://cwe.mitre.org/data/definitions/283.html,The CWE-283 weakness primarily involves what t...,Failure to encrypt data during transmission,Failure to sanitize user input,Failure to verify the ownership of a critical ...,Failure to implement proper logging mechanisms,You are given a multiple-choice question (MCQ)...,C
2499,https://cwe.mitre.org/data/definitions/441.html,"In the context of CWE-441, which feature shoul...",History of all transactions,Data integrity of requests and responses,Identity of the initiator of the transaction,Original request content,You are given a multiple-choice question (MCQ)...,C


In [12]:
import pandas as pd

cti=pd.read_csv('/kaggle/input/cti-mcq/cti-mcq.tsv', delimiter='\t')
answer=pd.read_csv('/kaggle/working/predicted_answers.csv')
cti.head()

Unnamed: 0,URL,Question,Option A,Option B,Option C,Option D,Prompt,GT
0,https://attack.mitre.org/techniques/T1548/,Which of the following mitigations involves pr...,Audit,Execution Prevention,Operating System Configuration,User Account Control,You are given a multiple-choice question (MCQ)...,B
1,https://attack.mitre.org/techniques/T1548/,Which data source is recommended for monitorin...,Command,File,Process,User Account,You are given a multiple-choice question (MCQ)...,D
2,https://attack.mitre.org/techniques/T1548/,What does mitigation ID M1028 suggest to preve...,Limiting privileges of cloud accounts,Preventing unsigned applications from running,Minimizing applications with setuid or setgid ...,Enforcing the highest UAC level,You are given a multiple-choice question (MCQ)...,C
3,https://attack.mitre.org/techniques/T1548/,Which process creation is an indicator of pote...,C:\Windows\System32\services.exe,C:\Windows\System32\cmd.exe,C:\Windows\System32\rundll32.exe,C:\Windows\System32\notepad.exe,You are given a multiple-choice question (MCQ)...,B
4,https://attack.mitre.org/techniques/T1548/,"In a Linux environment, what is recommended to...",Monitor Windows Registry Key Modification,Monitor OS API Execution,Monitor file metadata for setuid or setgid bit...,Audit process metadata changes,You are given a multiple-choice question (MCQ)...,C


In [13]:
merged_df = pd.merge(cti, answer, on='Question', suffixes=('_gt', '_model'))
merged_df.head()

Unnamed: 0,URL,Question,Option A,Option B,Option C,Option D,Prompt,GT,Formatted Prompt,Generated Text,Predicted Answer
0,https://attack.mitre.org/techniques/T1068/,What group has leveraged CVE-2021-36934 for pr...,APT32,APT29,PLATINUM,FIN6,You are given a multiple-choice question (MCQ)...,B,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,B
1,https://attack.mitre.org/techniques/T1136/001/,What mitigation is recommended by MITRE ATT&CK...,Enable Secure Boot,Use Anti-virus Software,Enable Multi-factor Authentication,Whitelist Applications,You are given a multiple-choice question (MCQ)...,C,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,C
2,https://attack.mitre.org/techniques/T1645/,What might be a suitable mitigation technique ...,Attestation,Lock Bootloader,Security Updates,System Partition Integrity,You are given a multiple-choice question (MCQ)...,A,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,B
3,https://attack.mitre.org/techniques/T1517/,"During a security assessment, you found that a...",Application Logs - Audit Logs,Process Monitoring - Network Traffic,User Interface - System Settings,Application Vetting - Permissions Requests,You are given a multiple-choice question (MCQ)...,D,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,D
4,https://attack.mitre.org/techniques/T1134/,Which threat group has utilized JuicyPotato to...,Blue Mockingbird,C0135,APT41,BlackCat,You are given a multiple-choice question (MCQ)...,A,You are given a multiple-choice question (MCQ)...,You are given a multiple-choice question (MCQ)...,A


In [15]:
def calculate_accuracy(row):
    gt_answer = row['GT']
    model_answer = row['Predicted Answer']

    return int(gt_answer.strip().lower() == model_answer.strip().lower())

In [16]:
# Apply the accuracy calculation function
merged_df['accuracy'] = merged_df.apply(calculate_accuracy, axis=1)

# Calculate overall accuracy
overall_accuracy = merged_df['accuracy'].mean()

print(f'Overall Accuracy: {overall_accuracy * 100:.2f}%')

# Save the merged dataset with accuracy to a new CSV file
merged_df.to_csv('merged_with_accuracy.csv', index=False)

Overall Accuracy: 60.00%
