In [1]:
import torch
import os

# Set the API key
os.environ['HF_TOKEN'] = ''

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

api_key = os.getenv('HF_TOKEN')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B",use_auth_token=api_key)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B",use_auth_token=api_key)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

In [3]:
# Use a pipeline as a high-level helper
from transformers import pipeline

text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [5]:
import os
import pandas as pd
import re


df = pd.read_csv('/content/output_questions (2).tsv',sep='\t')
df = df.drop(['Correct Answer','Explanation'], axis=1)



# Function to prepare the prompt for the model
def prepare_prompt(question, options, prompt):
    options_text = "\n".join([f"Option {chr(65+i)}: {opt}" for i, opt in enumerate(options)])
    formatted_prompt = f"{prompt}\nQuestion: {question}\n{options_text}\nAnswer:"
    print("Formatted Prompt:\n", formatted_prompt)  # Debug print to see the formatted prompt
    return formatted_prompt

# Function to tokenize input
def tokenize_input(formatted_prompt, tokenizer):
    return tokenizer(formatted_prompt, return_tensors='pt')['input_ids']

# Function to generate response from the model
def generate_response(input_ids, model, tokenizer, max_length=512):
    response = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return generated_text

# Function to extract the answer from generated text
def extract_answer(generated_text):
    print("Generated text:", generated_text)  # Debug print to see the actual generated text
    # Find the last line that contains only a single uppercase letter
    match = re.findall(r'\b[A-D]\b', generated_text)
    return match[-1] if match else "No valid answer found"


# Number of rows to sample
n_samples = 1  # or any number of rows you want to sample

# Randomly sample rows from the DataFrame
sampled_df = df.sample(n=n_samples, random_state=42)
results = []
for _, row in sampled_df.iterrows():
    question = row['Question']
    options = [row['Option A'], row['Option B'], row['Option C'], row['Option D']]
    options_str = "\n".join([f"{chr(65 + i)}. {opt}" for i, opt in enumerate(options)])
    prompt = (
        "You are a cybersecurity expert specializing in cyber threat intelligence. "
        "You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. "
        "Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.\n\n"
        f"Question: {question}\n"
        f"Options:\n{options_str}"
    )


    # Prepare the prompt
    formatted_prompt = prepare_prompt(question, options, prompt)

    # Tokenize the input
    input_ids = tokenize_input(formatted_prompt, tokenizer)

    # Generate a response
    generated_text = generate_response(input_ids, model, tokenizer)

    # Extract the answer
    predicted_answer = extract_answer(generated_text)

    # Append results
    results.append({
        'Question': question,
        'Formatted Prompt': formatted_prompt,
        'Generated Text': generated_text,
        'llm38b': predicted_answer,
    })

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('llm38b.csv', index=False)

print("Predicted answers saved to 'llm38b.csv'")


Formatted Prompt:
 You are a cybersecurity expert specializing in cyber threat intelligence. You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.

Question: Which MITRE ATT&CK Enterprise technique involves an adversary using an existing, legitimate remote access application to maintain persistent access to a victim network?
Options:
A. T1190 - Exploit Public-Facing Application
B. T1071 - Application Layer Protocol
C. T1547.012 - Boot or Logon Autostart Execution
D. T1076 - Remote Desktop Protocol (RDP)
Question: Which MITRE ATT&CK Enterprise technique involves an adversary using an existing, legitimate remote access application to maintain persistent access to a victim network?
Option A: T1190 - Exploit Public-Facing Application
Option B: T1071 - Application Layer Protocol
Option C: T1547.012 - Boot

# New Section

In [None]:
import pandas as pd

cti=pd.read_csv('/content/cti-mcq.tsv', delimiter='\t')
answer=pd.read_csv('/content/llm38b.csv')
cti.head()


In [None]:
merged_df = pd.merge(cti, answer, on='Question', suffixes=('_gt', '_model'))
merged_df.head()




In [None]:
def calculate_accuracy(row):
    gt_answer = row['GT']
    model_answer = row['Predicted Answer']

    return int(gt_answer.strip().lower() == model_answer.strip().lower())

In [None]:
# Apply the accuracy calculation function
merged_df['accuracy'] = merged_df.apply(calculate_accuracy, axis=1)

# Calculate overall accuracy
overall_accuracy = merged_df['accuracy'].mean()

print(f'Overall Accuracy: {overall_accuracy * 100:.2f}%')

# Save the merged dataset with accuracy to a new CSV file
merged_df.to_csv('merged_with_accuracy.csv', index=False)

In [None]:
import os
import pandas as pd
import re


# Load the dataset from the TSV file
df = pd.read_csv('/content/cti-mcq.tsv', delimiter='\t')
df = df.drop('GT', axis=1)


# Function to prepare the prompt for the model
def prepare_prompt(question, options, prompt):
    options_text = "\n".join([f"Option {chr(65+i)}: {opt}" for i, opt in enumerate(options)])
    formatted_prompt = f"{prompt}\nQuestion: {question}\n{options_text}\nAnswer:"
    print("Formatted Prompt:\n", formatted_prompt)  # Debug print to see the formatted prompt
    return formatted_prompt

# Function to tokenize input
def tokenize_input(formatted_prompt, tokenizer):
    return tokenizer(formatted_prompt, return_tensors='pt')['input_ids']

# Function to generate response from the model
def generate_response(input_ids, model, tokenizer, max_length=512):
    response = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return generated_text

# Function to extract the answer from generated text
def extract_answer(generated_text):
    print("Generated text:", generated_text)  # Debug print to see the actual generated text
    # Find the last line that contains only a single uppercase letter
    match = re.findall(r'\b[A-D]\b', generated_text)
    return match[-1] if match else "No valid answer found"


# Number of rows to sample
n_samples = 250  # or any number of rows you want to sample

# Randomly sample rows from the DataFrame
sampled_df = df.sample(n=n_samples, random_state=42)
results = []
for _, row in sampled_df.iterrows():
    question = row['Question']
    options = [row['Option A'], row['Option B'], row['Option C'], row['Option D']]
    prompt = row['Prompt']

    # Prepare the prompt
    formatted_prompt = prepare_prompt(question, options, prompt)

    # Tokenize the input
    input_ids = tokenize_input(formatted_prompt, tokenizer)

    # Generate a response
    generated_text = generate_response(input_ids, model, tokenizer)

    # Extract the answer
    predicted_answer = extract_answer(generated_text)

    # Append results
    results.append({
        'Question': question,
        'Formatted Prompt': formatted_prompt,
        'Generated Text': generated_text,
        'Predicted Answer': predicted_answer,
    })

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('white.csv', index=False)

print("Predicted answers saved to 'white.csv'")


In [None]:
import torch

# Check if GPU is available
if torch.cuda.is_available():
  device = torch.device("cuda")
  print("Using GPU:", torch.cuda.get_device_name(0))
else:
  device = torch.device("cpu")
  print("Using CPU")


In [None]:
white=pd.read_csv('/content/white.csv')
ans=pd.read_csv('/content/cti-mcq.tsv',sep="\t")


In [None]:
merged = pd.merge(white, ans, on='Question', suffixes=('_gt', '_model'))
print(merged.head())



In [None]:
def compute_mcq_accuracy(df):
    correct = 0
    total = 0
    for idx, row in df.iterrows():
        pred = row['llm38b']
        gt = row['GT']
        if pred in ['A', 'B', 'C', 'D', 'X']:
            total += 1
        else:
            print('Invalid response at row {}'.format(idx+1))
        if pred == gt:
            correct += 1
    return correct/total*100

In [None]:
print('Accuracy:', compute_mcq_accuracy(merged_df))