In [1]:
import pandas as pd
import ast
# Step 4: Define functions for transformations
def safe_parse_labels(labels):
    """Safely parse labels from a string or return them as-is if already a list."""
    if isinstance(labels, str):  # If it's a string, evaluate it
        try:
            return ast.literal_eval(labels)
        except (ValueError, SyntaxError):
            return []
    elif isinstance(labels, list):  # If it's already a list, return as is
        return labels
    return []

def remap_techniques(labels, updates):
    """Remap techniques to updated equivalents using manual_updates."""
    return [updates.get(tech, tech) for tech in labels]

def convert_and_deduplicate(labels):
    """Convert sub-techniques to parent techniques and remove duplicates."""
    return list(set([tech.split('.')[0] if '.' in tech else tech for tech in labels]))

def map_techniques_to_tactics(labels, mapping):
    """Map techniques to their corresponding tactics."""
    mapped_tactics = []
    for tech in labels:
        mapped_tactics.extend(mapping.get(tech, []))
    return list(set(mapped_tactics))  # Remove duplicates

In [2]:
import pandas as pd
import ast
import os

def map_to_tactics(file_path, predicted_header):
    # Load the original dataset
    original_df = pd.read_csv(file_path)

    # Load the updated MITRE ATT&CK Enterprise data
    updated_mitre_data_path = 'enterprise-attack-v16.1 (1).xlsx'
    updated_mitre_data = pd.read_excel(updated_mitre_data_path, sheet_name=None)

    # Extract necessary sheets
    updated_techniques_df = updated_mitre_data['techniques']
    tactics_df = updated_mitre_data['tactics']

    # Step 1: Create a mapping from techniques to tactics
    technique_to_tactics_mapping = {}
    for index, row in updated_techniques_df.iterrows():
        technique_id = row['ID']
        if pd.notnull(row['tactics']):
            tactics = [tactic.strip() for tactic in row['tactics'].split(',')]
            tactic_ids = [tactics_df[tactics_df['name'] == tactic]['ID'].values[0] for tactic in tactics]
            technique_to_tactics_mapping[technique_id] = tactic_ids

    # Step 2: Define user-provided manual remappings for deprecated techniques
    manual_updates = {
        'T1192': 'T1566.002',
        'T1139': 'T1552.003',
        'T1094': 'T1095',
        'T1081': 'T1552.001',
        'T1154': 'T1546.005',
        'T1166': 'T1548.001',
        'T1196': 'T1218.002',
        'T1155': 'T1059.002',
        'T1050': 'T1543.003',
        'T1077': 'T1021.002',
        'T1170': 'T1218.005',
        'T1100': 'T1505.003',
    }

    # Step 3: Define deprecated techniques
    deprecated_techniques = ['T1043']

    # Step 5: Apply transformations
    # Parse columns to ensure consistent format
    original_df['True_labels'] = original_df['True_labels'].apply(safe_parse_labels)
    original_df['Predicted_labels'] = original_df[predicted_header].apply(safe_parse_labels)

    # Remove deprecated techniques from True_labels
    original_df['True_labels'] = original_df['True_labels'].apply(
        lambda x: [tech for tech in x if tech not in deprecated_techniques]
    )
    original_df = original_df[original_df['True_labels'].apply(len) > 0]  # Remove rows with empty True_labels

    # Remap deprecated techniques to updated equivalents
    original_df['True_labels'] = original_df['True_labels'].apply(lambda x: remap_techniques(x, manual_updates))
    original_df['Predicted_labels'] = original_df['Predicted_labels'].apply(lambda x: remap_techniques(x, manual_updates))

    # Convert to parent techniques and remove duplicates
    original_df['True_labels'] = original_df['True_labels'].apply(convert_and_deduplicate)
    original_df['Predicted_labels'] = original_df['Predicted_labels'].apply(convert_and_deduplicate)

    # Map techniques to tactics
    original_df['True_labels'] = original_df['True_labels'].apply(lambda x: map_techniques_to_tactics(x, technique_to_tactics_mapping))
    original_df['Predicted_labels'] = original_df['Predicted_labels'].apply(lambda x: map_techniques_to_tactics(x, technique_to_tactics_mapping))

    # Step 6: Save the final processed dataset
    deduplicated_parent_techniques_output_path = os.path.join(os.path.dirname(file_path), 'tactics_' + str(os.path.basename(file_path)))
    original_df.to_csv(deduplicated_parent_techniques_output_path, index=False)

    print(f"Final dataset with deduplicated parent techniques saved to: {deduplicated_parent_techniques_output_path}")


In [5]:
map_to_tactics('GPT/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
map_to_tactics('GPT/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
map_to_tactics('GPT/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
map_to_tactics('GPT/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('GPT/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('GPT/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_with_techniques_guide_zero_shot_False.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_with_techniques_guide_one_shot_False.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_prompting_with_techniques_guide_two_shot_False.csv


In [8]:
map_to_tactics('Gemini/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Gemini/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Gemini/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Gemini/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('Gemini/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('Gemini/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_with_techniques_guide_zero_shot_False.csv
Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_with_techniques_guide_one_shot_False.csv
Final dataset with deduplicated parent techniques saved to: Gemini\tactics_prompting_with_techniques_guide_two_shot_False.csv


In [9]:
map_to_tactics('Claude/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Claude/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Claude/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
map_to_tactics('Claude/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('Claude/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('Claude/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_with_techniques_guide_zero_shot_False.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_with_techniques_guide_one_shot_False.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_prompting_with_techniques_guide_two_shot_False.csv


In [10]:
map_to_tactics('ML/ChatGPT_test_ground_truth_and_predictions.csv', 'Predicted_labels')
map_to_tactics('ML/Claude_test_ground_truth_and_predictions.csv', 'Predicted_labels')
map_to_tactics('ML/Gemini_test_ground_truth_and_predictions.csv', 'Predicted_labels')

Final dataset with deduplicated parent techniques saved to: ML\tactics_ChatGPT_test_ground_truth_and_predictions.csv
Final dataset with deduplicated parent techniques saved to: ML\tactics_Claude_test_ground_truth_and_predictions.csv
Final dataset with deduplicated parent techniques saved to: ML\tactics_Gemini_test_ground_truth_and_predictions.csv


In [11]:
import ast
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
import math

def evaluation(true_labels, predicted_labels):
  from sklearn.metrics import precision_score, recall_score, f1_score
  import ast

  # Convert string representations of lists into actual Python lists
  predicted_labels = predicted_labels.apply(ast.literal_eval)
  true_labels = true_labels.apply(ast.literal_eval)

  # Create a flattened list of unique labels for multi-label metrics
  unique_labels = set(label for sublist in true_labels for label in sublist).union(
      set(label for sublist in true_labels for label in sublist)
  )
  unique_labels = sorted(unique_labels)

  # Binarize the labels for multi-label metric calculation
  def binarize_labels(label_list, classes):
      return [1 if label in label_list else 0 for label in classes]

  Binarized_Predicted = predicted_labels.apply(lambda x: binarize_labels(x, unique_labels))
  Binarized_True = true_labels.apply(lambda x: binarize_labels(x, unique_labels))

  # Stack the binary arrays for multi-label metric computation
  y_pred = Binarized_Predicted.tolist()
  y_true = Binarized_True.tolist()

  # Calculate precision, recall, and F1-score
  avg_precision = precision_score(y_true, y_pred, average='micro')
  avg_recall = recall_score(y_true, y_pred, average='micro')
  avg_f1 = f1_score(y_true, y_pred, average='micro')

  avg_precision, avg_recall, avg_f1
  print("Metric    |   Score")
  print("-------------------")
  print(f"Precision |   {avg_precision:.2f}")
  print(f"Recall    |   {avg_recall:.2f}")
  print(f"F1 Score  |   {avg_f1:.2f}")


In [12]:
def evaluate_tactics(file_path):
    loadData = pd.read_csv(file_path)
    true_labels_ZS = loadData['True_labels']
    predicted_labels = loadData['Predicted_labels']
    print(f"Evaluating {file_path}")
    evaluation(true_labels_ZS, predicted_labels)


In [13]:
evaluate_tactics('GPT/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv')
evaluate_tactics('GPT/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv')
evaluate_tactics('GPT/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv')
evaluate_tactics('GPT/tactics_prompting_with_techniques_guide_zero_shot_False.csv')
evaluate_tactics('GPT/tactics_prompting_with_techniques_guide_one_shot_False.csv')
evaluate_tactics('GPT/tactics_prompting_with_techniques_guide_two_shot_False.csv')

Evaluating GPT/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.50
Recall    |   0.45
F1 Score  |   0.47
Evaluating GPT/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.49
Recall    |   0.47
F1 Score  |   0.48
Evaluating GPT/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.44
Recall    |   0.50
F1 Score  |   0.47
Evaluating GPT/tactics_prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.62
Recall    |   0.69
F1 Score  |   0.65
Evaluating GPT/tactics_prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.62
Recall    |   0.69
F1 Score  |   0.65
Evaluating GPT/tactics_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Preci

In [15]:
evaluate_tactics('Gemini/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv')
evaluate_tactics('Gemini/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv')
evaluate_tactics('Gemini/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv')
evaluate_tactics('Gemini/tactics_prompting_with_techniques_guide_zero_shot_False.csv')
evaluate_tactics('Gemini/tactics_prompting_with_techniques_guide_one_shot_False.csv')
evaluate_tactics('Gemini/tactics_prompting_with_techniques_guide_two_shot_False.csv')

Evaluating Gemini/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.24
Recall    |   0.26
F1 Score  |   0.25
Evaluating Gemini/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.34
Recall    |   0.39
F1 Score  |   0.36
Evaluating Gemini/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.29
Recall    |   0.28
F1 Score  |   0.29
Evaluating Gemini/tactics_prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.38
Recall    |   0.45
F1 Score  |   0.41
Evaluating Gemini/tactics_prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.50
Recall    |   0.66
F1 Score  |   0.57
Evaluating Gemini/tactics_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------

In [16]:
evaluate_tactics('Claude/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv')
evaluate_tactics('Claude/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv')
evaluate_tactics('Claude/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv')
evaluate_tactics('Claude/tactics_prompting_with_techniques_guide_zero_shot_False.csv')
evaluate_tactics('Claude/tactics_prompting_with_techniques_guide_one_shot_False.csv')
evaluate_tactics('Claude/tactics_prompting_with_techniques_guide_two_shot_False.csv')

Evaluating Claude/tactics_prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.39
Recall    |   0.57
F1 Score  |   0.46
Evaluating Claude/tactics_prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.39
Recall    |   0.60
F1 Score  |   0.47
Evaluating Claude/tactics_prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.42
Recall    |   0.64
F1 Score  |   0.51
Evaluating Claude/tactics_prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.57
Recall    |   0.79
F1 Score  |   0.66
Evaluating Claude/tactics_prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.59
Recall    |   0.80
F1 Score  |   0.68
Evaluating Claude/tactics_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------

In [17]:
evaluate_tactics('ML/tactics_ChatGPT_test_ground_truth_and_predictions.csv')
evaluate_tactics('ML/tactics_Claude_test_ground_truth_and_predictions.csv')
evaluate_tactics('ML/tactics_Gemini_test_ground_truth_and_predictions.csv')

Evaluating ML/tactics_ChatGPT_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.96
Recall    |   0.77
F1 Score  |   0.85
Evaluating ML/tactics_Claude_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.91
Recall    |   0.92
F1 Score  |   0.91
Evaluating ML/tactics_Gemini_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.91
Recall    |   0.92
F1 Score  |   0.92


# Evaluate Techniques

In [18]:
import pandas as pd
import ast
# Step 4: Define functions for transformations
def safe_parse_labels(labels):
    """Safely parse labels from a string or return them as-is if already a list."""
    if isinstance(labels, str):  # If it's a string, evaluate it
        try:
            return ast.literal_eval(labels)
        except (ValueError, SyntaxError):
            return []
    elif isinstance(labels, list):  # If it's already a list, return as is
        return labels
    return []

def remap_techniques(labels, updates):
    """Remap techniques to updated equivalents using manual_updates."""
    return [updates.get(tech, tech) for tech in labels]

def convert_and_deduplicate(labels):
    """Convert sub-techniques to parent techniques and remove duplicates."""
    return list(set([tech.split('.')[0] if '.' in tech else tech for tech in labels]))

In [19]:
import pandas as pd
import ast
import os

def map_to_techniques(file_path, predicted_header):
    # Load the original dataset
    original_df = pd.read_csv(file_path)

    # Load the updated MITRE ATT&CK Enterprise data
    updated_mitre_data_path = 'enterprise-attack-v16.1 (1).xlsx'
    updated_mitre_data = pd.read_excel(updated_mitre_data_path, sheet_name=None)

    # Extract necessary sheets
    updated_techniques_df = updated_mitre_data['techniques']
    tactics_df = updated_mitre_data['tactics']

    # Step 1: Create a mapping from techniques to tactics
    technique_to_tactics_mapping = {}
    for index, row in updated_techniques_df.iterrows():
        technique_id = row['ID']
        if pd.notnull(row['tactics']):
            tactics = [tactic.strip() for tactic in row['tactics'].split(',')]
            tactic_ids = [tactics_df[tactics_df['name'] == tactic]['ID'].values[0] for tactic in tactics]
            technique_to_tactics_mapping[technique_id] = tactic_ids

    # Step 2: Define user-provided manual remappings for deprecated techniques
    manual_updates = {
        'T1192': 'T1566.002',
        'T1139': 'T1552.003',
        'T1094': 'T1095',
        'T1081': 'T1552.001',
        'T1154': 'T1546.005',
        'T1166': 'T1548.001',
        'T1196': 'T1218.002',
        'T1155': 'T1059.002',
        'T1050': 'T1543.003',
        'T1077': 'T1021.002',
        'T1170': 'T1218.005',
        'T1100': 'T1505.003',
    }

    # Step 3: Define deprecated techniques
    deprecated_techniques = ['T1043']

    # Step 5: Apply transformations
    # Parse columns to ensure consistent format
    original_df['True_labels'] = original_df['True_labels'].apply(safe_parse_labels)
    original_df['Predicted_labels'] = original_df[predicted_header].apply(safe_parse_labels)

    # Remove deprecated techniques from True_labels
    original_df['True_labels'] = original_df['True_labels'].apply(
        lambda x: [tech for tech in x if tech not in deprecated_techniques]
    )
    original_df = original_df[original_df['True_labels'].apply(len) > 0]  # Remove rows with empty True_labels

    # Remap deprecated techniques to updated equivalents
    original_df['True_labels'] = original_df['True_labels'].apply(lambda x: remap_techniques(x, manual_updates))
    original_df['Predicted_labels'] = original_df['Predicted_labels'].apply(lambda x: remap_techniques(x, manual_updates))

    # Convert to parent techniques and remove duplicates
    original_df['True_labels'] = original_df['True_labels'].apply(convert_and_deduplicate)
    original_df['Predicted_labels'] = original_df['Predicted_labels'].apply(convert_and_deduplicate)

    # Step 6: Save the final processed dataset
    deduplicated_parent_techniques_output_path = os.path.join(os.path.dirname(file_path), 'tactics_' + str(os.path.basename(file_path)))
    original_df.to_csv(deduplicated_parent_techniques_output_path, index=False)

    print(f"Final dataset with deduplicated parent techniques saved to: {deduplicated_parent_techniques_output_path}")


In [20]:
def evaluate_techniques(file_path, predicted_header):
    loadData = pd.read_csv(file_path)
    true_labels_ZS = loadData['True_labels']
    predicted_labels = loadData[predicted_header]
    print(f"Evaluating {file_path}")
    evaluation(true_labels_ZS, predicted_labels)


In [21]:
evaluate_techniques('Gemini/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Gemini/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Gemini/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Gemini/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('Gemini/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('Gemini/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Evaluating Gemini/prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.20
Recall    |   0.14
F1 Score  |   0.16
Evaluating Gemini/prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.34
Recall    |   0.24
F1 Score  |   0.28
Evaluating Gemini/prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.30
Recall    |   0.18
F1 Score  |   0.23
Evaluating Gemini/prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.41
Recall    |   0.28
F1 Score  |   0.33
Evaluating Gemini/prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.60
Recall    |   0.48
F1 Score  |   0.53
Evaluating Gemini/prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.55
Recall    |   0.

In [22]:
evaluate_techniques('Claude/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Claude/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Claude/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('Claude/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('Claude/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('Claude/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Evaluating Claude/prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.29
Recall    |   0.40
F1 Score  |   0.34
Evaluating Claude/prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.28
Recall    |   0.41
F1 Score  |   0.33
Evaluating Claude/prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.31
Recall    |   0.42
F1 Score  |   0.36
Evaluating Claude/prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.51
Recall    |   0.59
F1 Score  |   0.55
Evaluating Claude/prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.57
Recall    |   0.60
F1 Score  |   0.59
Evaluating Claude/prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.63
Recall    |   0.

In [23]:
evaluate_techniques('GPT/prompting_without_techniques_guide_zero_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('GPT/prompting_without_techniques_guide_one_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('GPT/prompting_without_techniques_guide_two_shot_with_limit.csv', 'Technique_id')
evaluate_techniques('GPT/prompting_with_techniques_guide_zero_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('GPT/prompting_with_techniques_guide_one_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('GPT/prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Evaluating GPT/prompting_without_techniques_guide_zero_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.45
Recall    |   0.27
F1 Score  |   0.34
Evaluating GPT/prompting_without_techniques_guide_one_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.46
Recall    |   0.29
F1 Score  |   0.36
Evaluating GPT/prompting_without_techniques_guide_two_shot_with_limit.csv
Metric    |   Score
-------------------
Precision |   0.35
Recall    |   0.30
F1 Score  |   0.32
Evaluating GPT/prompting_with_techniques_guide_zero_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.56
Recall    |   0.55
F1 Score  |   0.56
Evaluating GPT/prompting_with_techniques_guide_one_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.66
Recall    |   0.58
F1 Score  |   0.62
Evaluating GPT/prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.70
Recall    |   0.55
F1 Score  |   0

In [24]:
evaluate_techniques('ML/ChatGPT_test_ground_truth_and_predictions.csv', 'Predicted_labels')
evaluate_techniques('ML/Claude_test_ground_truth_and_predictions.csv', 'Predicted_labels')
evaluate_techniques('ML/Gemini_test_ground_truth_and_predictions.csv', 'Predicted_labels')

Evaluating ML/ChatGPT_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.92
Recall    |   0.70
F1 Score  |   0.79
Evaluating ML/Claude_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.86
Recall    |   0.85
F1 Score  |   0.85
Evaluating ML/Gemini_test_ground_truth_and_predictions.csv
Metric    |   Score
-------------------
Precision |   0.88
Recall    |   0.87
F1 Score  |   0.87


# Rare Techniques Evaluation

In [28]:
evaluate_techniques('Gemini/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('Claude/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
evaluate_techniques('GPT/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Evaluating Gemini/rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.22
Recall    |   0.21
F1 Score  |   0.22
Evaluating Claude/rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.23
Recall    |   0.18
F1 Score  |   0.20
Evaluating GPT/rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.29
Recall    |   0.19
F1 Score  |   0.23


In [29]:
map_to_tactics('Gemini/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('Claude/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')
map_to_tactics('GPT/rare_prompting_with_techniques_guide_two_shot_False.csv', 'Without_Prompt_Limit_With_Competition_With_Limit_Return')

Final dataset with deduplicated parent techniques saved to: Gemini\tactics_rare_prompting_with_techniques_guide_two_shot_False.csv
Final dataset with deduplicated parent techniques saved to: Claude\tactics_rare_prompting_with_techniques_guide_two_shot_False.csv
Final dataset with deduplicated parent techniques saved to: GPT\tactics_rare_prompting_with_techniques_guide_two_shot_False.csv


In [30]:
evaluate_tactics('Gemini/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv')
evaluate_tactics('Claude/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv')
evaluate_tactics('GPT/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv')

Evaluating Gemini/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.29
Recall    |   0.38
F1 Score  |   0.33
Evaluating Claude/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.26
Recall    |   0.29
F1 Score  |   0.27
Evaluating GPT/tactics_rare_prompting_with_techniques_guide_two_shot_False.csv
Metric    |   Score
-------------------
Precision |   0.28
Recall    |   0.26
F1 Score  |   0.27
