# Installing Necessary Libraries 

In [None]:
# !pip install PyMuPDF
# !pip install spacy
# !pip install stix2
# !pip install bertopic
# !pip install sentence-transformers

In [None]:
!pip install umap-learn

In [None]:
!python -m spacy download en_core_web_lg

In [None]:
spacy.cli.download("en_core_web_sm")

# Importing Libraries 

In [1]:
import pymupdf as fitz
import re
from stix2 import Indicator, AttackPattern, Relationship, Report, Bundle
import json
from datetime import datetime
import TTPelement
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from umap.umap_ import UMAP
from bertopic import BERTopic
from hdbscan import HDBSCAN
from bertopic.representation import KeyBERTInspired, PartOfSpeech, MaximalMarginalRelevance
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import pandas as pd
import torch
import pickle
import numpy as np
import os
import spacy
from transformers import DistilBertTokenizer, AutoTokenizer
from transformers import DistilBertForSequenceClassification, AutoModelForSequenceClassification
import logging
import sys
import time

In [2]:
nlp = spacy.load("en_core_web_sm")

# PDF text Extraction

In [3]:
def extract_text_from_pdf(pdf_path):
    with fitz.open(pdf_path) as doc:
        text = ""
        for page in doc:
            text += page.get_text()
    return text

In [None]:
text=extract_text_from_pdf('aptreport.pdf')

In [None]:
text

# Text Cleaning and Chunking

In [4]:
def CleanText(text):
  cleaned_text = re.sub(r'\s+', ' ', text)    # remove blank spaces
  cleaned_text = re.sub(r'[^a-zA-Z0-9\s.,!?:/()\[\]@_-]+', '', cleaned_text)     # keep only required characters

  return cleaned_text
    
def chunk_sentences(text):
    """Chunk the cleaned text into sentences using spaCy."""
    doc = nlp(text)  # Process the text with spaCy
    sentences = [sent.text.strip() for sent in doc.sents]  # Extract sentences
    return [s for s in sentences if s]  # Filter out empty strings

In [None]:
cleaned_text = CleanText(text)
clean=chunk_sentences(cleaned_text)

In [None]:
clean

# Topic Modelling

In [5]:
class StemTokenizer:
    def __init__(self):
        self.stemmer = PorterStemmer()

    def __call__(self, doc):
        return [self.stemmer.stem(t) for t in word_tokenize(doc)]

def create_pos_patterns():
    """Define POS patterns for the PartOfSpeech representation model."""
    return [
        [{'POS': 'ADJ'}, {'POS': 'NOUN'}],  # e.g., "malicious software"
        [{'POS': 'NOUN'}],                  # e.g., "malware", "ransomware"
        [{'POS': 'VERB'}, {'POS': 'NOUN'}], # e.g., "exploit vulnerability"
        [{'POS': 'NOUN'}, {'POS': 'NOUN'}], # e.g., "threat actor", "data breach"
        [{'POS': 'PROPN'}],                 # e.g., Specific group names like "APT29"
        [{'POS': 'NOUN'}, {'POS': 'PROPN'}] # e.g., "attacks by Lazarus Group"
    ]

def build_representation_model():
    """Build the representation model for BERTopic."""
    pos_patterns = create_pos_patterns()
    return {
        "Main": KeyBERTInspired(),
        "POS": [
            PartOfSpeech("en_core_web_sm", pos_patterns=pos_patterns),
            MaximalMarginalRelevance(diversity=0.4)
        ],
    }

def create_vectorizer():
    """Create a custom CountVectorizer with a stemming tokenizer."""
    return CountVectorizer(
        min_df=1,         # Capture rare words
        max_df=0.95,      # Avoid overly common words
        ngram_range=(1, 2),
        stop_words='english',
        tokenizer=StemTokenizer()
    )

def create_umap_model():
    """Create a UMAP model with adjusted parameters for smaller datasets."""
    return UMAP(
        n_neighbors=3,
        n_components=5,
        min_dist=0.0,
        metric='cosine',
        random_state=42
    )

def create_hdbscan_model():
    """Create an HDBSCAN model with adjusted parameters for smaller datasets."""
    return HDBSCAN(
        min_cluster_size=2,
        min_samples=1,
        metric='euclidean',
        cluster_selection_method='eom',
        prediction_data=True
    )

def build_topic_model(clean_data):
    """
    Build and fit a BERTopic model using the custom components.

    Parameters:
        clean_data (list of str): The preprocessed and cleaned dataset.

    Returns:
        tuple: The fitted BERTopic model, topics, and initial probabilities.
    """
    representation_model = build_representation_model()
    vectorizer_model = create_vectorizer()
    umap_model = create_umap_model()
    hdbscan_model = create_hdbscan_model()

    topic_model = BERTopic(
        nr_topics=None,  # Adjust or set to None for automatic determination
        vectorizer_model=vectorizer_model,
        representation_model=representation_model,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        verbose=True
    )

    # Fit the model
    topic_model.fit(clean_data)

    # Transform the data
    topics, ini_probs = topic_model.transform(clean_data)
    return topic_model, topics, ini_probs

In [None]:
# Build and fit the BERTopic model
topic_model, topics, ini_probs = build_topic_model(clean)

In [None]:
topics = topic_model.get_representative_docs()

In [None]:
topics

In [None]:
for topic_id, docs in topics.items():
    print(f"Topic {topic_id}:")
    for doc in docs:
        print(f"- {doc}")
    print("\n")

# IOC Extraction and Soft Tagging

In [6]:
def get_ttp_elements_from_chunks(chunks):
    elements = []
    counts = []
    replaced = []

    for chunk in chunks:
        elems, count, replaced_values = TTPelement.GetTTPelements(chunk)
        elements.append(elems)
        counts.append(count)
        replaced.append(replaced_values)
        
#     print("Elements:", elements)
#     print("Counts:", counts)
#     print("Replaced Values:", replaced)

    return elements, counts, replaced

In [None]:
chunks=get_ttp_elements_from_chunks(topics)

In [7]:
def process_dict_with_ttp(data):
    processed_results = {}  # Dictionary to store processed results

    for key, sentences in data.items():
        # Ensure sentences is a list; skip if not
        if not isinstance(sentences, list):
            raise ValueError(f"Expected a list of sentences for key '{key}', got {type(sentences)}")

        # Merge the sentences into a single string for each key
        merged_sentences = " ".join(sentences)  # Join with a space or use ". " if you want a period between sentences

        # Process the merged sentences using get_ttp_elements_from_chunks
        elements, counts, replaced = get_ttp_elements_from_chunks([merged_sentences])  # Pass the merged sentence list

        # Structure the processed results
        processed_results[key] = {
            "Original Sentences": sentences,
            "Merged Sentences": merged_sentences,
            "Elements": elements,
            "Counts": counts,
            "Replaced Sentences": replaced,
        }

    return processed_results

In [None]:
# Process the dictionary
results = process_dict_with_ttp(topics)

# Display results for each key
for key, result in results.items():
    print(f"Results for Key {key}:\n")
    for i, sentence in enumerate(result["Original Sentences"]):
        print(f"Sentence: {sentence}")
        print(f"  Elements: {result['Elements'][i]}")
        print(f"  Word Count: {result['Counts'][i]}")
        print(f"  Replaced Sentence: {result['Replaced Sentences'][i]}")
        print()


# Model

In [8]:
# Load the model, tokenizer, and multilabel binarizer
model_dir = "distilbert-finedtune"

In [9]:
# Load the model
model = DistilBertForSequenceClassification.from_pretrained(model_dir)
model.eval()  # Set to evaluation mode

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [10]:
# Load the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained(model_dir)

In [11]:
# Load the multilabel binarizer
with open("multilabel_binarizer.pkl", "rb") as f:
    multilabel_binarizer = pickle.load(f)

In [12]:
# Load the CSV file to create a label-to-column mapping
csv_path = "Labels.csv"
label_data = pd.read_csv(csv_path)

In [13]:
label_data = label_data.drop(['Unnamed: 0'],axis=1)

In [14]:
# Normalize columns in the CSV
label_data['Tactic-Name'] = label_data['Tactic-Name'].str.strip().str.lower()
label_data['Technique-Name'] = label_data['Technique-Name'].str.strip().str.lower()
label_data['SubTechnique-Name'] = label_data['SubTechnique-Name'].str.strip().str.lower()
label_data['id'] = label_data['id'].str.strip().str.lower()

In [15]:
# Create a label-to-column mapping
tactic_labels = label_data['Tactic-Name'].unique().tolist()
technique_labels = label_data['Technique-Name'].unique().tolist()
sub_technique_labels = label_data['SubTechnique-Name'].unique().tolist()
technique_id_labels = label_data['id'].unique().tolist()

In [16]:
def categorize_label(label):
    label = label.strip().lower()
    if label in tactic_labels:
        return "Tactic Name"
    elif label in technique_labels:
        return "Technique Name"
    elif label in sub_technique_labels:
        return "Sub-Technique Name"
    elif label in technique_id_labels:
        return "Technique ID"
    else:
        return "Unknown"

In [17]:
def analyze_output(structured_output):
    # Normalize all values in structured_output for consistency
    structured_output = {key: [val.strip().lower() for val in value] for key, value in structured_output.items()}
    
    # Check if all four fields are given and valid
    if all(len(structured_output[key]) > 0 for key in structured_output):
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            # print(f"[DEBUG] All four fields match:\n{matching_rows}\n")
            return structured_output
    
    # Handle cases with three fields given
    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Technique Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Technique Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    # Handle cases with two fields given
    valid_pairs = []
    matching_rows_dict = []
    keys = list(structured_output.keys())
    for i in range(len(keys)):
        for j in range(len(keys)):
            if i != j:
                key1, key2 = keys[i], keys[j]
                values1, values2 = structured_output[key1], structured_output[key2]
                for value1 in values1:
                    for value2 in values2:
                        if key1 == "Tactic Name" and key2 == "Technique Name":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['Technique-Name'] == value2)]
                        elif key1 == "Tactic Name" and key2 == "Sub-Technique Name":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['SubTechnique-Name'] == value2)]
                        elif key1 == "Tactic Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        elif key1 == "Technique Name" and key2 == "Sub-Technique Name":
                            matching_rows = label_data[(label_data['Technique-Name'] == value1) &
                                                       (label_data['SubTechnique-Name'] == value2)]
                        elif key1 == "Technique Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['Technique-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        elif key1 == "Sub-Technique Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['SubTechnique-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        else:
                            matching_rows = pd.DataFrame()
                        
                        if not matching_rows.empty:
                            valid_pairs.append((key1, value1, key2, value2))
                            matching_rows_dict.append(matching_rows)

    # Debug: Print valid pairs formed
    # print(f"[DEBUG] Valid pairs formed: {valid_pairs}\n")
    
    # Create new outputs for each valid pair and find missing labels
    merged_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
    for idx, pair in enumerate(valid_pairs):
        key1, value1, key2, value2 = pair
        matching_rows = matching_rows_dict[idx]
        temp_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
        temp_output[key1].append(value1)
        temp_output[key2].append(value2)
        completed_output = find_missing_labels_from_rows(temp_output, matching_rows)
        for key in merged_output:
            merged_output[key].extend([val for val in completed_output[key] if val not in merged_output[key]])
    
    # Handle cases with a single field given
    for key in structured_output:
        if len(structured_output[key]) > 0:
            column_name = key.replace(' ', '-')
            if column_name in label_data.columns:
                matching_rows = label_data[label_data[column_name].isin(structured_output[key])]
                if not matching_rows.empty:
                    completed_output = find_missing_labels_from_rows(structured_output, matching_rows)
                    for key in merged_output:
                        merged_output[key].extend([val for val in completed_output[key] if val not in merged_output[key]])
    
    return merged_output

In [18]:
def find_missing_labels_from_rows(filtered_output, matching_rows):
    # Normalize all values in filtered_output for consistency
    filtered_output = {key: [val.strip().lower() for val in value] for key, value in filtered_output.items()}

    # Filter rows that match all non-empty fields in filtered_output
    condition = pd.Series(True, index=matching_rows.index)
    for key in filtered_output:
        if filtered_output[key]:
            column_name = None
            if key == "Tactic Name":
                column_name = 'Tactic-Name'
            elif key == "Technique Name":
                column_name = 'Technique-Name'
            elif key == "Sub-Technique Name":
                column_name = 'SubTechnique-Name'
            elif key == "Technique ID":
                column_name = 'id'

            if column_name:
                condition &= matching_rows[column_name].isin(filtered_output[key])

    filtered_matching_rows = matching_rows[condition]

    # Debug: Print filtered matching rows
    # print(f"[DEBUG] Filtered matching rows based on existing output:\n{filtered_matching_rows}\n")

    # Find missing labels based on the filtered output using filtered matching rows
    if not filtered_matching_rows.empty:
        for _, row in filtered_matching_rows.iterrows():
            if len(filtered_output["Tactic Name"]) > 0 and len(filtered_output["Technique Name"]) > 0:
                if len(filtered_output["Sub-Technique Name"]) == 0 and pd.notna(row['SubTechnique-Name']):
                    filtered_output["Sub-Technique Name"].append(row['SubTechnique-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

            if len(filtered_output["Tactic Name"]) > 0 and len(filtered_output["Sub-Technique Name"]) > 0:
                if len(filtered_output["Technique Name"]) == 0 and pd.notna(row['Technique-Name']):
                    filtered_output["Technique Name"].append(row['Technique-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

            if len(filtered_output["Technique Name"]) > 0 and len(filtered_output["Sub-Technique Name"]) > 0:
                if len(filtered_output["Tactic Name"]) == 0 and pd.notna(row['Tactic-Name']):
                    filtered_output["Tactic Name"].append(row['Tactic-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

    return filtered_output

In [19]:
def predict(text):   
    # Tokenize the input text
    encoding = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    
    # Move to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    encoding = {key: value.to(device) for key, value in encoding.items()}

    # Perform inference
    with torch.no_grad():
        outputs = model(**encoding)
        logits = outputs.logits

    # Apply sigmoid to get probabilities
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits).cpu().numpy()

    # Set threshold and get predictions
    threshold = 0.3
    preds = (probs >= threshold).astype(int)

    # Transform predictions back to original labels
    labels = multilabel_binarizer.inverse_transform(preds)

    #Debug: Print the model output
    # print(f"[DEBUG] Model output labels: {labels}\n")

    # Prepare structured output
    structured_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
    for label_set in labels:
        for label in label_set:
            category = categorize_label(label)
            if category != "Unknown":
                structured_output[category].append(label)
    
    # Analyze the output to filter valid pairs and find missing labels
    final_output = analyze_output(structured_output)

    # Append the original text as a chunk in the final output
    final_output["Chunk"] = [text]

    return final_output

In [None]:
# Example usage
if __name__ == "__main__":
    test_text = "Phishing impersonating governmental entities as the spreading method. In some campaigns, particularly those involving financial attacks, the group impersonates banking institutions."
    predictions = predict(test_text)
    for label_type, value in predictions.items():
        print(f"{label_type}: {', '.join(value)}")

In [None]:
# Example usage
if __name__ == "__main__":
    test_text = "Sophisticated cyberattacks frequently involve phishing campaigns, where attackers utilize malicious URLs and email addresses to deceive victims."
    predictions = predict(test_text)
    for label_type, value in predictions.items():
        print(f"{label_type}: {', '.join(value)}")

In [None]:
predictions

# Final stix bundles)


In [20]:
def CreateStixIndicator(IOCtype, IOCvalue):
    # Define a mapping for IOC types to their STIX patterns
    ioc_mapping = {
        "ipv4": "[ipv4-addr:value = '{}']",
        "ipv6": "[ipv6-addr:value = '{}']",
        "asn": "[autonomous-system:number = '{}']",
        "domain": "[domain-name:value = '{}']",
        "email": "[email-addr:value = '{}']",
        "filename": "[file:name = '{}']",
        "url": "[url:value = '{}']",
        "hash": "[file:hash = '{}']",
        "filepath": "[file:file_path = '{}']",
        "cve": "[vulnerability:cve = '{}']",
        "regkey": "[windows-registry-key:key = '{}']",
        "encodeencryptalgorithms": "[crypto-algorithm:name = '{}']",
        "communicationprotocol": "[network-traffic:protocols[*] = '{}']",
        "dataobject": "[artifact:payload_bin = '{}']"
    }

    # Ensure the IOC type is valid
    if IOCtype not in ioc_mapping:
        raise ValueError(f"Unsupported IOC type: {IOCtype}")

    # Replace backslashes in IOCvalue and format the pattern
    pattern_val = ioc_mapping[IOCtype].format(IOCvalue.replace('\\', '/'))

    # Create the STIX Indicator
    indicator = Indicator(
        name=IOCtype,
        description=f"Extracted {IOCtype}",
        indicator_types=["malicious-activity"],
        pattern=pattern_val,
        labels=["malicious"],
        pattern_type="stix"
    )

    return indicator

In [21]:
def return_stix_objects(counts, tactic_name, technique_name, sub_tech_name, technique_id, iocs, chunks, do_include_non_ioc=False):
    logging.basicConfig(level=logging.INFO)

    indicators_stix = []
    relationships_stix = []
    attack_patterns_stix = []

    for i in range(len(counts)):
        # Use provided values or fallback to defaults
        tactic = tactic_name[i] if i < len(tactic_name) and tactic_name[i] else "Unknown Tactic"
        technique = technique_name[i] if i < len(technique_name) and technique_name[i] else "Unknown Technique"
        sub_tech = sub_tech_name[i] if i < len(sub_tech_name) and sub_tech_name[i] else "No Sub-Technique"
        tech_id = technique_id[i] if i < len(technique_id) and technique_id[i] else "TXXXX"
        ioc = iocs[i] if i < len(iocs) and iocs[i] else {}
        description = chunks[i] if i < len(chunks) and chunks[i] else "No description available."

        # Normalize tactic names (handle both single and multiple tactics)
        tactic_names = tactic if isinstance(tactic, list) else [tactic]

        for tactic in tactic_names:
            # Create the AttackPattern STIX object
            phase_name = tactic.lower().replace(' ', '-').replace('_', '-')
            attack_pattern = AttackPattern(
                name=f"{technique} - {sub_tech}",
                description=description,
                external_references=[{
                    "source_name": "mitre-attack",
                    "external_id": tech_id
                }],
                kill_chain_phases=[{
                    "kill_chain_name": "mitre-attack",
                    "phase_name": phase_name
                }],
                aliases=[technique, sub_tech]
            )
            attack_patterns_stix.append(attack_pattern)

            # Handle IOCs if present
            if ioc:
                for ioc_type, ioc_list in ioc.items():
                    for ioc_value in ioc_list:
                        try:
                            indicator = CreateStixIndicator(ioc_type, str(ioc_value))
                            indicators_stix.append(indicator)

                            # Create relationship between Indicator and AttackPattern
                            relationship = Relationship(
                                source_ref=indicator.id,
                                target_ref=attack_pattern.id,
                                relationship_type="indicates",
                                description=f"Indicator ({ioc_value}) is associated with Attack Pattern ({technique} - {sub_tech}).",
                                start_time=datetime.now(),
                                allow_custom=True
                            )
                            relationships_stix.append(relationship)
                        except Exception as e:
                            logging.error(f"Error creating indicator for IOC ({ioc_value}): {e}")
            elif do_include_non_ioc:
                # Include a "No IOC available" relationship if configured
                logging.info(f"No IOCs found for chunk {i}. Creating placeholder relationship.")
                relationship = Relationship(
                    source_ref=None,
                    target_ref=attack_pattern.id,
                    relationship_type="indicates",
                    description="No IOC available.",
                    start_time=datetime.now(),
                    allow_custom=True
                )
                relationships_stix.append(relationship)

    return indicators_stix, relationships_stix, attack_patterns_stix

In [22]:
def make_stix(indicators, relationships, attack_patterns, clean_text, report_name, labels=None, published=None):
    logging.basicConfig(level=logging.INFO)

    # Default values for optional parameters
    labels = labels or []
    labels.append("threat-report")
    published = published or datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    try:
        # Validate inputs
        if not all(isinstance(obj_list, (list, tuple)) for obj_list in [indicators, relationships, attack_patterns]):
            raise ValueError("Indicators, relationships, and attack patterns must be iterable (e.g., list or tuple).")

        # Collect all object IDs
        all_ids = [obj.id for obj in indicators + relationships + attack_patterns]

        # Log a warning if no objects exist
        if not all_ids:
            logging.warning(f"No STIX objects to include in the report '{report_name}'. Creating an empty bundle.")
            return Bundle(objects=[])

        # Create the report
        report = Report(
            name=report_name,
            description=clean_text,
            published=published,
            object_refs=all_ids,
            labels=labels,
        )

        # Collect all objects into a bundle
        objects = indicators + attack_patterns + relationships + [report]
        bundle = Bundle(objects=objects)

        return bundle

    except Exception as e:
        logging.error(f"An error occurred while creating the STIX bundle for report '{report_name}': {e}")
        return Bundle(objects=[])

# Main Workflow Code

In [None]:
chunks = [
    """
    The APT group launched a watering hole attack targeting specific industry websites to deliver malware. The IP address 45.33.32.156 was 
    identified as hosting the malicious content, alongside the domain compromised-site.com. Visitors were redirected to this domain and 
    prompted to download a fake software update named UpdateInstaller.exe, which contained malware. The malware would establish persistence 
    by creating registry keys and communicating with the C2 server at 203.0.113.60 using HTTPS.
    """,

    """
    In an escalation of their tactics, the same APT group deployed ransomware via malvertising on popular websites. Users who clicked on the ad were redirected to a download page 
    hosted at malicious-ads.io, where the ransomware executable ransom_payload_v3.2.exe was served. Once executed, the ransomware 
    encrypted files on the victim’s machine and displayed a ransom note directing the victim to contact the attacker via email at 
    support@ransom.com. During encryption, the ransomware communicated with IP addresses like 185.60.218.35 and 192.168.0.1, sending 
    encrypted session keys and victim data.
    """,

    """
    The group also launched brute-force attacks targeting SSH servers within specific critical infrastructure organizations. IP addresses such as 103.21.244.1 and 102.51.145.2 were detected 
    repeatedly attempting login attempts. Once access was obtained, the attackers deployed a reverse shell to 198.51.100.50 over port 443, 
    allowing them to control the compromised system. Further reconnaissance commands were executed to gather system information, which was 
    then exfiltrated to the command and control server. The tactic of initial access through brute force and persistence through remote 
    access tools was identified.
    """,

    """
    Phishing emails were distributed with the subject line “Account Security Notice” to lure users into clicking a malicious link. The emails, 
    sent from noreply@security-alerts.com, claimed to be from the user's bank and included a link to verify account details. The link directed 
    users to http://phishing-site.com/login, a site that mimicked the bank's login page. This page harvested login credentials, which were then 
    used in unauthorized access attempts on financial systems. This is another technique in the APT group's playbook, showing tactics of credential access through phishing and credential dumping.
    """,

    """
    A supply chain attack was discovered, revealing that the APT group compromised a legitimate software vendor’s update server. They injected malware 
    into the update package hosted on update.vendor-software.com. End users who downloaded the update unknowingly installed the malware, which 
    created a backdoor to a remote IP address 203.0.113.45. This backdoor allowed attackers to execute arbitrary commands and exfiltrate data 
    from infected systems. The tactics of lateral movement and exfiltration through command and control were observed.
    """,

    """
    Cybercriminals within the same APT group also targeted e-commerce platforms with SQL injection attacks, attempting to extract credit card details from databases. The 
    attackers operated from IP addresses 198.51.100.88 and 102.51.67.30. Upon successful extraction, the data was encrypted and exfiltrated to 
    a remote server at 203.0.113.77. The exfiltration utilized SSL to secure the data transfer, making detection more challenging. Techniques 
    of data exfiltration and defense evasion were identified in this attack.
    """,

    """
    The group distributed a banking trojan through spam campaigns that claimed to be from a well-known financial institution. The email urged recipients 
    to download a “security patch” from http://safe-banking-update.com/patch.exe, which was actually the trojan. Once installed, the trojan 
    collected browser session data, including saved passwords and autofill information, and exfiltrated this data to the server at 45.33.32.111. 
    The tactics of credential access and data collection were associated with this campaign.
    """,

    """
    Finally, a botnet campaign linked to this APT group was identified, leveraging IoT devices for launching DDoS attacks against critical services. The devices connected to IP addresses 198.51.100.20 and 
    192.0.2.12, forming a network of compromised systems. The botnet periodically sent commands to these IPs to execute coordinated DDoS attacks 
    against high-profile targets. The tactic of impact through service disruption was noted, along with persistence through IoT device compromise.
    """
]

# Main

In [23]:
def main_workflow(pdf_path, base_output_folder):
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Processing {pdf_path} and saving results in {base_output_folder}")

    # Step 0: Extract and Clean Text
    logging.info("Step 0: Extracting text from PDF...")
    raw_text = extract_text_from_pdf(pdf_path)

    logging.info("Step 0: Cleaning the extracted text...")
    clean_text = CleanText(raw_text)

    # Step 1: Chunk the cleaned text into sentences
    logging.info("Step 1: Chunking the cleaned text into sentences...")
    chunks = chunk_sentences(clean_text)

    # Step 2: Build a Topic Model
    logging.info("Step 2: Building the topic model...")
    topic_model, topics, ini_probs = build_topic_model(chunks)

    # Step 3: Fetch representative documents
    logging.info("Step 3: Fetching representative documents for each topic...")
    representative_docs = topic_model.get_representative_docs()

    # Step 4: Process topics using IoC extraction
    logging.info("Step 4: Processing topics for IoC extraction...")
    processed_topics = process_dict_with_ttp(representative_docs)

    # Step 5: Process each topic and generate reports
    logging.info("Step 5: Generating reports...")
    for topic_id, result in processed_topics.items():
        topic_sentences = result["Original Sentences"]
        elements_list = result["Elements"]
        counts_list = result["Counts"]
        replaced_chunks = result["Replaced Sentences"]

        # Create parent folder for the topic
        topic_folder = os.path.join(base_output_folder, f"topic_{topic_id}")
        os.makedirs(topic_folder, exist_ok=True)

        for idx, replaced_chunk in enumerate(replaced_chunks):
            chunk_folder = os.path.join(topic_folder, f"chunk_{idx + 1}")
            os.makedirs(chunk_folder, exist_ok=True)

            logging.info(f"Processing chunk {idx + 1}/{len(replaced_chunks)} of topic {topic_id}...")

            # Step 6: Predict TTPs on the merged chunk
            predictions = predict(replaced_chunk)

            # If predictions are empty, process each sentence separately
            if not predictions:
                logging.warning(f"No predictions available for chunk {idx + 1} of topic {topic_id}. Processing each sentence separately.")
                
                # Split the merged chunk into individual sentences and process each separately
                for sentence in replaced_chunk.split(". "):  # Adjust split based on how sentences are merged
                    sentence = sentence.strip()  # Clean up any extra spaces
                    if sentence:
                        logging.info(f"Processing sentence: {sentence}")
                        sentence_predictions = predict(sentence)  # Predict on individual sentence

                        # Handle predictions for the individual sentence
                        if sentence_predictions:
                            tactic_names = sentence_predictions.get('Tactic Name', ["Unknown"])
                            technique_names = sentence_predictions.get('Technique Name', ["Unknown"])
                            sub_tech_names = sentence_predictions.get('Sub-Technique Name', ["Unknown"])
                            technique_ids = sentence_predictions.get('Technique ID', ["Unknown"])

                            # Ensure consistent list lengths for predictions
                            max_len = max(len(tactic_names), len(technique_names), len(sub_tech_names), len(technique_ids))
                            tactic_names = (tactic_names or ["Unknown"]) * max_len
                            technique_names = (technique_names or ["Unknown"]) * max_len
                            sub_tech_names = (sub_tech_names or ["Unknown"]) * max_len
                            technique_ids = (technique_ids or ["Unknown"]) * max_len

                            # Step 7: Generate STIX Objects for the individual sentence
                            iocs = [elements_list[idx]]
                            counts_list_for_stix = [counts_list[idx]]
                            chunks_for_stix = [sentence]
                            do_include_non_ioc = False

                            for i, (tactic, technique, sub_technique, tech_id) in enumerate(zip(tactic_names, technique_names, sub_tech_names, technique_ids)):
                                try:
                                    indicators, relationships, attack_patterns = return_stix_objects(
                                        counts_list_for_stix,
                                        [tactic],
                                        [technique],
                                        [sub_technique],
                                        [tech_id],
                                        iocs,
                                        chunks_for_stix,
                                        do_include_non_ioc
                                    )

                                    # Skip if no objects were created
                                    if not (indicators or relationships or attack_patterns):
                                        logging.warning(f"No STIX objects created for sentence {sentence}. Skipping.")
                                        continue

                                    # Create STIX bundle
                                    report_name = f"prediction_{i + 1}"
                                    bundle = make_stix(indicators, relationships, attack_patterns, sentence, report_name)

                                    if bundle is None:
                                        logging.warning(f"STIX bundle creation failed for sentence {sentence}, prediction {i + 1}.")
                                        continue

                                    # Save the STIX bundle
                                    stix_file_path = os.path.join(chunk_folder, f"{report_name}.json")
                                    with open(stix_file_path, "w") as f:
                                        json.dump(json.loads(bundle.serialize()), f, indent=4)

                                    logging.info(f"Saved STIX file: {stix_file_path}")

                                except Exception as e:
                                    logging.error(f"Error processing sentence: {sentence}, prediction {i + 1}: {e}")

                        else:
                            logging.warning(f"No predictions available for individual sentence: {sentence}. Skipping.")
                continue  # Skip to next chunk if we handle sentence-by-sentence

            else:
                # Handle predictions for the merged chunk
                tactic_names = predictions.get('Tactic Name', ["Unknown"])
                technique_names = predictions.get('Technique Name', ["Unknown"])
                sub_tech_names = predictions.get('Sub-Technique Name', ["Unknown"])
                technique_ids = predictions.get('Technique ID', ["Unknown"])

                # Ensure consistent list lengths for predictions
                max_len = max(len(tactic_names), len(technique_names), len(sub_tech_names), len(technique_ids))
                tactic_names = (tactic_names or ["Unknown"]) * max_len
                technique_names = (technique_names or ["Unknown"]) * max_len
                sub_tech_names = (sub_tech_names or ["Unknown"]) * max_len
                technique_ids = (technique_ids or ["Unknown"]) * max_len

                # Step 7: Generate STIX Objects
                iocs = [elements_list[idx]]
                counts_list_for_stix = [counts_list[idx]]
                chunks_for_stix = [replaced_chunk]
                do_include_non_ioc = False

                for i, (tactic, technique, sub_technique, tech_id) in enumerate(zip(tactic_names, technique_names, sub_tech_names, technique_ids)):
                    try:
                        indicators, relationships, attack_patterns = return_stix_objects(
                            counts_list_for_stix,
                            [tactic],
                            [technique],
                            [sub_technique],
                            [tech_id],
                            iocs,
                            chunks_for_stix,
                            do_include_non_ioc
                        )

                        # Skip if no objects were created
                        if not (indicators or relationships or attack_patterns):
                            logging.warning(f"No STIX objects created for chunk {idx + 1}, prediction {i + 1}. Skipping.")
                            continue

                        # Create STIX bundle
                        report_name = f"prediction_{i + 1}"
                        bundle = make_stix(indicators, relationships, attack_patterns, replaced_chunk, report_name)

                        if bundle is None:
                            logging.warning(f"STIX bundle creation failed for chunk {idx + 1}, prediction {i + 1}.")
                            continue

                        # Save the STIX bundle
                        stix_file_path = os.path.join(chunk_folder, f"{report_name}.json")
                        with open(stix_file_path, "w") as f:
                            json.dump(json.loads(bundle.serialize()), f, indent=4)

                        logging.info(f"Saved STIX file: {stix_file_path}")

                    except Exception as e:
                        logging.error(f"Error processing chunk {idx + 1}, prediction {i + 1}: {e}")

    logging.info("Workflow completed successfully!")

In [None]:
# Run main workflow
if __name__ == "__main__":
     main_workflow("Reports/(3) Kimsuky is targeting an arms manufacturer in Europe. _ LinkedIn.pdf", 'testresults')

In [24]:
def process_reports(reports_folder, results_base_folder):
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),  # Log to console
            logging.FileHandler("processing.log", mode="w"),  # Log to file
        ],
    )

    # Create the base results folder if it doesn't exist
    os.makedirs(results_base_folder, exist_ok=True)

    logging.info(f"Starting PDF processing from folder: {reports_folder}")
    logging.info(f"Results will be saved in: {results_base_folder}\n")

    processed_files = 0
    skipped_files = 0
    errored_files = []

    # Iterate through each file in the Reports folder
    for report_file in os.listdir(reports_folder):
        if report_file.endswith(".pdf"):  # Process only PDF files
            input_path = os.path.join(reports_folder, report_file)
            report_name = os.path.splitext(report_file)[0]  # Remove the file extension
            output_folder = os.path.join(results_base_folder, report_name)

            # Create a folder for each report's results
            os.makedirs(output_folder, exist_ok=True)

            logging.info(f"Processing file: {report_file}")
            logging.info(f"Input Path: {input_path}")
            logging.info(f"Output Folder: {output_folder}\n")

            try:
                # Call the main workflow for each PDF
                main_workflow(input_path, output_folder)
                processed_files += 1
                logging.info(f"Finished processing: {report_file}\n")
            except Exception as e:
                errored_files.append(report_file)
                logging.error(f"Error processing file {report_file}: {e}\n")

        else:
            skipped_files += 1
            logging.warning(f"Skipping non-PDF file: {report_file}")

    # Summary
    logging.info("PDF processing completed.")
    logging.info(f"Processed files: {processed_files}")
    logging.info(f"Skipped files: {skipped_files}")
    if errored_files:
        logging.error(f"Errored files: {len(errored_files)}")
        for file in errored_files:
            logging.error(f" - {file}")

    logging.info("All tasks have been completed.\n")

In [25]:
def process_reports(reports_folder, results_base_folder):
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),  # Log to console
            logging.FileHandler("processing.log", mode="w"),  # Log to file
        ],
    )

    # Create the base results folder if it doesn't exist
    os.makedirs(results_base_folder, exist_ok=True)

    logging.info(f"Starting PDF processing from folder: {reports_folder}")
    logging.info(f"Results will be saved in: {results_base_folder}\n")

    processed_files = 0
    skipped_files = 0
    errored_files = []
    total_files = len([f for f in os.listdir(reports_folder) if f.endswith(".pdf")])

    # Start time tracking
    start_time = time.time()

    # Iterate through each file in the Reports folder
    for report_file in os.listdir(reports_folder):
        if report_file.endswith(".pdf"):  # Process only PDF files
            if processed_files >= 100:  # Stop after processing 100 files
                break

            input_path = os.path.join(reports_folder, report_file)
            report_name = os.path.splitext(report_file)[0]  # Remove the file extension
            output_folder = os.path.join(results_base_folder, report_name)

            # Create a folder for each report's results
            os.makedirs(output_folder, exist_ok=True)

            logging.info(f"Processing file: {report_file}")
            logging.info(f"Input Path: {input_path}")
            logging.info(f"Output Folder: {output_folder}\n")

            try:
                # Call the main workflow for each PDF
                main_workflow(input_path, output_folder)
                processed_files += 1
                logging.info(f"Finished processing: {report_file}\n")
            except Exception as e:
                errored_files.append(report_file)
                logging.error(f"Error processing file {report_file}: {e}\n")

        else:
            skipped_files += 1
            logging.warning(f"Skipping non-PDF file: {report_file}")

    # Time taken to process first 100 reports
    end_time = time.time()
    time_taken = end_time - start_time

    # Summary
    logging.info("PDF processing completed.")
    logging.info(f"Processed files: {processed_files}")
    logging.info(f"Skipped files: {skipped_files}")
    if errored_files:
        logging.error(f"Errored files: {len(errored_files)}")
        for file in errored_files:
            logging.error(f" - {file}")

    logging.info(f"Time taken to process first 100 reports: {time_taken:.2f} seconds.")
    logging.info("All tasks have been completed.\n")

In [26]:
if __name__ == "__main__":
    # Default folder paths
    reports_folder = "Reports"
    results_base_folder = "Results_Topic_Para"

    # Check if the reports folder exists
    if not os.path.exists(reports_folder):
        print(f"Error: Reports folder '{reports_folder}' does not exist.")
        sys.exit(1)

    # Call the processing function
    process_reports(reports_folder, results_base_folder)

2024-12-03 06:19:30,636 - INFO - Starting PDF processing from folder: Reports
2024-12-03 06:19:30,636 - INFO - Results will be saved in: Results_Topic_Para

2024-12-03 06:19:30,639 - INFO - Processing file: Technical-threat-report-Arid-Viper-April-2021.pdf
2024-12-03 06:19:30,642 - INFO - Input Path: Reports/Technical-threat-report-Arid-Viper-April-2021.pdf
2024-12-03 06:19:30,642 - INFO - Output Folder: Results_Topic_Para/Technical-threat-report-Arid-Viper-April-2021

2024-12-03 06:19:30,643 - INFO - Processing Reports/Technical-threat-report-Arid-Viper-April-2021.pdf and saving results in Results_Topic_Para/Technical-threat-report-Arid-Viper-April-2021
2024-12-03 06:19:30,643 - INFO - Step 0: Extracting text from PDF...
2024-12-03 06:19:30,699 - INFO - Step 0: Cleaning the extracted text...
2024-12-03 06:19:30,706 - INFO - Step 1: Chunking the cleaned text into sentences...
2024-12-03 06:19:31,758 - INFO - Step 2: Building the topic model...


2024-12-03 06:19:32,214 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:19:32,217 - INFO - Use pytorch device_name: cuda
2024-12-03 06:19:32,217 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:19:35,788 - BERTopic - Embedding - Completed ✓
2024-12-03 06:19:35,789 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:19:40,298 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:40,299 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:19:40,321 - BERTopic - Cluster - Completed ✓
2024-12-03 06:19:40,324 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:19:45,766 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:19:46,137 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:19:46,139 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:46,139 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:19:46,145 - BERTopic - Cluster - Completed ✓


2024-12-03 06:19:46,145 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:19:46,146 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:19:46,277 - INFO - Step 5: Generating reports...
2024-12-03 06:19:46,277 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:19:46,359 - INFO - Saved STIX file: Results_Topic_Para/Technical-threat-report-Arid-Viper-April-2021/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:19:46,364 - INFO - Saved STIX file: Results_Topic_Para/Technical-threat-report-Arid-Viper-April-2021/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:19:46,364 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:19:46,371 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:19:46,386 - INFO - Saved STIX file: Results_Topic_Para/Technical-threat-report-Arid-Viper-April-2021/topic_1/chunk_1/prediction_1.json
2024-12-03 06:19:46,387 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:19:46,401 - INFO - Saved ST

2024-12-03 06:19:53,690 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:19:53,692 - INFO - Use pytorch device_name: cuda
2024-12-03 06:19:53,693 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:19:56,276 - BERTopic - Embedding - Completed ✓
2024-12-03 06:19:56,276 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:19:56,450 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:56,451 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:19:56,468 - BERTopic - Cluster - Completed ✓
2024-12-03 06:19:56,470 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:20:00,825 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:20:01,107 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:20:01,110 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:01,113 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:20:01,118 - BERTopic - Cluster - Completed ✓


2024-12-03 06:20:01,119 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:20:01,121 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:20:01,212 - INFO - Step 5: Generating reports...
2024-12-03 06:20:01,214 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:20:01,322 - INFO - Saved STIX file: Results_Topic_Para/2022 Adversary Infrastructure Report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:20:01,322 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:20:01,428 - INFO - Saved STIX file: Results_Topic_Para/2022 Adversary Infrastructure Report/topic_0/chunk_1/prediction_1.json
2024-12-03 06:20:01,443 - INFO - Saved STIX file: Results_Topic_Para/2022 Adversary Infrastructure Report/topic_0/chunk_1/prediction_2.json
2024-12-03 06:20:01,446 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:20:01,466 - INFO - Saved STIX file: Results_Topic_Para/2022 Adversary Infrastructure Report/topic_1/chunk_1/prediction_1.js

2024-12-03 06:20:10,254 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:20:10,256 - INFO - Use pytorch device_name: cuda
2024-12-03 06:20:10,257 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:20:12,663 - BERTopic - Embedding - Completed ✓
2024-12-03 06:20:12,664 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:20:13,662 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:13,664 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:20:13,722 - BERTopic - Cluster - Completed ✓
2024-12-03 06:20:13,726 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:20:27,918 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:20:28,751 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:20:28,755 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:28,756 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:20:28,771 - BERTopic - Cluster - Completed ✓


2024-12-03 06:20:28,773 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:20:28,777 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:20:29,047 - INFO - Step 5: Generating reports...
2024-12-03 06:20:29,048 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:20:29,114 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2023/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:20:29,118 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2023/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:20:29,123 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:20:29,190 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2023/topic_0/chunk_1/prediction_1.json
2024-12-03 06:20:29,192 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:20:29,295 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2023/topic_1/chunk_1/prediction_1.json
2024-12-03 06:20:29,297 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 0

2024-12-03 06:20:42,598 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:20:42,600 - INFO - Use pytorch device_name: cuda
2024-12-03 06:20:42,601 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:20:44,982 - BERTopic - Embedding - Completed ✓
2024-12-03 06:20:44,983 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:20:45,194 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:45,195 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:20:45,215 - BERTopic - Cluster - Completed ✓
2024-12-03 06:20:45,219 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:20:49,877 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:20:50,220 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:20:50,221 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:50,221 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:20:50,230 - BERTopic - Cluster - Completed ✓


2024-12-03 06:20:50,231 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:20:50,234 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:20:50,342 - INFO - Step 5: Generating reports...
2024-12-03 06:20:50,343 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:20:50,359 - INFO - Saved STIX file: Results_Topic_Para/The-Desert-Falcons-targeted-attacks/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:20:50,363 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:20:50,370 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:20:50,384 - INFO - Saved STIX file: Results_Topic_Para/The-Desert-Falcons-targeted-attacks/topic_1/chunk_1/prediction_1.json
2024-12-03 06:20:50,385 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:20:50,509 - INFO - Saved STIX file: Results_Topic_Para/The-Desert-Falcons-targeted-attacks/topic_2/chunk_1/prediction_1.json
2024-12-03 06:20:50,509 - INFO - Processing chunk 1/1 of topic 3...
2024

2024-12-03 06:20:56,736 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:20:56,738 - INFO - Use pytorch device_name: cuda
2024-12-03 06:20:56,739 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:20:59,128 - BERTopic - Embedding - Completed ✓
2024-12-03 06:20:59,128 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:20:59,343 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:59,345 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:20:59,367 - BERTopic - Cluster - Completed ✓
2024-12-03 06:20:59,369 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:04,341 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:21:04,649 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:04,651 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:04,651 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:04,656 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:04,657 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:04,657 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:04,792 - INFO - Step 5: Generating reports...
2024-12-03 06:21:04,793 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:21:04,807 - INFO - Saved STIX file: Results_Topic_Para/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:04,808 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:21:04,971 - INFO - Saved STIX file: Results_Topic_Para/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_0/chunk_1/prediction_1.json
2024-12-03 06:21:04,973 - INFO - Saved STIX file: Results_Topic_Para/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_0/chunk_1/prediction_2.json
2024-12-03 06:21:04,974 - INFO - Saved STIX file: Results_Topic_Para/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS

2024-12-03 06:21:09,914 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:09,916 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:09,917 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:21:12,594 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:12,597 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:12,857 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:12,858 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:12,881 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:12,884 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:18,784 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:21:19,143 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:19,145 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:19,147 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:19,154 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:19,155 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:19,156 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:19,287 - INFO - Step 5: Generating reports...
2024-12-03 06:21:19,288 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:21:19,306 - INFO - Saved STIX file: Results_Topic_Para/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:19,310 - INFO - Saved STIX file: Results_Topic_Para/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:21:19,313 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:21:19,362 - INFO - Saved STIX file: Results_Topic_Para/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_0/chunk_1/prediction_1.json
2024-12-03 06:21:19,363 - INFO - 

2024-12-03 06:21:26,453 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:26,456 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:26,456 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:28,753 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:28,755 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:28,779 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:28,780 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:28,786 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:28,788 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:29,347 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:29,385 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:29,386 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:29,387 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:29,388 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:29,388 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:29,389 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:29,399 - INFO - Step 5: Generating reports...
2024-12-03 06:21:29,401 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:21:29,589 - INFO - Saved STIX file: Results_Topic_Para/APT37.blog/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:29,591 - INFO - Saved STIX file: Results_Topic_Para/APT37.blog/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:21:29,592 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:21:29,599 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:21:29,612 - INFO - Saved STIX file: Results_Topic_Para/APT37.blog/topic_1/chunk_1/prediction_1.json
2024-12-03 06:21:29,613 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:21:29,800 - INFO - Saved STIX file: Results_Topic_Para/APT37.blog/topic_2/chunk_1/prediction_1.json
2024-12-03 06:21:29,800 - INFO -

2024-12-03 06:21:30,690 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:30,692 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:30,693 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:32,951 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:32,952 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:32,971 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:32,972 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:32,976 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:32,978 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:33,402 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:33,438 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:33,439 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:33,439 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:33,440 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:33,441 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:33,441 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:33,447 - INFO - Step 5: Generating reports...
2024-12-03 06:21:33,448 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:21:33,524 - INFO - Saved STIX file: Results_Topic_Para/Dust_Storm_Infographic_v4/topic_0/chunk_1/prediction_1.json
2024-12-03 06:21:33,526 - INFO - Saved STIX file: Results_Topic_Para/Dust_Storm_Infographic_v4/topic_0/chunk_1/prediction_2.json
2024-12-03 06:21:33,527 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:21:33,540 - INFO - Saved STIX file: Results_Topic_Para/Dust_Storm_Infographic_v4/topic_1/chunk_1/prediction_1.json
2024-12-03 06:21:33,542 - INFO - Saved STIX file: Results_Topic_Para/Dust_Storm_Infographic_v4/topic_1/chunk_1/prediction_2.json
2024-12-03 06:21:33,543 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:21:33,654 - INFO - Saved STIX

2024-12-03 06:21:35,982 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:35,984 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:35,985 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:21:38,318 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:38,320 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:38,887 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:38,888 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:38,927 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:38,930 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:48,671 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:21:49,217 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:49,219 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:49,220 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:49,232 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:49,233 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:49,234 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:49,419 - INFO - Step 5: Generating reports...
2024-12-03 06:21:49,421 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:21:49,440 - INFO - Saved STIX file: Results_Topic_Para/Bartholomew-GuerreroSaade-VB2016/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:49,441 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:21:49,448 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:21:49,715 - INFO - Saved STIX file: Results_Topic_Para/Bartholomew-GuerreroSaade-VB2016/topic_1/chunk_1/prediction_1.json
2024-12-03 06:21:49,716 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:21:49,731 - INFO - Saved STIX file: Results_Topic_Para/Bartholomew-GuerreroSaade-VB2016/topic_2/chunk_1/prediction_1.json
2024-12-03 06:21:49,733 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06

2024-12-03 06:22:00,188 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:00,191 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:00,191 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:02,783 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:02,783 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:02,896 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:02,897 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:02,908 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:02,911 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:05,536 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:05,708 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:05,709 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:05,710 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:05,713 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:05,713 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:05,714 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:05,762 - INFO - Step 5: Generating reports...
2024-12-03 06:22:05,763 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:22:05,887 - INFO - Saved STIX file: Results_Topic_Para/callisto-group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:05,888 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:22:05,901 - INFO - Saved STIX file: Results_Topic_Para/callisto-group/topic_0/chunk_1/prediction_1.json
2024-12-03 06:22:05,902 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:22:05,994 - INFO - Saved STIX file: Results_Topic_Para/callisto-group/topic_1/chunk_1/prediction_1.json
2024-12-03 06:22:05,997 - INFO - Saved STIX file: Results_Topic_Para/callisto-group/topic_1/chunk_1/prediction_2.json
2024-12-03 06:22:05,999 - INFO - Saved STIX file: Results_Topic_Para/callisto-group/t

2024-12-03 06:22:09,078 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:09,080 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:09,080 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:22:11,290 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:11,291 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:11,352 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:11,353 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:11,360 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:11,363 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:12,869 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:22:12,975 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:12,979 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:12,980 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:12,986 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:12,987 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:12,988 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:13,034 - INFO - Step 5: Generating reports...
2024-12-03 06:22:13,035 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:22:13,134 - INFO - Saved STIX file: Results_Topic_Para/Musical Chairs Playing Tetris/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:13,135 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:22:13,176 - INFO - Saved STIX file: Results_Topic_Para/Musical Chairs Playing Tetris/topic_0/chunk_1/prediction_1.json
2024-12-03 06:22:13,187 - INFO - Saved STIX file: Results_Topic_Para/Musical Chairs Playing Tetris/topic_0/chunk_1/prediction_2.json
2024-12-03 06:22:13,197 - INFO - Saved STIX file: Results_Topic_Para/Musical Chairs Playing Tetris/topic_0/chunk_1/prediction_3.json
2024-12-03 06:22:13,199 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:22:13,228 -

2024-12-03 06:22:16,648 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:16,651 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:16,651 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:18,842 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:18,842 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:18,963 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:18,964 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:18,976 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:18,978 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:22,070 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:22,268 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:22,269 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:22,269 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:22,272 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:22,273 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:22,274 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:22,343 - INFO - Step 5: Generating reports...
2024-12-03 06:22:22,344 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:22:22,363 - INFO - Saved STIX file: Results_Topic_Para/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support Espionage Activities/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:22,368 - INFO - Saved STIX file: Results_Topic_Para/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support Espionage Activities/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:22:22,369 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:22:22,390 - INFO - Saved STIX file: Results_Topic_Para/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support Espionage Activities/topic_0/chunk_1/prediction_1.json
2024-12-03 06:22:22,396 - INFO

2024-12-03 06:22:27,324 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:27,326 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:27,326 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:22:29,715 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:29,716 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:30,664 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:30,665 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:30,723 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:30,726 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:44,529 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:22:45,329 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:45,332 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:45,333 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:45,351 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:45,353 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:45,354 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:45,632 - INFO - Step 5: Generating reports...
2024-12-03 06:22:45,633 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:22:45,647 - INFO - Saved STIX file: Results_Topic_Para/ESET_Threat_Report_Q32020/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:45,649 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:22:45,664 - INFO - Saved STIX file: Results_Topic_Para/ESET_Threat_Report_Q32020/topic_0/chunk_1/prediction_1.json
2024-12-03 06:22:45,665 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:22:45,862 - INFO - Saved STIX file: Results_Topic_Para/ESET_Threat_Report_Q32020/topic_1/chunk_1/prediction_1.json
2024-12-03 06:22:45,863 - INFO - Saved STIX file: Results_Topic_Para/ESET_Threat_Report_Q32020/topic_1/chunk_1/prediction_2.json
2024-12-03 06:22:45,864 - INFO - Processi

2024-12-03 06:22:59,300 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:59,302 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:59,302 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:23:01,537 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:01,538 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:01,969 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:01,970 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:01,988 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:01,990 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:06,057 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:23:06,369 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:06,372 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:06,372 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:06,378 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:06,379 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:06,379 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:06,483 - INFO - Step 5: Generating reports...
2024-12-03 06:23:06,483 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:23:06,561 - INFO - Saved STIX file: Results_Topic_Para/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:23:06,566 - INFO - Saved STIX file: Results_Topic_Para/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:23:06,567 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:06,579 - INFO - Saved STIX file: Results_Topic_Para/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_0/chunk_1/prediction_1.json
2024-12-03 06:23:06,579 - INFO - Processing chunk 1/1 of topic 1...
2024

2024-12-03 06:23:12,890 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:12,893 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:12,894 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:23:15,090 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:15,091 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:15,168 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:15,168 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:15,178 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:15,180 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:16,970 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:23:17,096 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:17,098 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:17,098 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:17,101 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:17,101 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:17,102 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:17,139 - INFO - Step 5: Generating reports...
2024-12-03 06:23:17,139 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:17,278 - INFO - Saved STIX file: Results_Topic_Para/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_1.json
2024-12-03 06:23:17,280 - INFO - Saved STIX file: Results_Topic_Para/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_2.json
2024-12-03 06:23:17,280 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:23:17,319 - INFO - Saved STIX file: Results_Topic_Para/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_1/chunk_1/prediction_1.json
2024-12-03 06:23:17,320 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:23:17,338 - INFO - Saved STIX file: Results_Topic_Para/Iranian

2024-12-03 06:23:19,012 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:19,014 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:19,014 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:23:21,213 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:21,216 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:21,241 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:21,242 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:21,245 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:21,249 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:21,457 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:23:21,483 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:21,485 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:21,485 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:21,486 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:21,489 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:21,490 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:21,491 - INFO - Step 5: Generating reports...
2024-12-03 06:23:21,492 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:21,604 - INFO - Saved STIX file: Results_Topic_Para/US23-Heiding-Devicing-and-Detecting-Phishing/topic_0/chunk_1/prediction_1.json
2024-12-03 06:23:21,605 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:23:21,797 - INFO - Saved STIX file: Results_Topic_Para/US23-Heiding-Devicing-and-Detecting-Phishing/topic_1/chunk_1/prediction_1.json
2024-12-03 06:23:21,799 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:23:21,921 - INFO - Saved STIX file: Results_Topic_Para/US23-Heiding-Devicing-and-Detecting-Phishing/topic_2/chunk_1/prediction_1.json
2024-12-03 06:23:21,922 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:23:22,062 - INFO - Saved STIX fil

2024-12-03 06:23:23,379 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:23,382 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:23,382 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:23:25,692 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:25,693 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:25,848 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:25,848 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:25,864 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:25,867 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:30,294 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:23:30,591 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:30,592 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:30,593 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:30,597 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:30,597 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:30,598 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:30,703 - INFO - Step 5: Generating reports...
2024-12-03 06:23:30,704 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:23:30,737 - INFO - Saved STIX file: Results_Topic_Para/RedFoxtrot_group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:23:30,738 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:30,836 - INFO - Saved STIX file: Results_Topic_Para/RedFoxtrot_group/topic_0/chunk_1/prediction_1.json
2024-12-03 06:23:30,837 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:23:30,949 - INFO - Saved STIX file: Results_Topic_Para/RedFoxtrot_group/topic_1/chunk_1/prediction_1.json
2024-12-03 06:23:30,949 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:23:31,036 - INFO - Saved STIX file: Results_Topic_Para/RedFoxtrot_group/topic_2/chunk_1/prediction_1.json
2024-12-0

2024-12-03 06:23:36,738 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:36,741 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:36,741 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:23:38,979 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:38,980 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:39,080 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:39,081 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:39,091 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:39,093 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:41,703 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:23:41,872 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:41,874 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:41,875 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:41,878 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:41,879 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:41,879 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:41,942 - INFO - Step 5: Generating reports...
2024-12-03 06:23:41,942 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:23:42,044 - INFO - Saved STIX file: Results_Topic_Para/cta-cn-2024-0624/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:23:42,045 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:42,107 - INFO - Saved STIX file: Results_Topic_Para/cta-cn-2024-0624/topic_0/chunk_1/prediction_1.json
2024-12-03 06:23:42,109 - INFO - Saved STIX file: Results_Topic_Para/cta-cn-2024-0624/topic_0/chunk_1/prediction_2.json
2024-12-03 06:23:42,110 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:23:42,304 - INFO - Saved STIX file: Results_Topic_Para/cta-cn-2024-0624/topic_1/chunk_1/prediction_1.json
2024-12-03 06:23:42,304 - INFO - Processing chunk 1/1 of topic 2...
2024-12-0

2024-12-03 06:23:46,199 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:46,202 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:46,202 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:23:48,513 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:48,514 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:48,639 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:48,640 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:48,653 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:48,655 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:52,310 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:23:52,539 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:52,540 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:52,541 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:52,544 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:52,545 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:52,545 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:52,635 - INFO - Step 5: Generating reports...
2024-12-03 06:23:52,636 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:23:52,657 - INFO - Saved STIX file: Results_Topic_Para/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:23:52,658 - INFO - Saved STIX file: Results_Topic_Para/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:23:52,660 - INFO - Saved STIX file: Results_Topic_Para/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:23:52,660 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:23:52,693 - INFO - Saved STIX file: Results_Topic_Para/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malwa

2024-12-03 06:23:59,111 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:59,113 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:59,114 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

2024-12-03 06:24:01,448 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:01,449 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:01,911 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:01,912 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:01,946 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:01,949 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:10,856 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

2024-12-03 06:24:11,382 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:11,384 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:11,385 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:11,394 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:11,395 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:11,395 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:11,579 - INFO - Step 5: Generating reports...
2024-12-03 06:24:11,580 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:24:11,594 - INFO - Saved STIX file: Results_Topic_Para/rpt-apt38-2018-web_v4/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:24:11,596 - INFO - Saved STIX file: Results_Topic_Para/rpt-apt38-2018-web_v4/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:24:11,596 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:24:11,609 - INFO - Saved STIX file: Results_Topic_Para/rpt-apt38-2018-web_v4/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:11,610 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:24:11,708 - INFO - Saved STIX file: Results_Topic_Para/rpt-apt38-2018-web_v4/topic_1/chunk_1/prediction_1.json
2024-12-03 06:24:11,709 - INFO - Processing chunk 1/1 of

2024-12-03 06:24:23,975 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:24:23,977 - INFO - Use pytorch device_name: cuda
2024-12-03 06:24:23,977 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/34 [00:00<?, ?it/s]

2024-12-03 06:24:26,606 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:26,607 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:28,389 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:28,390 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:28,474 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:28,478 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:46,769 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/34 [00:00<?, ?it/s]

2024-12-03 06:24:47,977 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:47,980 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:47,981 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:48,003 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:48,004 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:48,005 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:48,440 - INFO - Step 5: Generating reports...
2024-12-03 06:24:48,441 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:24:48,464 - INFO - Saved STIX file: Results_Topic_Para/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:24:48,465 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:24:48,480 - INFO - Saved STIX file: Results_Topic_Para/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:48,480 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:24:48,507 - INFO - Saved STIX file: Results_Topic_Para/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_1/chunk_1/prediction_1.json
2024-12-03 06:24:48,507 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:24:48,608 - INFO

2024-12-03 06:25:12,525 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:25:12,528 - INFO - Use pytorch device_name: cuda
2024-12-03 06:25:12,529 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:25:14,757 - BERTopic - Embedding - Completed ✓
2024-12-03 06:25:14,758 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:25:15,098 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:15,099 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:25:15,120 - BERTopic - Cluster - Completed ✓
2024-12-03 06:25:15,122 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:25:19,176 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:25:19,429 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:25:19,430 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:19,431 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:25:19,437 - BERTopic - Cluster - Completed ✓


2024-12-03 06:25:19,437 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:25:19,438 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:25:19,497 - INFO - Step 5: Generating reports...
2024-12-03 06:25:19,498 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:25:19,698 - INFO - Saved STIX file: Results_Topic_Para/20151028_codeblue_apt-en/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:25:19,699 - INFO - Saved STIX file: Results_Topic_Para/20151028_codeblue_apt-en/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:25:19,700 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:25:19,834 - INFO - Saved STIX file: Results_Topic_Para/20151028_codeblue_apt-en/topic_0/chunk_1/prediction_1.json
2024-12-03 06:25:19,836 - INFO - Saved STIX file: Results_Topic_Para/20151028_codeblue_apt-en/topic_0/chunk_1/prediction_2.json
2024-12-03 06:25:19,837 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:25:19,975 - INFO - Saved STIX 

2024-12-03 06:25:28,255 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:25:28,258 - INFO - Use pytorch device_name: cuda
2024-12-03 06:25:28,261 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:25:30,588 - BERTopic - Embedding - Completed ✓
2024-12-03 06:25:30,590 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:25:31,125 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:31,125 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:25:31,164 - BERTopic - Cluster - Completed ✓
2024-12-03 06:25:31,166 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:25:41,100 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:25:41,683 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:25:41,687 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:41,687 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:25:41,701 - BERTopic - Cluster - Completed ✓


2024-12-03 06:25:41,702 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:25:41,702 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:25:41,907 - INFO - Step 5: Generating reports...
2024-12-03 06:25:41,908 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:25:42,026 - INFO - Saved STIX file: Results_Topic_Para/ICS_eng/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:25:42,027 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:25:42,045 - INFO - Saved STIX file: Results_Topic_Para/ICS_eng/topic_0/chunk_1/prediction_1.json
2024-12-03 06:25:42,045 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:25:42,053 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:25:42,067 - INFO - Saved STIX file: Results_Topic_Para/ICS_eng/topic_2/chunk_1/prediction_1.json
2024-12-03 06:25:42,068 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:25:42,166 - INFO - Saved STIX file: Results_Topic_Para/ICS_eng/topic_3/chu

2024-12-03 06:25:54,596 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:25:54,598 - INFO - Use pytorch device_name: cuda
2024-12-03 06:25:54,599 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:25:56,812 - BERTopic - Embedding - Completed ✓
2024-12-03 06:25:56,813 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:25:56,939 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:56,939 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:25:56,953 - BERTopic - Cluster - Completed ✓
2024-12-03 06:25:56,955 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:25:59,929 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:26:00,122 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:26:00,123 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:00,124 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:26:00,128 - BERTopic - Cluster - Completed ✓


2024-12-03 06:26:00,129 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:26:00,129 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:26:00,183 - INFO - Step 5: Generating reports...
2024-12-03 06:26:00,184 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:26:00,297 - INFO - Saved STIX file: Results_Topic_Para/FTA 1007 - Shamoon/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:26:00,298 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:26:00,317 - INFO - Saved STIX file: Results_Topic_Para/FTA 1007 - Shamoon/topic_0/chunk_1/prediction_1.json
2024-12-03 06:26:00,318 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:26:00,477 - INFO - Saved STIX file: Results_Topic_Para/FTA 1007 - Shamoon/topic_1/chunk_1/prediction_1.json
2024-12-03 06:26:00,479 - INFO - Saved STIX file: Results_Topic_Para/FTA 1007 - Shamoon/topic_1/chunk_1/prediction_2.json
2024-12-03 06:26:00,480 - INFO - Processing chunk 1/1 of topic 2...
2

2024-12-03 06:26:05,801 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:26:05,804 - INFO - Use pytorch device_name: cuda
2024-12-03 06:26:05,805 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:26:08,118 - BERTopic - Embedding - Completed ✓
2024-12-03 06:26:08,119 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:26:08,658 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:08,659 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:26:08,697 - BERTopic - Cluster - Completed ✓
2024-12-03 06:26:08,700 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:26:18,757 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:26:19,307 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:26:19,309 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:19,309 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:26:19,320 - BERTopic - Cluster - Completed ✓


2024-12-03 06:26:19,321 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:26:19,322 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:26:19,499 - INFO - Step 5: Generating reports...
2024-12-03 06:26:19,501 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:26:19,509 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:26:19,524 - INFO - Saved STIX file: Results_Topic_Para/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_0/chunk_1/prediction_1.json
2024-12-03 06:26:19,525 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:26:19,531 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:26:19,546 - INFO - Saved STIX file: Results_Topic_Para/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_2/chunk_1/prediction_1.json
2024-12-03 06:26:19,547 - INFO - Saved STIX file: Results_Topic_Para/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_2/chunk_1/prediction_2.json
2

2024-12-03 06:26:31,211 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:26:31,213 - INFO - Use pytorch device_name: cuda
2024-12-03 06:26:31,214 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

2024-12-03 06:26:33,605 - BERTopic - Embedding - Completed ✓
2024-12-03 06:26:33,606 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:26:33,906 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:33,907 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:26:33,930 - BERTopic - Cluster - Completed ✓
2024-12-03 06:26:33,934 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:26:38,872 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

2024-12-03 06:26:39,189 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:26:39,192 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:39,192 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:26:39,200 - BERTopic - Cluster - Completed ✓


2024-12-03 06:26:39,201 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:26:39,202 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:26:39,326 - INFO - Step 5: Generating reports...
2024-12-03 06:26:39,327 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:26:39,429 - INFO - Saved STIX file: Results_Topic_Para/20231013_Lazarus_OP.Dream_Magic/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:26:39,430 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:26:39,506 - INFO - Saved STIX file: Results_Topic_Para/20231013_Lazarus_OP.Dream_Magic/topic_0/chunk_1/prediction_1.json
2024-12-03 06:26:39,507 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:26:39,540 - INFO - Saved STIX file: Results_Topic_Para/20231013_Lazarus_OP.Dream_Magic/topic_1/chunk_1/prediction_1.json
2024-12-03 06:26:39,542 - INFO - Saved STIX file: Results_Topic_Para/20231013_Lazarus_OP.Dream_Magic/topic_1/chunk_1/prediction_2.json
2024-12-03 06:26:

2024-12-03 06:26:49,912 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:26:49,914 - INFO - Use pytorch device_name: cuda
2024-12-03 06:26:49,914 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:26:52,162 - BERTopic - Embedding - Completed ✓
2024-12-03 06:26:52,162 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:26:52,260 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:52,261 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:26:52,272 - BERTopic - Cluster - Completed ✓
2024-12-03 06:26:52,274 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:26:54,765 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:26:54,921 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:26:54,922 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:54,923 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:26:54,926 - BERTopic - Cluster - Completed ✓


2024-12-03 06:26:54,927 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:26:54,928 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:26:54,972 - INFO - Step 5: Generating reports...
2024-12-03 06:26:54,972 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:26:55,032 - INFO - Saved STIX file: Results_Topic_Para/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:26:55,036 - INFO - Saved STIX file: Results_Topic_Para/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:26:55,037 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:26:55,050 - INFO - Saved STIX file: Results_Topic_Para/Sahel-Gabon-Coup-Playbook-PDF/topic_0/chunk_1/prediction_1.json
2024-12-03 06:26:55,051 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:26:55,192 - INFO - Saved STIX file: Results_Topic_Para/Sahel-Gabon-Coup-Playbook-PDF/topic_1/chunk_1/prediction_1.json
2024-12-03 06:26:55,194 

2024-12-03 06:26:56,827 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:26:56,829 - INFO - Use pytorch device_name: cuda
2024-12-03 06:26:56,830 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:26:59,046 - BERTopic - Embedding - Completed ✓
2024-12-03 06:26:59,048 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:26:59,103 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:59,103 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:26:59,111 - BERTopic - Cluster - Completed ✓
2024-12-03 06:26:59,113 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:27:00,473 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:27:00,576 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:27:00,577 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:00,578 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:27:00,580 - BERTopic - Cluster - Completed ✓


2024-12-03 06:27:00,581 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:27:00,582 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:27:00,614 - INFO - Step 5: Generating reports...
2024-12-03 06:27:00,615 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:27:00,662 - INFO - Saved STIX file: Results_Topic_Para/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_0/chunk_1/prediction_1.json
2024-12-03 06:27:00,664 - INFO - Saved STIX file: Results_Topic_Para/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_0/chunk_1/prediction_2.json
2024-12-03 06:27:00,664 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:27:00,778 - INFO - Saved STIX file: Results_Topic_Para/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_1/chunk_1/prediction_1.json
2024-12-03 06:27:00,779 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:27:01,006 - INFO - Saved STIX

2024-12-03 06:27:03,086 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:27:03,088 - INFO - Use pytorch device_name: cuda
2024-12-03 06:27:03,089 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:27:05,277 - BERTopic - Embedding - Completed ✓
2024-12-03 06:27:05,278 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:27:05,375 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:05,376 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:27:05,388 - BERTopic - Cluster - Completed ✓
2024-12-03 06:27:05,391 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:27:08,120 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:27:08,307 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:27:08,308 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:08,309 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:27:08,312 - BERTopic - Cluster - Completed ✓


2024-12-03 06:27:08,313 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:27:08,313 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:27:08,370 - INFO - Step 5: Generating reports...
2024-12-03 06:27:08,371 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:27:08,388 - INFO - Saved STIX file: Results_Topic_Para/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:27:08,389 - INFO - Saved STIX file: Results_Topic_Para/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:27:08,390 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:27:08,418 - INFO - Saved STIX file: Results_Topic_Para/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_0/chunk_1/prediction_1.json
2024-12-03 06:27:08,418 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:27:08,484 - INFO - Save

2024-12-03 06:27:12,089 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:27:12,091 - INFO - Use pytorch device_name: cuda
2024-12-03 06:27:12,091 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:27:14,339 - BERTopic - Embedding - Completed ✓
2024-12-03 06:27:14,340 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:27:14,533 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:14,534 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:27:14,552 - BERTopic - Cluster - Completed ✓
2024-12-03 06:27:14,557 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:27:18,770 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:27:19,064 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:27:19,067 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:19,067 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:27:19,072 - BERTopic - Cluster - Completed ✓


2024-12-03 06:27:19,073 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:27:19,073 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:27:19,174 - INFO - Step 5: Generating reports...
2024-12-03 06:27:19,175 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:27:19,300 - INFO - Saved STIX file: Results_Topic_Para/THOR_ Previously Unseen PlugX Variant Deployed During Microsoft Exchange Server Attacks by PKPLUG Group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:27:19,301 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:27:19,436 - INFO - Saved STIX file: Results_Topic_Para/THOR_ Previously Unseen PlugX Variant Deployed During Microsoft Exchange Server Attacks by PKPLUG Group/topic_0/chunk_1/prediction_1.json
2024-12-03 06:27:19,437 - INFO - Saved STIX file: Results_Topic_Para/THOR_ Previously Unseen PlugX Variant Deployed During Microsoft Exchange Server Attacks by PKPLUG Group/topic_0/chunk_1/prediction_2.json
2024

2024-12-03 06:27:34,585 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:27:34,588 - INFO - Use pytorch device_name: cuda
2024-12-03 06:27:34,589 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/99 [00:00<?, ?it/s]

2024-12-03 06:27:37,953 - BERTopic - Embedding - Completed ✓
2024-12-03 06:27:37,955 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:27:48,882 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:48,883 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:27:49,187 - BERTopic - Cluster - Completed ✓
2024-12-03 06:27:49,191 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:28:55,936 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/99 [00:00<?, ?it/s]

2024-12-03 06:28:59,566 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:28:59,574 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:28:59,574 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:28:59,642 - BERTopic - Cluster - Completed ✓


2024-12-03 06:28:59,644 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:28:59,645 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:29:01,334 - INFO - Step 5: Generating reports...
2024-12-03 06:29:01,335 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:29:01,488 - INFO - Saved STIX file: Results_Topic_Para/A_Threat_Actor_Encyclopedia/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:29:01,496 - INFO - Saved STIX file: Results_Topic_Para/A_Threat_Actor_Encyclopedia/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:29:01,497 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:29:01,505 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:29:01,512 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:29:01,629 - INFO - Saved STIX file: Results_Topic_Para/A_Threat_Actor_Encyclopedia/topic_2/chunk_1/prediction_1.json
2024-12-03 06:29:01,629 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:29:01,654 - I

2024-12-03 06:29:51,841 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:29:51,844 - INFO - Use pytorch device_name: cuda
2024-12-03 06:29:51,844 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:29:54,273 - BERTopic - Embedding - Completed ✓
2024-12-03 06:29:54,273 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:29:54,388 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:54,389 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:29:54,402 - BERTopic - Cluster - Completed ✓
2024-12-03 06:29:54,404 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:29:57,187 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:29:57,365 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:29:57,366 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:57,367 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:29:57,370 - BERTopic - Cluster - Completed ✓


2024-12-03 06:29:57,371 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:29:57,371 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:29:57,425 - INFO - Step 5: Generating reports...
2024-12-03 06:29:57,426 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:29:57,441 - INFO - Saved STIX file: Results_Topic_Para/ChessMaster Adds Updated Tools to Its Arsenal/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:29:57,442 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:29:57,455 - INFO - Saved STIX file: Results_Topic_Para/ChessMaster Adds Updated Tools to Its Arsenal/topic_0/chunk_1/prediction_1.json
2024-12-03 06:29:57,456 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:29:57,472 - INFO - Saved STIX file: Results_Topic_Para/ChessMaster Adds Updated Tools to Its Arsenal/topic_1/chunk_1/prediction_1.json
2024-12-03 06:29:57,478 - INFO - Saved STIX file: Results_Topic_Para/ChessMaster Adds Updated Tools to Its Ars

2024-12-03 06:30:00,632 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:00,635 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:00,637 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:02,934 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:02,935 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:03,028 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:03,028 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:03,040 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:03,043 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:05,527 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:05,700 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:05,701 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:05,702 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:05,705 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:05,706 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:05,706 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:05,788 - INFO - Step 5: Generating reports...
2024-12-03 06:30:05,789 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:30:05,927 - INFO - Saved STIX file: Results_Topic_Para/[S2W LAB] Analysis of Clop Ransomware suspiciously related to the Recent Incident (English)/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:05,931 - INFO - Saved STIX file: Results_Topic_Para/[S2W LAB] Analysis of Clop Ransomware suspiciously related to the Recent Incident (English)/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:30:05,932 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:30:06,043 - INFO - Saved STIX file: Results_Topic_Para/[S2W LAB] Analysis of Clop Ransomware suspiciously related to the Recent Incident (English)/topic_0/chunk_1/prediction_1.json
2024-12-03 06:30:06,044 - INFO - Proces

2024-12-03 06:30:10,545 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:10,548 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:10,549 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:30:13,944 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:13,945 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:14,204 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:14,205 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:14,229 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:14,232 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:20,052 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:30:20,405 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:20,407 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:20,407 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:20,413 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:20,414 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:20,415 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:20,539 - INFO - Step 5: Generating reports...
2024-12-03 06:30:20,540 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:30:20,588 - INFO - Saved STIX file: Results_Topic_Para/swift_bae_report_Follow-The Money/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:20,589 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:30:20,602 - INFO - Saved STIX file: Results_Topic_Para/swift_bae_report_Follow-The Money/topic_0/chunk_1/prediction_1.json
2024-12-03 06:30:20,603 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:30:20,619 - INFO - Saved STIX file: Results_Topic_Para/swift_bae_report_Follow-The Money/topic_1/chunk_1/prediction_1.json
2024-12-03 06:30:20,621 - INFO - Saved STIX file: Results_Topic_Para/swift_bae_report_Follow-The Money/topic_1/chunk_1/prediction_2.json
2024-12-0

2024-12-03 06:30:27,076 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:27,079 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:27,080 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:30:29,275 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:29,275 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:29,484 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:29,485 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:29,506 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:29,508 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:34,353 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:30:34,642 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:34,644 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:34,645 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:34,651 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:34,652 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:34,652 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:34,754 - INFO - Step 5: Generating reports...
2024-12-03 06:30:34,754 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:30:34,803 - INFO - Saved STIX file: Results_Topic_Para/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:34,804 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:30:34,989 - INFO - Saved STIX file: Results_Topic_Para/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_0/chunk_1/prediction_1.json
2024-12-03 06:30:34,990 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:30:35,014 - INFO - Saved STIX file: Results_Topic_Para/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_1/chunk_1/prediction_1.json
2024-12-03 

2024-12-03 06:30:42,170 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:42,172 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:42,173 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:30:44,423 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:44,425 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:44,619 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:44,620 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:44,638 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:44,641 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:49,044 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:30:49,310 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:49,311 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:49,312 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:49,317 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:49,317 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:49,318 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:49,414 - INFO - Step 5: Generating reports...
2024-12-03 06:30:49,415 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:30:49,519 - INFO - Saved STIX file: Results_Topic_Para/wp-global-energy-cyberattacks-night-dragon/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:49,520 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:30:49,529 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:30:49,542 - INFO - Saved STIX file: Results_Topic_Para/wp-global-energy-cyberattacks-night-dragon/topic_1/chunk_1/prediction_1.json
2024-12-03 06:30:49,543 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:30:49,558 - INFO - Saved STIX file: Results_Topic_Para/wp-global-energy-cyberattacks-night-dragon/topic_2/chunk_1/prediction_1.json
2024-12-03 06:30:49,559 - INFO - Processing chunk 1

2024-12-03 06:30:57,021 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:57,023 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:57,024 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:30:59,320 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:59,321 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:59,738 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:59,739 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:59,771 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:59,774 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:31:08,351 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:31:08,859 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:31:08,861 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:08,862 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:31:08,870 - BERTopic - Cluster - Completed ✓


2024-12-03 06:31:08,871 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:31:08,871 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:31:09,054 - INFO - Step 5: Generating reports...
2024-12-03 06:31:09,055 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:31:09,072 - INFO - Saved STIX file: Results_Topic_Para/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:31:09,075 - INFO - Saved STIX file: Results_Topic_Para/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:31:09,075 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:31:09,098 - INFO - Saved STIX file: Results_Topic_Para/chinese_darkweb_analysis/topic_0/chunk_1/prediction_1.json
2024-12-03 06:31:09,100 - INFO - Saved STIX file: Results_Topic_Para/chinese_darkweb_analysis/topic_0/chunk_1/prediction_2.json
2024-12-03 06:31:09,100 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:31:09,286 - INFO - Saved STIX 

2024-12-03 06:31:18,415 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:31:18,418 - INFO - Use pytorch device_name: cuda
2024-12-03 06:31:18,419 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:31:20,678 - BERTopic - Embedding - Completed ✓
2024-12-03 06:31:20,679 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:31:20,823 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:20,824 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:31:20,839 - BERTopic - Cluster - Completed ✓
2024-12-03 06:31:20,841 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:31:24,374 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:31:24,592 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:31:24,594 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:24,595 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:31:24,600 - BERTopic - Cluster - Completed ✓


2024-12-03 06:31:24,601 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:31:24,602 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:31:24,687 - INFO - Step 5: Generating reports...
2024-12-03 06:31:24,688 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:31:24,792 - INFO - Saved STIX file: Results_Topic_Para/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:31:24,793 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:31:24,915 - INFO - Saved STIX file: Results_Topic_Para/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_0/chunk_1/prediction_1.json
2024-12-03 06:31:24,919 - INFO - Saved STIX file: Results_Topic_Para/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_0/chunk_1/prediction_2.json
2024-12

2024-12-03 06:31:29,595 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:31:29,597 - INFO - Use pytorch device_name: cuda
2024-12-03 06:31:29,598 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:31:31,924 - BERTopic - Embedding - Completed ✓
2024-12-03 06:31:31,926 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:31:32,145 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:32,146 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:31:32,166 - BERTopic - Cluster - Completed ✓
2024-12-03 06:31:32,168 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:31:36,641 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:31:36,923 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:31:36,925 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:36,925 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:31:36,931 - BERTopic - Cluster - Completed ✓


2024-12-03 06:31:36,932 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:31:36,933 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:31:37,006 - INFO - Step 5: Generating reports...
2024-12-03 06:31:37,006 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:31:37,028 - INFO - Saved STIX file: Results_Topic_Para/rpt-china-chopper/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:31:37,028 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:31:37,044 - INFO - Saved STIX file: Results_Topic_Para/rpt-china-chopper/topic_0/chunk_1/prediction_1.json
2024-12-03 06:31:37,047 - INFO - Saved STIX file: Results_Topic_Para/rpt-china-chopper/topic_0/chunk_1/prediction_2.json
2024-12-03 06:31:37,048 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:31:37,056 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:31:37,171 - INFO - Saved STIX file: Results_Topic_Para/rpt-china-chopper/topic_2/chunk_1/prediction_1.json
2024-

2024-12-03 06:31:44,770 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:31:44,772 - INFO - Use pytorch device_name: cuda
2024-12-03 06:31:44,773 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

2024-12-03 06:31:47,152 - BERTopic - Embedding - Completed ✓
2024-12-03 06:31:47,153 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:31:47,827 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:47,828 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:31:47,872 - BERTopic - Cluster - Completed ✓
2024-12-03 06:31:47,875 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:31:58,567 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

2024-12-03 06:31:59,198 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:31:59,201 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:59,201 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:31:59,214 - BERTopic - Cluster - Completed ✓


2024-12-03 06:31:59,215 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:31:59,215 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:31:59,421 - INFO - Step 5: Generating reports...
2024-12-03 06:31:59,423 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:31:59,506 - INFO - Saved STIX file: Results_Topic_Para/Targeted_Attacks_Lense_NGO/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:31:59,508 - INFO - Saved STIX file: Results_Topic_Para/Targeted_Attacks_Lense_NGO/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:31:59,509 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:31:59,524 - INFO - Saved STIX file: Results_Topic_Para/Targeted_Attacks_Lense_NGO/topic_0/chunk_1/prediction_1.json
2024-12-03 06:31:59,525 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:31:59,572 - INFO - Saved STIX file: Results_Topic_Para/Targeted_Attacks_Lense_NGO/topic_1/chunk_1/prediction_1.json
2024-12-03 06:31:59,573 - INFO - Pro

2024-12-03 06:32:12,421 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:32:12,423 - INFO - Use pytorch device_name: cuda
2024-12-03 06:32:12,423 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/20 [00:00<?, ?it/s]

2024-12-03 06:32:14,862 - BERTopic - Embedding - Completed ✓
2024-12-03 06:32:14,865 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:32:15,630 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:15,631 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:32:15,679 - BERTopic - Cluster - Completed ✓
2024-12-03 06:32:15,682 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:32:28,968 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/20 [00:00<?, ?it/s]

2024-12-03 06:32:29,862 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:32:29,864 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:29,865 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:32:29,878 - BERTopic - Cluster - Completed ✓


2024-12-03 06:32:29,879 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:32:29,879 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:32:30,265 - INFO - Step 5: Generating reports...
2024-12-03 06:32:30,266 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:32:30,284 - INFO - Saved STIX file: Results_Topic_Para/Offensive-Cyber-Capabilities-Proliferation-Report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:32:30,286 - INFO - Saved STIX file: Results_Topic_Para/Offensive-Cyber-Capabilities-Proliferation-Report/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:32:30,286 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:32:30,477 - INFO - Saved STIX file: Results_Topic_Para/Offensive-Cyber-Capabilities-Proliferation-Report/topic_0/chunk_1/prediction_1.json
2024-12-03 06:32:30,477 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:32:30,492 - INFO - Saved STIX file: Results_Topic_Para/Offensive-Cyber-Capabilities

2024-12-03 06:32:43,705 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:32:43,707 - INFO - Use pytorch device_name: cuda
2024-12-03 06:32:43,708 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:32:45,924 - BERTopic - Embedding - Completed ✓
2024-12-03 06:32:45,924 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:32:46,123 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:46,123 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:32:46,138 - BERTopic - Cluster - Completed ✓
2024-12-03 06:32:46,140 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:32:49,470 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:32:49,675 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:32:49,678 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:49,678 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:32:49,685 - BERTopic - Cluster - Completed ✓


2024-12-03 06:32:49,685 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:32:49,686 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:32:49,761 - INFO - Step 5: Generating reports...
2024-12-03 06:32:49,763 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:32:49,849 - INFO - Saved STIX file: Results_Topic_Para/Hunting-Libyan-Scorpions-EN/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:32:49,858 - INFO - Saved STIX file: Results_Topic_Para/Hunting-Libyan-Scorpions-EN/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:32:49,859 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:32:50,101 - INFO - Saved STIX file: Results_Topic_Para/Hunting-Libyan-Scorpions-EN/topic_0/chunk_1/prediction_1.json
2024-12-03 06:32:50,106 - INFO - Saved STIX file: Results_Topic_Para/Hunting-Libyan-Scorpions-EN/topic_0/chunk_1/prediction_2.json
2024-12-03 06:32:50,107 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:32:50,115 - INFO -

2024-12-03 06:32:53,149 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:32:53,152 - INFO - Use pytorch device_name: cuda
2024-12-03 06:32:53,153 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:32:55,346 - BERTopic - Embedding - Completed ✓
2024-12-03 06:32:55,347 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:32:55,400 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:55,400 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:32:55,408 - BERTopic - Cluster - Completed ✓
2024-12-03 06:32:55,410 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:32:56,568 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:32:56,653 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:32:56,655 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:56,656 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:32:56,659 - BERTopic - Cluster - Completed ✓


2024-12-03 06:32:56,660 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:32:56,661 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:32:56,686 - INFO - Step 5: Generating reports...
2024-12-03 06:32:56,687 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:32:56,800 - INFO - Saved STIX file: Results_Topic_Para/massive-admedia-iframe-javascript-infection.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:32:56,801 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:32:56,819 - INFO - Saved STIX file: Results_Topic_Para/massive-admedia-iframe-javascript-infection.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:32:56,820 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:32:56,926 - INFO - Saved STIX file: Results_Topic_Para/massive-admedia-iframe-javascript-infection.html/topic_1/chunk_1/prediction_1.json
2024-12-03 06:32:56,928 - INFO - Saved STIX file: Results_Topic_Para/massive-admedia-iframe-javascrip

2024-12-03 06:32:58,800 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:32:58,802 - INFO - Use pytorch device_name: cuda
2024-12-03 06:32:58,803 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:33:01,148 - BERTopic - Embedding - Completed ✓
2024-12-03 06:33:01,149 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:33:01,214 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:01,215 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:33:01,224 - BERTopic - Cluster - Completed ✓
2024-12-03 06:33:01,226 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:33:02,858 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:33:02,969 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:33:02,970 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:02,970 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:33:02,973 - BERTopic - Cluster - Completed ✓


2024-12-03 06:33:02,973 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:33:02,974 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:33:03,006 - INFO - Step 5: Generating reports...
2024-12-03 06:33:03,007 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:33:03,116 - INFO - Saved STIX file: Results_Topic_Para/JSAC2020_3_takai_jp/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:33:03,117 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:33:03,391 - INFO - Saved STIX file: Results_Topic_Para/JSAC2020_3_takai_jp/topic_0/chunk_1/prediction_1.json
2024-12-03 06:33:03,402 - INFO - Saved STIX file: Results_Topic_Para/JSAC2020_3_takai_jp/topic_0/chunk_1/prediction_2.json
2024-12-03 06:33:03,414 - INFO - Saved STIX file: Results_Topic_Para/JSAC2020_3_takai_jp/topic_0/chunk_1/prediction_3.json
2024-12-03 06:33:03,425 - INFO - Saved STIX file: Results_Topic_Para/JSAC2020_3_takai_jp/topic_0/chunk_1/prediction_4.json
2024-12-03

2024-12-03 06:33:06,686 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:33:06,689 - INFO - Use pytorch device_name: cuda
2024-12-03 06:33:06,691 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:33:09,184 - BERTopic - Embedding - Completed ✓
2024-12-03 06:33:09,185 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:33:09,256 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:09,256 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:33:09,265 - BERTopic - Cluster - Completed ✓
2024-12-03 06:33:09,267 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:33:11,119 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:33:11,237 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:33:11,238 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:11,238 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:33:11,241 - BERTopic - Cluster - Completed ✓


2024-12-03 06:33:11,243 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:33:11,244 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:33:11,279 - INFO - Step 5: Generating reports...
2024-12-03 06:33:11,280 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:33:11,293 - INFO - Saved STIX file: Results_Topic_Para/MysterySnail attacks with Windows zero-day _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:33:11,293 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:33:11,307 - INFO - Saved STIX file: Results_Topic_Para/MysterySnail attacks with Windows zero-day _ Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:33:11,309 - INFO - Saved STIX file: Results_Topic_Para/MysterySnail attacks with Windows zero-day _ Securelist/topic_0/chunk_1/prediction_2.json
2024-12-03 06:33:11,310 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:33:11,326 - INFO - Saved STIX file: Results_Topic_Para/MysterySnai

2024-12-03 06:33:14,458 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:33:14,460 - INFO - Use pytorch device_name: cuda
2024-12-03 06:33:14,461 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/24 [00:00<?, ?it/s]

2024-12-03 06:33:16,883 - BERTopic - Embedding - Completed ✓
2024-12-03 06:33:16,884 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:33:17,993 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:17,993 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:33:18,052 - BERTopic - Cluster - Completed ✓
2024-12-03 06:33:18,055 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:33:30,055 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/24 [00:00<?, ?it/s]

2024-12-03 06:33:30,817 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:33:30,820 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:30,820 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:33:30,836 - BERTopic - Cluster - Completed ✓


2024-12-03 06:33:30,838 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:33:30,840 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:33:31,038 - INFO - Step 5: Generating reports...
2024-12-03 06:33:31,040 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:33:31,156 - INFO - Saved STIX file: Results_Topic_Para/C5_APT_SKHack/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:33:31,156 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:33:31,292 - INFO - Saved STIX file: Results_Topic_Para/C5_APT_SKHack/topic_0/chunk_1/prediction_1.json
2024-12-03 06:33:31,295 - INFO - Saved STIX file: Results_Topic_Para/C5_APT_SKHack/topic_0/chunk_1/prediction_2.json
2024-12-03 06:33:31,296 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:33:31,406 - INFO - Saved STIX file: Results_Topic_Para/C5_APT_SKHack/topic_1/chunk_1/prediction_1.json
2024-12-03 06:33:31,407 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:33:31,4

2024-12-03 06:33:53,606 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:33:53,608 - INFO - Use pytorch device_name: cuda
2024-12-03 06:33:53,609 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:33:55,963 - BERTopic - Embedding - Completed ✓
2024-12-03 06:33:55,964 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:33:56,899 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:33:56,899 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:33:56,950 - BERTopic - Cluster - Completed ✓
2024-12-03 06:33:56,953 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:34:07,069 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:34:07,687 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:34:07,690 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:07,693 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:34:07,708 - BERTopic - Cluster - Completed ✓


2024-12-03 06:34:07,709 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:34:07,711 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:34:07,917 - INFO - Step 5: Generating reports...
2024-12-03 06:34:07,918 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:34:07,942 - INFO - Saved STIX file: Results_Topic_Para/Talos_Cobalt_Strike/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:34:07,943 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:34:07,957 - INFO - Saved STIX file: Results_Topic_Para/Talos_Cobalt_Strike/topic_0/chunk_1/prediction_1.json
2024-12-03 06:34:07,957 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:34:07,979 - INFO - Saved STIX file: Results_Topic_Para/Talos_Cobalt_Strike/topic_1/chunk_1/prediction_1.json
2024-12-03 06:34:07,980 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:34:07,995 - INFO - Saved STIX file: Results_Topic_Para/Talos_Cobalt_Strike/topic_2/chunk_1/prediction_1.js

2024-12-03 06:34:19,245 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:34:19,247 - INFO - Use pytorch device_name: cuda
2024-12-03 06:34:19,247 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:34:21,822 - BERTopic - Embedding - Completed ✓
2024-12-03 06:34:21,823 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:34:21,962 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:21,963 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:34:21,977 - BERTopic - Cluster - Completed ✓
2024-12-03 06:34:21,980 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:34:25,112 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:34:25,322 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:34:25,326 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:25,327 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:34:25,331 - BERTopic - Cluster - Completed ✓


2024-12-03 06:34:25,332 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:34:25,333 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:34:25,400 - INFO - Step 5: Generating reports...
2024-12-03 06:34:25,402 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:34:25,428 - INFO - Saved STIX file: Results_Topic_Para/sophos-rotten-tomato-campaign/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:34:25,431 - INFO - Saved STIX file: Results_Topic_Para/sophos-rotten-tomato-campaign/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:34:25,432 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:34:25,492 - INFO - Saved STIX file: Results_Topic_Para/sophos-rotten-tomato-campaign/topic_0/chunk_1/prediction_1.json
2024-12-03 06:34:25,499 - INFO - Saved STIX file: Results_Topic_Para/sophos-rotten-tomato-campaign/topic_0/chunk_1/prediction_2.json
2024-12-03 06:34:25,499 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:34:25,555 

2024-12-03 06:34:29,272 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:34:29,274 - INFO - Use pytorch device_name: cuda
2024-12-03 06:34:29,274 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:34:31,540 - BERTopic - Embedding - Completed ✓
2024-12-03 06:34:31,541 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:34:31,700 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:31,702 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:34:31,716 - BERTopic - Cluster - Completed ✓
2024-12-03 06:34:31,719 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:34:35,196 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:34:35,424 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:34:35,427 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:35,427 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:34:35,433 - BERTopic - Cluster - Completed ✓


2024-12-03 06:34:35,434 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:34:35,435 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:34:35,514 - INFO - Step 5: Generating reports...
2024-12-03 06:34:35,515 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:34:35,538 - INFO - Saved STIX file: Results_Topic_Para/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:34:35,542 - INFO - Saved STIX file: Results_Topic_Para/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:34:35,543 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:34:35,576 - INFO - Saved STIX file: Results_Topic_Para/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_0/chunk_1/prediction_1.json
2024-12-03 06:34:35,578 - INFO - Saved STIX file: Results_Topic_Para/Cryptocurrency-Miners-XMRig-Based-Co

2024-12-03 06:34:40,482 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:34:40,484 - INFO - Use pytorch device_name: cuda
2024-12-03 06:34:40,485 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:34:42,830 - BERTopic - Embedding - Completed ✓
2024-12-03 06:34:42,830 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:34:43,711 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:43,712 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:34:43,763 - BERTopic - Cluster - Completed ✓
2024-12-03 06:34:43,766 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:34:53,459 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:34:53,989 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:34:53,992 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:34:53,992 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:34:54,006 - BERTopic - Cluster - Completed ✓


2024-12-03 06:34:54,007 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:34:54,007 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:34:54,079 - INFO - Step 5: Generating reports...
2024-12-03 06:34:54,079 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:34:54,181 - INFO - Saved STIX file: Results_Topic_Para/Secrets_of_the_Comfoo_Masters/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:34:54,182 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:34:54,188 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:34:54,313 - INFO - Saved STIX file: Results_Topic_Para/Secrets_of_the_Comfoo_Masters/topic_1/chunk_1/prediction_1.json
2024-12-03 06:34:54,317 - INFO - Saved STIX file: Results_Topic_Para/Secrets_of_the_Comfoo_Masters/topic_1/chunk_1/prediction_2.json
2024-12-03 06:34:54,317 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:34:54,336 - INFO - Saved STIX file: Results_Topic_Para/Secrets_of_the_Comfoo

2024-12-03 06:35:12,320 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:12,322 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:12,322 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:35:14,553 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:14,554 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:14,660 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:14,661 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:14,672 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:14,675 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:17,373 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:35:17,559 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:17,560 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:17,561 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:17,564 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:17,564 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:17,565 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:17,637 - INFO - Step 5: Generating reports...
2024-12-03 06:35:17,638 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:35:17,845 - INFO - Saved STIX file: Results_Topic_Para/TAG-74/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:17,849 - INFO - Saved STIX file: Results_Topic_Para/TAG-74/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:35:17,849 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:35:17,869 - INFO - Saved STIX file: Results_Topic_Para/TAG-74/topic_0/chunk_1/prediction_1.json
2024-12-03 06:35:17,871 - INFO - Saved STIX file: Results_Topic_Para/TAG-74/topic_0/chunk_1/prediction_2.json
2024-12-03 06:35:17,871 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:35:17,880 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:35:17,887 - INFO - Processing chun

2024-12-03 06:35:21,850 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:21,852 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:21,853 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:35:24,142 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:24,143 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:24,367 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:24,368 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:24,388 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:24,390 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:29,032 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:35:29,322 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:29,323 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:29,324 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:29,330 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:29,332 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:29,332 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:29,429 - INFO - Step 5: Generating reports...
2024-12-03 06:35:29,430 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:35:29,547 - INFO - Saved STIX file: Results_Topic_Para/tenable-cloud-risk-report-2024/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:29,548 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:35:29,743 - INFO - Saved STIX file: Results_Topic_Para/tenable-cloud-risk-report-2024/topic_0/chunk_1/prediction_1.json
2024-12-03 06:35:29,744 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:35:29,779 - INFO - Saved STIX file: Results_Topic_Para/tenable-cloud-risk-report-2024/topic_1/chunk_1/prediction_1.json
2024-12-03 06:35:29,780 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:35:29,802 - INFO - Saved STIX file: Results_Topic_Para/tenable-cloud-risk

2024-12-03 06:35:35,486 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:35,490 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:35,491 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:35:37,669 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:37,671 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:37,720 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:37,721 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:37,728 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:37,730 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:38,883 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:35:38,974 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:38,975 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:38,976 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:38,978 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:38,978 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:38,979 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:39,003 - INFO - Step 5: Generating reports...
2024-12-03 06:35:39,004 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:35:39,048 - INFO - Saved STIX file: Results_Topic_Para/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:39,049 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:35:39,058 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:35:39,080 - INFO - Saved STIX file: Results_Topic_Para/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_1/chunk_1/prediction_1.json
2024-12-03 06:35:39,082 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:35:39,171 - INFO - Saved STIX file: Results_Topic_Para/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_2/chunk_1/prediction_1.json
202

2024-12-03 06:35:42,237 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:42,240 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:42,241 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

2024-12-03 06:35:45,045 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:45,046 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:45,670 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:45,671 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:45,711 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:45,714 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:55,585 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

2024-12-03 06:35:56,228 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:56,232 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:56,234 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:56,247 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:56,247 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:56,248 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:56,457 - INFO - Step 5: Generating reports...
2024-12-03 06:35:56,458 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:35:56,473 - INFO - Saved STIX file: Results_Topic_Para/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:56,475 - INFO - Saved STIX file: Results_Topic_Para/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:35:56,475 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:35:56,489 - INFO - Saved STIX file: Results_Topic_Para/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_0/chunk_1/prediction_1.json
2024-12-03 06:35:56,490 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:35:56,542 - INFO - Saved STIX file: Results_Topic_Para/G

2024-12-03 06:36:05,674 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:05,677 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:05,677 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:36:07,890 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:07,891 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:07,998 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:07,999 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:08,010 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:08,013 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:10,425 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:36:10,590 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:10,591 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:10,591 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:10,595 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:10,595 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:10,596 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:10,641 - INFO - Step 5: Generating reports...
2024-12-03 06:36:10,641 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:36:10,687 - INFO - Saved STIX file: Results_Topic_Para/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:10,690 - INFO - Saved STIX file: Results_Topic_Para/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:36:10,690 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:36:10,704 - INFO - Saved STIX file: Results_Topic_Para/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_0/chunk_1/prediction_1.json
2024-12-03 06:36:10,705 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:36:10,711 - INFO - Processing ch

2024-12-03 06:36:14,012 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:14,015 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:14,016 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:36:16,282 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:16,284 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:16,555 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:16,556 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:16,579 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:16,582 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:21,937 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:36:22,281 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:22,283 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:22,283 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:22,290 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:22,291 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:22,291 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:22,410 - INFO - Step 5: Generating reports...
2024-12-03 06:36:22,411 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:36:22,452 - INFO - Saved STIX file: Results_Topic_Para/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:22,453 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:36:22,470 - INFO - Saved STIX file: Results_Topic_Para/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_0/chunk_1/prediction_1.json
2024-12-03 06:36:22,470 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:36:22,476 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:36:22,595 - INFO - Saved STIX file: Results_Topic_Para/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_2/chunk_1/prediction_1.json
2024-1

2024-12-03 06:36:30,555 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:30,558 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:30,559 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:36:32,820 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:32,820 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:32,855 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:32,855 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:32,861 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:32,863 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:33,872 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:36:33,956 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:33,957 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:33,959 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:33,961 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:33,961 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:33,962 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:33,993 - INFO - Step 5: Generating reports...
2024-12-03 06:36:33,994 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:36:34,070 - INFO - Saved STIX file: Results_Topic_Para/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:36:34,072 - INFO - Saved STIX file: Results_Topic_Para/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_0/chunk_1/prediction_2.json
2024-12-03 06:36:34,072 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:36:34,253 - INFO - Saved STIX file: Results_Topic_Para/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_1/chunk_1/prediction_1.json
2024-12-03 06:36:34,254 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:36:34,271 - INFO - Saved STIX file: Results_Topic_Para/down-the-h-w0rm-hole-with-houdinis-rat.html/topic

2024-12-03 06:36:36,160 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:36,162 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:36,162 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:36:38,363 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:38,364 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:38,454 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:38,455 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:38,466 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:38,468 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:41,364 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:36:41,562 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:41,564 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:41,564 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:41,567 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:41,568 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:41,568 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:41,657 - INFO - Step 5: Generating reports...
2024-12-03 06:36:41,657 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:36:41,679 - INFO - Saved STIX file: Results_Topic_Para/tropic-trooper-targets-taiwanese/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:41,679 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:36:41,692 - INFO - Saved STIX file: Results_Topic_Para/tropic-trooper-targets-taiwanese/topic_0/chunk_1/prediction_1.json
2024-12-03 06:36:41,693 - INFO - Saved STIX file: Results_Topic_Para/tropic-trooper-targets-taiwanese/topic_0/chunk_1/prediction_2.json
2024-12-03 06:36:41,694 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:36:41,706 - INFO - Saved STIX file: Results_Topic_Para/tropic-trooper-targets-taiwanese/topic_1/chunk_1/prediction_1.json
2024-12-03 06

2024-12-03 06:36:45,446 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:45,449 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:45,449 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:36:47,677 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:47,678 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:47,878 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:47,879 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:47,897 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:47,900 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:51,690 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:36:51,936 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:51,938 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:51,938 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:51,944 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:51,946 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:51,947 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:52,013 - INFO - Step 5: Generating reports...
2024-12-03 06:36:52,015 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:36:52,201 - INFO - Saved STIX file: Results_Topic_Para/20230620_threat_inteligence_report_apt37_macos/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:52,202 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:36:52,221 - INFO - Saved STIX file: Results_Topic_Para/20230620_threat_inteligence_report_apt37_macos/topic_0/chunk_1/prediction_1.json
2024-12-03 06:36:52,223 - INFO - Saved STIX file: Results_Topic_Para/20230620_threat_inteligence_report_apt37_macos/topic_0/chunk_1/prediction_2.json
2024-12-03 06:36:52,224 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:36:52,277 - INFO - Saved STIX file: Results_Topic_Para/20230620_threat_inteligence_report_apt

2024-12-03 06:36:59,024 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:59,026 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:59,027 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:37:01,216 - BERTopic - Embedding - Completed ✓
2024-12-03 06:37:01,217 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:37:01,349 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:01,350 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:37:01,366 - BERTopic - Cluster - Completed ✓
2024-12-03 06:37:01,368 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:37:04,545 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:37:04,738 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:37:04,739 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:04,739 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:37:04,743 - BERTopic - Cluster - Completed ✓


2024-12-03 06:37:04,746 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:37:04,747 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:37:04,807 - INFO - Step 5: Generating reports...
2024-12-03 06:37:04,808 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:37:04,814 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:37:04,820 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:37:04,936 - INFO - Saved STIX file: Results_Topic_Para/MDM_India/topic_1/chunk_1/prediction_1.json
2024-12-03 06:37:04,937 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:37:04,960 - INFO - Saved STIX file: Results_Topic_Para/MDM_India/topic_2/chunk_1/prediction_1.json
2024-12-03 06:37:04,960 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:37:04,967 - INFO - Processing chunk 1/1 of topic 4...
2024-12-03 06:37:04,974 - INFO - Processing chunk 1/1 of topic 5...
2024-12-03 06:37:05,087 - INFO - Saved STIX file: Results_To

2024-12-03 06:37:11,024 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:37:11,028 - INFO - Use pytorch device_name: cuda
2024-12-03 06:37:11,029 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:37:14,016 - BERTopic - Embedding - Completed ✓
2024-12-03 06:37:14,016 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:37:15,378 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:15,379 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:37:15,446 - BERTopic - Cluster - Completed ✓
2024-12-03 06:37:15,449 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:37:28,433 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:37:29,398 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:37:29,401 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:29,402 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:37:29,419 - BERTopic - Cluster - Completed ✓


2024-12-03 06:37:29,420 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:37:29,420 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:37:29,815 - INFO - Step 5: Generating reports...
2024-12-03 06:37:29,815 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:37:29,928 - INFO - Saved STIX file: Results_Topic_Para/UAC0006_FC.pdf/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:37:29,929 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:37:29,944 - INFO - Saved STIX file: Results_Topic_Para/UAC0006_FC.pdf/topic_0/chunk_1/prediction_1.json
2024-12-03 06:37:29,945 - INFO - Saved STIX file: Results_Topic_Para/UAC0006_FC.pdf/topic_0/chunk_1/prediction_2.json
2024-12-03 06:37:29,946 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:37:29,973 - INFO - Saved STIX file: Results_Topic_Para/UAC0006_FC.pdf/topic_1/chunk_1/prediction_1.json
2024-12-03 06:37:29,974 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:37:

2024-12-03 06:37:49,079 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:37:49,082 - INFO - Use pytorch device_name: cuda
2024-12-03 06:37:49,082 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

2024-12-03 06:37:51,478 - BERTopic - Embedding - Completed ✓
2024-12-03 06:37:51,478 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:37:52,896 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:52,897 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:37:52,952 - BERTopic - Cluster - Completed ✓
2024-12-03 06:37:52,955 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:38:04,476 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

2024-12-03 06:38:05,240 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:38:05,243 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:05,245 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:38:05,263 - BERTopic - Cluster - Completed ✓


2024-12-03 06:38:05,264 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:38:05,264 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:38:05,469 - INFO - Step 5: Generating reports...
2024-12-03 06:38:05,470 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:38:05,525 - INFO - Saved STIX file: Results_Topic_Para/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:38:05,528 - INFO - Saved STIX file: Results_Topic_Para/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:38:05,530 - INFO - Saved STIX file: Results_Topic_Para/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:38:05,531 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:38:05,714 - INFO - Saved STIX file: Results_Topic_Para/ClearSky-Fox-Kitten-Campaign-v1/topic_0/chunk_1/prediction_1.json
2024-12-03 06:38:05,714 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:3

2024-12-03 06:38:29,826 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:38:29,829 - INFO - Use pytorch device_name: cuda
2024-12-03 06:38:29,831 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:38:32,026 - BERTopic - Embedding - Completed ✓
2024-12-03 06:38:32,027 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:38:32,091 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:32,092 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:38:32,102 - BERTopic - Cluster - Completed ✓
2024-12-03 06:38:32,105 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:38:33,627 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:38:33,723 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:38:33,724 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:33,725 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:38:33,727 - BERTopic - Cluster - Completed ✓


2024-12-03 06:38:33,727 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:38:33,728 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:38:33,755 - INFO - Step 5: Generating reports...
2024-12-03 06:38:33,756 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:38:33,775 - INFO - Saved STIX file: Results_Topic_Para/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:38:33,777 - INFO - Saved STIX file: Results_Topic_Para/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:38:33,777 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:38:33,791 - INFO - Saved STIX file: Results_Topic_Para/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_0/chunk_1/prediction_1.json
2024-12-03 06:38:33,792 - INFO - Saved STIX file: Results_Topic_Para/Pow

2024-12-03 06:38:36,595 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:38:36,598 - INFO - Use pytorch device_name: cuda
2024-12-03 06:38:36,599 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:38:38,877 - BERTopic - Embedding - Completed ✓
2024-12-03 06:38:38,878 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:38:39,208 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:39,209 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:38:39,235 - BERTopic - Cluster - Completed ✓
2024-12-03 06:38:39,238 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:38:45,553 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:38:45,970 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:38:45,972 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:45,973 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:38:45,981 - BERTopic - Cluster - Completed ✓


2024-12-03 06:38:45,981 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:38:45,982 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:38:46,136 - INFO - Step 5: Generating reports...
2024-12-03 06:38:46,137 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:38:46,170 - INFO - Saved STIX file: Results_Topic_Para/CERTFR-2021-CTI-009/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:38:46,171 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:38:46,177 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:38:46,188 - INFO - Saved STIX file: Results_Topic_Para/CERTFR-2021-CTI-009/topic_1/chunk_1/prediction_1.json
2024-12-03 06:38:46,189 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:38:46,323 - INFO - Saved STIX file: Results_Topic_Para/CERTFR-2021-CTI-009/topic_2/chunk_1/prediction_1.json
2024-12-03 06:38:46,325 - INFO - Saved STIX file: Results_Topic_Para/CERTFR-2021-CTI-009/topic_2/chunk_1/prediction_2.js

2024-12-03 06:38:55,768 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:38:55,771 - INFO - Use pytorch device_name: cuda
2024-12-03 06:38:55,772 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:38:57,982 - BERTopic - Embedding - Completed ✓
2024-12-03 06:38:57,982 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:38:58,048 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:58,049 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:38:58,057 - BERTopic - Cluster - Completed ✓
2024-12-03 06:38:58,060 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:38:59,785 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:38:59,910 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:38:59,912 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:59,913 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:38:59,915 - BERTopic - Cluster - Completed ✓


2024-12-03 06:38:59,916 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:38:59,916 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:38:59,949 - INFO - Step 5: Generating reports...
2024-12-03 06:38:59,950 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:38:59,963 - INFO - Saved STIX file: Results_Topic_Para/The Naikon APT - Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:38:59,964 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:00,054 - INFO - Saved STIX file: Results_Topic_Para/The Naikon APT - Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:00,056 - INFO - Saved STIX file: Results_Topic_Para/The Naikon APT - Securelist/topic_0/chunk_1/prediction_2.json
2024-12-03 06:39:00,056 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:39:00,508 - INFO - Saved STIX file: Results_Topic_Para/The Naikon APT - Securelist/topic_1/chunk_1/prediction_1.json
2024-12-03 06:39:00,516 - INFO - 

2024-12-03 06:39:02,609 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:02,612 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:02,613 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:39:04,817 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:04,818 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:04,878 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:04,879 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:04,888 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:04,890 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:06,226 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:39:06,329 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:06,331 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:06,332 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:06,334 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:06,335 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:06,335 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:06,368 - INFO - Step 5: Generating reports...
2024-12-03 06:39:06,369 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:39:06,385 - INFO - Saved STIX file: Results_Topic_Para/FTA 1010 - njRAT The Saga Continues/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:06,386 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:06,670 - INFO - Saved STIX file: Results_Topic_Para/FTA 1010 - njRAT The Saga Continues/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:06,686 - INFO - Saved STIX file: Results_Topic_Para/FTA 1010 - njRAT The Saga Continues/topic_0/chunk_1/prediction_2.json
2024-12-03 06:39:06,687 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:39:06,790 - INFO - Saved STIX file: Results_Topic_Para/FTA 1010 - njRAT The Saga Continues/topic_1/chunk_1/prediction_1.json
2

2024-12-03 06:39:09,041 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:09,045 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:09,045 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:39:11,264 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:11,266 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:11,305 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:11,306 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:11,313 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:11,316 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:12,519 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:39:12,626 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:12,628 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:12,631 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:12,636 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:12,637 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:12,638 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:12,674 - INFO - Step 5: Generating reports...
2024-12-03 06:39:12,676 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:39:12,779 - INFO - Saved STIX file: Results_Topic_Para/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:12,781 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:12,805 - INFO - Saved STIX file: Results_Topic_Para/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:12,806 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:39:13,076 - INFO - Saved STIX file: Results_Topic_Para/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_1/chunk_1/prediction_1.json
2024-12-03 06:39:13,078 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:39:1

2024-12-03 06:39:15,081 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:15,083 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:15,083 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:39:17,350 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:17,351 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:17,460 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:17,461 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:17,474 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:17,476 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:20,148 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:39:20,332 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:20,334 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:20,335 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:20,338 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:20,339 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:20,339 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:20,393 - INFO - Step 5: Generating reports...
2024-12-03 06:39:20,394 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:39:20,537 - INFO - Saved STIX file: Results_Topic_Para/Operation_Honeybee/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:20,543 - INFO - Saved STIX file: Results_Topic_Para/Operation_Honeybee/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:39:20,543 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:20,560 - INFO - Saved STIX file: Results_Topic_Para/Operation_Honeybee/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:20,560 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:39:20,573 - INFO - Saved STIX file: Results_Topic_Para/Operation_Honeybee/topic_1/chunk_1/prediction_1.json
2024-12-03 06:39:20,574 - INFO - Processing chunk 1/1 of topic 2...


2024-12-03 06:39:24,038 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:24,041 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:24,042 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:39:26,654 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:26,655 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:26,709 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:26,709 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:26,716 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:26,719 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:28,083 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:39:28,189 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:28,193 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:28,193 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:28,196 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:28,196 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:28,197 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:28,225 - INFO - Step 5: Generating reports...
2024-12-03 06:39:28,226 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:39:28,240 - INFO - Saved STIX file: Results_Topic_Para/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:28,243 - INFO - Saved STIX file: Results_Topic_Para/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:39:28,243 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:28,258 - INFO - Saved STIX file: Results_Topic_Para/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:28,260 - INFO - Saved STIX file: Results_Topic_Para/ScarCruft continues t

2024-12-03 06:39:31,665 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:31,667 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:31,668 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

2024-12-03 06:39:34,042 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:34,047 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:34,962 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:34,964 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:35,018 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:35,021 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:47,803 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

2024-12-03 06:39:48,547 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:48,550 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:48,550 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:48,566 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:48,567 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:48,568 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:48,808 - INFO - Step 5: Generating reports...
2024-12-03 06:39:48,810 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:39:48,822 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:39:48,836 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2022/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:48,838 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:39:48,851 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2022/topic_1/chunk_1/prediction_1.json
2024-12-03 06:39:48,852 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:39:48,864 - INFO - Saved STIX file: Results_Topic_Para/MacMalware_2022/topic_2/chunk_1/prediction_1.json
2024-12-03 06:39:48,865 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:39:48,928 - INFO - Saved STIX file: Results_Topic_P

2024-12-03 06:40:04,017 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:04,019 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:04,020 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:40:06,198 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:06,199 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:06,246 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:06,247 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:06,254 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:06,256 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:07,349 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:40:07,430 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:07,431 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:07,431 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:07,433 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:07,434 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:07,434 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:07,453 - INFO - Step 5: Generating reports...
2024-12-03 06:40:07,454 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:40:07,468 - INFO - Saved STIX file: Results_Topic_Para/C2_Communication_of_ThreatNeedle/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:07,468 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:40:07,596 - INFO - Saved STIX file: Results_Topic_Para/C2_Communication_of_ThreatNeedle/topic_0/chunk_1/prediction_1.json
2024-12-03 06:40:07,598 - INFO - Saved STIX file: Results_Topic_Para/C2_Communication_of_ThreatNeedle/topic_0/chunk_1/prediction_2.json
2024-12-03 06:40:07,599 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:40:07,708 - INFO - Saved STIX file: Results_Topic_Para/C2_Communication_of_ThreatNeedle/topic_1/chunk_1/prediction_1.json
2024-12-03 06

2024-12-03 06:40:11,836 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:11,838 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:11,838 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/19 [00:00<?, ?it/s]

2024-12-03 06:40:14,196 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:14,197 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:14,899 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:14,900 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:14,947 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:14,950 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:26,401 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/19 [00:00<?, ?it/s]

2024-12-03 06:40:27,055 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:27,058 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:27,060 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:27,073 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:27,073 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:27,074 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:27,300 - INFO - Step 5: Generating reports...
2024-12-03 06:40:27,301 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:40:27,317 - INFO - Saved STIX file: Results_Topic_Para/packrat-report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:27,318 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:40:27,334 - INFO - Saved STIX file: Results_Topic_Para/packrat-report/topic_0/chunk_1/prediction_1.json
2024-12-03 06:40:27,335 - INFO - Saved STIX file: Results_Topic_Para/packrat-report/topic_0/chunk_1/prediction_2.json
2024-12-03 06:40:27,339 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:40:27,452 - INFO - Saved STIX file: Results_Topic_Para/packrat-report/topic_1/chunk_1/prediction_1.json
2024-12-03 06:40:27,457 - INFO - Saved STIX file: Results_Topic_Para/packrat-report/t

2024-12-03 06:40:41,118 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:41,121 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:41,121 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:40:43,377 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:43,378 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:43,705 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:43,705 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:43,734 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:43,736 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:50,102 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:40:50,477 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:50,478 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:50,479 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:50,486 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:50,487 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:50,487 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:50,611 - INFO - Step 5: Generating reports...
2024-12-03 06:40:50,612 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:40:50,729 - INFO - Saved STIX file: Results_Topic_Para/eset-sednit-part3/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:50,730 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:40:50,744 - INFO - Saved STIX file: Results_Topic_Para/eset-sednit-part3/topic_0/chunk_1/prediction_1.json
2024-12-03 06:40:50,745 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:40:50,863 - INFO - Saved STIX file: Results_Topic_Para/eset-sednit-part3/topic_1/chunk_1/prediction_1.json
2024-12-03 06:40:50,864 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:40:50,999 - INFO - Saved STIX file: Results_Topic_Para/eset-sednit-part3/topic_2/chunk_1/prediction_1.json
2024-

2024-12-03 06:41:00,067 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:00,069 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:00,069 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:41:02,227 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:02,228 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:02,253 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:02,254 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:02,260 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:02,262 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:02,729 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:41:02,773 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:02,774 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:02,775 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:02,777 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:02,777 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:02,777 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:02,789 - INFO - Step 5: Generating reports...
2024-12-03 06:41:02,790 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:02,973 - INFO - Saved STIX file: Results_Topic_Para/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:02,974 - INFO - Saved STIX file: Results_Topic_Para/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:41:02,976 - INFO - Saved STIX file: Results_Topic_Para/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:41:02,977 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:02,996 - INFO - Saved STIX file: Results_Topic_Para/Targeted Attack on Indian 

2024-12-03 06:41:03,964 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:03,968 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:03,969 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:41:06,184 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:06,186 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:06,277 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:06,278 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:06,288 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:06,291 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:08,504 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:41:08,663 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:08,664 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:08,664 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:08,667 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:08,668 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:08,668 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:08,729 - INFO - Step 5: Generating reports...
2024-12-03 06:41:08,730 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:08,924 - INFO - Saved STIX file: Results_Topic_Para/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:08,930 - INFO - Saved STIX file: Results_Topic_Para/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:41:08,935 - INFO - Saved STIX file: Results_Topic_Para/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:41:08,935 - INFO - Processing chunk 1/1 of topic 0...

2024-12-03 06:41:12,273 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:12,275 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:12,276 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:14,524 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:14,525 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:14,578 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:14,579 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:14,586 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:14,589 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:16,017 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:16,102 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:16,103 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:16,104 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:16,107 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:16,108 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:16,110 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:16,135 - INFO - Step 5: Generating reports...
2024-12-03 06:41:16,136 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:16,142 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:16,156 - INFO - Saved STIX file: Results_Topic_Para/APT40 Examining a China-Nexus Espionage Actor  APT40 Examining a China-Nexus Espionage Actor/topic_0/chunk_1/prediction_1.json
2024-12-03 06:41:16,157 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:41:16,244 - INFO - Saved STIX file: Results_Topic_Para/APT40 Examining a China-Nexus Espionage Actor  APT40 Examining a China-Nexus Espionage Actor/topic_1/chunk_1/prediction_1.json
2024-12-03 06:41:16,247 - INFO - Saved STIX file: Results_Topic_Para/APT40 Examining a China-Nexus Espionage Actor  APT40 Examining a China-Nexus Espionage Actor/topi

2024-12-03 06:41:17,634 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:17,636 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:17,636 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:19,796 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:19,797 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:19,846 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:19,847 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:19,855 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:19,858 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:21,082 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:21,165 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:21,166 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:21,167 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:21,169 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:21,169 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:21,170 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:21,191 - INFO - Step 5: Generating reports...
2024-12-03 06:41:21,192 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:21,205 - INFO - Saved STIX file: Results_Topic_Para/spear_phishing_techn.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:21,210 - INFO - Saved STIX file: Results_Topic_Para/spear_phishing_techn.html/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:41:21,210 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:21,309 - INFO - Saved STIX file: Results_Topic_Para/spear_phishing_techn.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:41:21,310 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:41:21,329 - INFO - Saved STIX file: Results_Topic_Para/spear_phishing_techn.html/topic_1/chunk_1/prediction_1.json
2024-12-03 06:41:21,332 - INFO - Saved S

2024-12-03 06:41:24,034 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:24,036 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:24,036 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:41:26,262 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:26,263 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:26,669 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:26,671 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:26,703 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:26,706 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:34,260 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:41:34,706 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:34,708 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:34,708 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:34,717 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:34,718 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:34,718 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:34,871 - INFO - Step 5: Generating reports...
2024-12-03 06:41:34,873 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:34,889 - INFO - Saved STIX file: Results_Topic_Para/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:34,891 - INFO - Saved STIX file: Results_Topic_Para/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:41:34,892 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:34,940 - INFO - Saved STIX file: Results_Topic_Para/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_0/chunk_1/prediction_1.json
2024-12-03 06:41:34,940 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:41:35,021 - INFO - Save

2024-12-03 06:41:42,690 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:42,692 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:42,693 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:45,244 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:45,246 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:45,297 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:45,298 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:45,306 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:45,309 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:46,667 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:41:46,759 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:46,761 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:46,761 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:46,763 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:46,764 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:46,764 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:46,799 - INFO - Step 5: Generating reports...
2024-12-03 06:41:46,800 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:46,823 - INFO - Saved STIX file: Results_Topic_Para/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_0/chunk_1/prediction_1.json
2024-12-03 06:41:46,829 - INFO - Saved STIX file: Results_Topic_Para/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_0/chunk_1/prediction_2.json
2024-12-03 06:41:46,829 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:41:47,015 - INFO - Saved STIX file: Results_Topic_Para/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_1/chunk_1/prediction_1.json
2024-12-03 06:41:47,015 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:41:47,304 - INFO - Saved STIX file: Results_Topic_Para/Global_

2024-12-03 06:41:49,880 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:49,884 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:49,885 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:41:52,108 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:52,113 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:52,207 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:52,208 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:52,220 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:52,223 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:54,580 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:41:54,755 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:54,757 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:54,757 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:54,761 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:54,761 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:54,761 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:54,846 - INFO - Step 5: Generating reports...
2024-12-03 06:41:54,847 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:41:55,145 - INFO - Saved STIX file: Results_Topic_Para/CSA_TTPs-of-Indicted-APT40-Actors-Associated-with-China-MSS-Hainan-State-Security-Department/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:55,146 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:41:55,184 - INFO - Saved STIX file: Results_Topic_Para/CSA_TTPs-of-Indicted-APT40-Actors-Associated-with-China-MSS-Hainan-State-Security-Department/topic_0/chunk_1/prediction_1.json
2024-12-03 06:41:55,185 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:41:55,299 - INFO - Saved STIX file: Results_Topic_Para/CSA_TTPs-of-Indicted-APT40-Actors-Associated-with-China-MSS-Hainan-State-Security-Department/top

2024-12-03 06:41:59,589 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:59,591 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:59,591 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:42:01,815 - BERTopic - Embedding - Completed ✓
2024-12-03 06:42:01,817 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:42:01,997 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:01,998 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:42:02,015 - BERTopic - Cluster - Completed ✓
2024-12-03 06:42:02,018 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:42:06,165 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:42:06,433 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:42:06,434 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:06,434 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:42:06,439 - BERTopic - Cluster - Completed ✓


2024-12-03 06:42:06,440 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:42:06,440 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:42:06,533 - INFO - Step 5: Generating reports...
2024-12-03 06:42:06,534 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:42:06,569 - INFO - Saved STIX file: Results_Topic_Para/The zero-day exploits of Operation WizardOpium _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:42:06,570 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:42:06,584 - INFO - Saved STIX file: Results_Topic_Para/The zero-day exploits of Operation WizardOpium _ Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:42:06,585 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:42:06,617 - INFO - Saved STIX file: Results_Topic_Para/The zero-day exploits of Operation WizardOpium _ Securelist/topic_1/chunk_1/prediction_1.json
2024-12-03 06:42:06,617 - INFO - Processing chunk 1/1 of topic 2...


2024-12-03 06:42:16,138 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:42:16,142 - INFO - Use pytorch device_name: cuda
2024-12-03 06:42:16,143 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/44 [00:00<?, ?it/s]

2024-12-03 06:42:19,108 - BERTopic - Embedding - Completed ✓
2024-12-03 06:42:19,110 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:42:21,766 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:21,767 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:42:21,880 - BERTopic - Cluster - Completed ✓
2024-12-03 06:42:21,883 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:42:49,512 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/44 [00:00<?, ?it/s]

2024-12-03 06:42:51,087 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:42:51,091 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:51,092 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:42:51,121 - BERTopic - Cluster - Completed ✓


2024-12-03 06:42:51,122 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:42:51,123 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:42:51,634 - INFO - Step 5: Generating reports...
2024-12-03 06:42:51,635 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:42:51,718 - INFO - Saved STIX file: Results_Topic_Para/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:42:51,720 - INFO - Saved STIX file: Results_Topic_Para/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:42:51,721 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:42:51,736 - INFO - Saved STIX file: Results_Topic_Para/FINDING BEACONS IN THE DARK 1650728751599/topic_0/chunk_1/prediction_1.json
2024-12-03 06:42:51,737 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:42:51,747 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:42:51,845 - INFO - Saved STIX file: Res

2024-12-03 06:43:25,320 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:43:25,324 - INFO - Use pytorch device_name: cuda
2024-12-03 06:43:25,324 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/31 [00:00<?, ?it/s]

2024-12-03 06:43:27,764 - BERTopic - Embedding - Completed ✓
2024-12-03 06:43:27,765 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:43:29,231 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:43:29,233 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:43:29,307 - BERTopic - Cluster - Completed ✓
2024-12-03 06:43:29,310 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:43:46,364 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/31 [00:00<?, ?it/s]

2024-12-03 06:43:47,353 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:43:47,357 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:43:47,357 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:43:47,377 - BERTopic - Cluster - Completed ✓


2024-12-03 06:43:47,378 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:43:47,379 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:43:47,642 - INFO - Step 5: Generating reports...
2024-12-03 06:43:47,643 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:43:47,666 - INFO - Saved STIX file: Results_Topic_Para/001/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:43:47,671 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:43:47,686 - INFO - Saved STIX file: Results_Topic_Para/001/topic_0/chunk_1/prediction_1.json
2024-12-03 06:43:47,687 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:43:47,707 - INFO - Saved STIX file: Results_Topic_Para/001/topic_1/chunk_1/prediction_1.json
2024-12-03 06:43:47,708 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:43:47,819 - INFO - Saved STIX file: Results_Topic_Para/001/topic_2/chunk_1/prediction_1.json
2024-12-03 06:43:47,819 - INFO - Processing chunk 1/1 of topi

2024-12-03 06:44:10,471 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:10,473 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:10,474 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:12,653 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:12,653 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:12,743 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:12,744 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:12,754 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:12,757 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:15,108 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:15,269 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:15,270 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:15,270 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:15,273 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:15,274 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:15,274 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:15,341 - INFO - Step 5: Generating reports...
2024-12-03 06:44:15,342 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:44:15,480 - INFO - Saved STIX file: Results_Topic_Para/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:15,483 - INFO - Saved STIX file: Results_Topic_Para/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:44:15,486 - INFO - Saved STIX file: Results_Topic_Para/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:44:15,487 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:44:15,598 - INFO - Saved STIX file: Results_Topic_Para/targeted-attack-in-middle-east-by-apt34.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:44:15,598 - INFO 

2024-12-03 06:44:18,276 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:18,278 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:18,278 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:44:20,455 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:20,455 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:20,485 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:20,486 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:20,491 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:20,493 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:21,213 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:44:21,260 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:21,262 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:21,263 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:21,264 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:21,265 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:21,265 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:21,276 - INFO - Step 5: Generating reports...
2024-12-03 06:44:21,276 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:44:21,326 - INFO - Saved STIX file: Results_Topic_Para/blog_whos-who-in-the-zoo/topic_0/chunk_1/prediction_1.json
2024-12-03 06:44:21,327 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:44:21,438 - INFO - Saved STIX file: Results_Topic_Para/blog_whos-who-in-the-zoo/topic_1/chunk_1/prediction_1.json
2024-12-03 06:44:21,439 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:44:21,463 - INFO - Saved STIX file: Results_Topic_Para/blog_whos-who-in-the-zoo/topic_2/chunk_1/prediction_1.json
2024-12-03 06:44:21,463 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:44:21,616 - INFO - Saved STIX file: Results_Topic_Para/blog_whos-who-in-the-zoo/topic_3/chunk

2024-12-03 06:44:22,635 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:22,637 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:22,637 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:24,841 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:24,842 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:24,949 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:24,950 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:24,964 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:24,967 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:27,703 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:27,886 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:27,887 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:27,887 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:27,890 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:27,891 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:27,891 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:27,948 - INFO - Step 5: Generating reports...
2024-12-03 06:44:27,948 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:44:27,967 - INFO - Saved STIX file: Results_Topic_Para/Vicious Panda_ The COVID Campaign - Check Point Research/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:27,967 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:44:27,975 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:44:28,079 - INFO - Saved STIX file: Results_Topic_Para/Vicious Panda_ The COVID Campaign - Check Point Research/topic_1/chunk_1/prediction_1.json
2024-12-03 06:44:28,082 - INFO - Saved STIX file: Results_Topic_Para/Vicious Panda_ The COVID Campaign - Check Point Research/topic_1/chunk_1/prediction_2.json
2024-12-03 06:44:28,085 - INFO - Saved STIX file: Results_Topic_Para/Vicious 

2024-12-03 06:44:31,386 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:31,389 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:31,391 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:44:33,578 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:33,581 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:33,753 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:33,755 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:33,772 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:33,774 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:38,074 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:44:38,319 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:38,321 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:38,321 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:38,326 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:38,327 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:38,328 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:38,418 - INFO - Step 5: Generating reports...
2024-12-03 06:44:38,419 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:44:38,434 - INFO - Saved STIX file: Results_Topic_Para/North Korea’s Cyber Strategy/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:38,435 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:44:38,808 - INFO - Saved STIX file: Results_Topic_Para/North Korea’s Cyber Strategy/topic_0/chunk_1/prediction_1.json
2024-12-03 06:44:38,810 - INFO - Saved STIX file: Results_Topic_Para/North Korea’s Cyber Strategy/topic_0/chunk_1/prediction_2.json
2024-12-03 06:44:38,811 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:44:38,819 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:44:38,997 - INFO - Saved STIX file: Results_Topic_Para/North Korea’s Cyber Stra

2024-12-03 06:44:42,958 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:42,961 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:42,962 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:44:45,158 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:45,160 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:45,281 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:45,283 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:45,296 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:45,298 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:48,332 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:44:48,497 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:48,498 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:48,498 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:48,502 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:48,502 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:48,503 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:48,550 - INFO - Step 5: Generating reports...
2024-12-03 06:44:48,551 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:44:48,558 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:44:48,639 - INFO - Saved STIX file: Results_Topic_Para/rpt-southeast-asia-threat-landscape/topic_1/chunk_1/prediction_1.json
2024-12-03 06:44:48,640 - INFO - Saved STIX file: Results_Topic_Para/rpt-southeast-asia-threat-landscape/topic_1/chunk_1/prediction_2.json
2024-12-03 06:44:48,641 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:44:48,648 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:44:48,678 - INFO - Saved STIX file: Results_Topic_Para/rpt-southeast-asia-threat-landscape/topic_3/chunk_1/prediction_1.json
2024-12-03 06:44:48,679 - INFO - Saved STIX file: Results_Topic_Para/rpt-s

2024-12-03 06:44:52,431 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:52,433 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:52,433 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:44:54,766 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:54,768 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:55,142 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:55,144 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:55,172 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:55,175 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:01,505 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:45:01,898 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:01,899 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:01,900 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:01,907 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:01,908 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:01,908 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:02,029 - INFO - Step 5: Generating reports...
2024-12-03 06:45:02,031 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:45:02,133 - INFO - Saved STIX file: Results_Topic_Para/cta-2023-0727-1/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:02,134 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:45:02,231 - INFO - Saved STIX file: Results_Topic_Para/cta-2023-0727-1/topic_0/chunk_1/prediction_1.json
2024-12-03 06:45:02,232 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:45:02,329 - INFO - Saved STIX file: Results_Topic_Para/cta-2023-0727-1/topic_1/chunk_1/prediction_1.json
2024-12-03 06:45:02,330 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:45:02,530 - INFO - Saved STIX file: Results_Topic_Para/cta-2023-0727-1/topic_2/chunk_1/prediction_1.json
2024-12-03 06

2024-12-03 06:45:12,226 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:12,229 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:12,230 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:45:14,502 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:14,503 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:14,749 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:14,750 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:14,771 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:14,773 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:19,780 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:45:20,101 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:20,102 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:20,103 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:20,108 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:20,109 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:20,109 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:20,223 - INFO - Step 5: Generating reports...
2024-12-03 06:45:20,224 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:45:20,257 - INFO - Saved STIX file: Results_Topic_Para/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:20,259 - INFO - Saved STIX file: Results_Topic_Para/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:45:20,259 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:45:20,266 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:45:20,274 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:45:20,410 - INFO - Saved STIX file: Results_Topic_Para/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_2/chunk_1/prediction_1.json
2024-12-

2024-12-03 06:45:26,042 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:26,045 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:26,045 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:45:28,301 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:28,301 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:28,368 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:28,368 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:28,377 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:28,379 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:29,992 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:45:30,103 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:30,105 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:30,105 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:30,107 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:30,108 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:30,108 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:30,138 - INFO - Step 5: Generating reports...
2024-12-03 06:45:30,140 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:45:30,156 - INFO - Saved STIX file: Results_Topic_Para/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:30,157 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:45:30,174 - INFO - Saved STIX file: Results_Topic_Para/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_0/chunk_1/prediction_1.json
2024-12-03 06:45:30,177 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:45:30,508 - INFO - Saved STIX file: Results_Topic_Para/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_1/chunk_1/prediction_1.json
2024-12-03 06:45:30,509 - INFO - Saved STIX file: Results_Topic_Para/APT10 Ta

2024-12-03 06:45:33,211 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:33,215 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:33,215 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:45:35,775 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:35,777 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:35,985 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:35,986 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:36,006 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:36,009 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:40,807 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:45:41,142 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:41,144 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:41,144 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:41,150 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:41,151 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:41,151 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:41,306 - INFO - Step 5: Generating reports...
2024-12-03 06:45:41,306 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:45:41,323 - INFO - Saved STIX file: Results_Topic_Para/The State of OT:IoT Routers in the Software Supply Chain/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:41,323 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:45:41,338 - INFO - Saved STIX file: Results_Topic_Para/The State of OT:IoT Routers in the Software Supply Chain/topic_0/chunk_1/prediction_1.json
2024-12-03 06:45:41,339 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:45:41,381 - INFO - Saved STIX file: Results_Topic_Para/The State of OT:IoT Routers in the Software Supply Chain/topic_1/chunk_1/prediction_1.json
2024-12-03 06:45:41,382 - INFO - Processing chunk 1/1 of topic 2...
2024-12-0

2024-12-03 06:45:46,905 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:46,907 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:46,908 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:45:49,056 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:49,057 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:49,115 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:49,115 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:49,124 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:49,126 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:50,575 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:45:50,666 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:50,667 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:50,667 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:50,669 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:50,669 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:50,670 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:50,699 - INFO - Step 5: Generating reports...
2024-12-03 06:45:50,699 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:45:50,713 - INFO - Saved STIX file: Results_Topic_Para/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_0/chunk_1/prediction_1.json
2024-12-03 06:45:50,714 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:45:50,728 - INFO - Saved STIX file: Results_Topic_Para/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_1/chunk_1/prediction_1.json
2024-12-03 06:45:50,729 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:45:50,775 - INFO - Saved STIX file: Results_Topic_Para/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_2/chunk_1/prediction_1.json
2024-12-03 06:45:50,776 - INFO 

2024-12-03 06:45:54,849 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:54,852 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:54,852 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:45:57,585 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:57,586 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:58,791 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:58,793 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:58,861 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:58,864 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:46:13,244 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:46:14,137 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:46:14,140 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:14,140 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:46:14,161 - BERTopic - Cluster - Completed ✓


2024-12-03 06:46:14,162 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:46:14,162 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:46:14,411 - INFO - Step 5: Generating reports...
2024-12-03 06:46:14,412 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:46:14,463 - INFO - Saved STIX file: Results_Topic_Para/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:46:14,465 - INFO - Saved STIX file: Results_Topic_Para/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:46:14,465 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:46:14,472 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:46:14,479 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:46:14,492 - INFO - Saved STIX file: Results_Topic_Para/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_2/chunk_1/prediction_1.json

2024-12-03 06:46:32,975 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:46:32,977 - INFO - Use pytorch device_name: cuda
2024-12-03 06:46:32,978 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:46:35,423 - BERTopic - Embedding - Completed ✓
2024-12-03 06:46:35,423 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:46:36,142 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:36,143 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:46:36,182 - BERTopic - Cluster - Completed ✓
2024-12-03 06:46:36,185 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:46:41,983 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:46:42,329 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:46:42,331 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:42,331 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:46:42,342 - BERTopic - Cluster - Completed ✓


2024-12-03 06:46:42,343 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:46:42,343 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:46:42,383 - INFO - Step 5: Generating reports...
2024-12-03 06:46:42,384 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:46:42,518 - INFO - Saved STIX file: Results_Topic_Para/scanv_1/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:46:42,521 - INFO - Saved STIX file: Results_Topic_Para/scanv_1/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:46:42,521 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:46:42,705 - INFO - Saved STIX file: Results_Topic_Para/scanv_1/topic_0/chunk_1/prediction_1.json
2024-12-03 06:46:42,706 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:46:42,885 - INFO - Saved STIX file: Results_Topic_Para/scanv_1/topic_1/chunk_1/prediction_1.json
2024-12-03 06:46:42,885 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:46:43,065 - INFO - Saved STIX 

2024-12-03 06:46:57,474 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:46:57,476 - INFO - Use pytorch device_name: cuda
2024-12-03 06:46:57,477 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:46:59,681 - BERTopic - Embedding - Completed ✓
2024-12-03 06:46:59,682 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:46:59,721 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:59,722 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:46:59,729 - BERTopic - Cluster - Completed ✓
2024-12-03 06:46:59,731 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:00,516 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:47:00,576 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:00,578 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:00,578 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:00,581 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:00,581 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:00,582 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:00,594 - INFO - Step 5: Generating reports...
2024-12-03 06:47:00,595 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:47:00,611 - INFO - Saved STIX file: Results_Topic_Para/The_destruction_of_APT3/topic_0/chunk_1/prediction_1.json
2024-12-03 06:47:00,612 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:47:00,875 - INFO - Saved STIX file: Results_Topic_Para/The_destruction_of_APT3/topic_1/chunk_1/prediction_1.json
2024-12-03 06:47:00,876 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:47:00,987 - INFO - Saved STIX file: Results_Topic_Para/The_destruction_of_APT3/topic_2/chunk_1/prediction_1.json
2024-12-03 06:47:00,988 - INFO - Processing chunk 1/1 of topic 3...
2024-12-03 06:47:01,107 - INFO - Saved STIX file: Results_Topic_Para/The_destruction_of_APT3/topic_3/chunk_1/p

2024-12-03 06:47:02,873 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:02,876 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:02,876 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:47:05,083 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:05,084 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:05,189 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:05,190 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:05,204 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:05,206 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:07,581 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:47:07,722 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:07,723 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:07,723 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:07,727 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:07,727 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:07,728 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:07,766 - INFO - Step 5: Generating reports...
2024-12-03 06:47:07,767 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:47:07,786 - INFO - Saved STIX file: Results_Topic_Para/black-ddos/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:07,788 - INFO - Saved STIX file: Results_Topic_Para/black-ddos/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:47:07,788 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:47:07,805 - INFO - Saved STIX file: Results_Topic_Para/black-ddos/topic_0/chunk_1/prediction_1.json
2024-12-03 06:47:07,805 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:47:07,898 - INFO - Saved STIX file: Results_Topic_Para/black-ddos/topic_1/chunk_1/prediction_1.json
2024-12-03 06:47:07,899 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:47:07,927 - INFO -

2024-12-03 06:47:10,805 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:10,807 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:10,809 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:47:13,014 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:13,015 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:13,160 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:13,161 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:13,178 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:13,180 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:16,540 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:47:16,817 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:16,818 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:16,819 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:16,824 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:16,824 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:16,825 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:16,957 - INFO - Step 5: Generating reports...
2024-12-03 06:47:16,958 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:47:16,975 - INFO - Saved STIX file: Results_Topic_Para/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:16,977 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:47:17,219 - INFO - Saved STIX file: Results_Topic_Para/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_0/chunk_1/prediction_1.json
2024-12-03 06:47:17,222 - INFO - Saved STIX file: Results_Topic_Para/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_0/chunk_1/prediction_2.json
2024-12-03 06:47:17,224 - INFO - Saved STIX file: Results_Topic_Para/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/top

2024-12-03 06:47:22,298 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:22,302 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:22,303 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:47:24,927 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:24,928 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:25,022 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:25,022 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:25,034 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:25,036 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:27,646 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:47:27,810 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:27,812 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:27,812 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:27,815 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:27,816 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:27,816 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:27,873 - INFO - Step 5: Generating reports...
2024-12-03 06:47:27,874 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:47:27,897 - INFO - Saved STIX file: Results_Topic_Para/CosmicStrand_ the discovery of a sophisticated UEFI firmware rootkit _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:27,898 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:47:27,906 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:47:28,106 - INFO - Saved STIX file: Results_Topic_Para/CosmicStrand_ the discovery of a sophisticated UEFI firmware rootkit _ Securelist/topic_1/chunk_1/prediction_1.json
2024-12-03 06:47:28,106 - INFO - Processing chunk 1/1 of topic 2...
2024-12-03 06:47:28,243 - INFO - Saved STIX file: Results_Topic_Para/CosmicStrand_ the discovery of a sophisticated UEF

2024-12-03 06:47:32,206 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:32,209 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:32,210 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:47:34,579 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:34,585 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:34,766 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:34,767 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:34,784 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:34,787 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:38,746 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:47:38,998 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:38,999 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:39,000 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:39,005 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:39,005 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:39,006 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:39,091 - INFO - Step 5: Generating reports...
2024-12-03 06:47:39,091 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:47:39,112 - INFO - Saved STIX file: Results_Topic_Para/Weaponizing a Lazarus Group Implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:39,118 - INFO - Saved STIX file: Results_Topic_Para/Weaponizing a Lazarus Group Implant/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:47:39,118 - INFO - Processing chunk 1/1 of topic 0...
2024-12-03 06:47:39,216 - INFO - Saved STIX file: Results_Topic_Para/Weaponizing a Lazarus Group Implant/topic_0/chunk_1/prediction_1.json
2024-12-03 06:47:39,217 - INFO - Processing chunk 1/1 of topic 1...
2024-12-03 06:47:39,231 - INFO - Saved STIX file: Results_Topic_Para/Weaponizing a Lazarus Group Implant/topic_1/chunk_1/prediction_1.json


# Finish