# Installing Necessary Libraries 

In [None]:
# !pip install PyMuPDF
# !pip install spacy
# !pip install stix2
# !pip install bertopic
# !pip install sentence-transformers

In [None]:
!pip install umap-learn

In [None]:
!python -m spacy download en_core_web_lg

In [None]:
spacy.cli.download("en_core_web_sm")

# Importing Libraries 

In [1]:
import pymupdf as fitz
import re
from stix2 import Indicator, AttackPattern, Relationship, Report, Bundle
import json
from datetime import datetime
import TTPelement
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from umap.umap_ import UMAP
from bertopic import BERTopic
from hdbscan import HDBSCAN
from bertopic.representation import KeyBERTInspired, PartOfSpeech, MaximalMarginalRelevance
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import pandas as pd
import torch
import pickle
import numpy as np
import os
import spacy
from transformers import DistilBertTokenizer, AutoTokenizer
from transformers import DistilBertForSequenceClassification, AutoModelForSequenceClassification
import logging
import sys
import time

In [2]:
nlp = spacy.load("en_core_web_sm")

# PDF text Extraction

In [5]:
def extract_text_from_pdf(pdf_path):
    with fitz.open(pdf_path) as doc:
        text = ""
        for page in doc:
            text += page.get_text()
    return text

In [20]:
text=extract_text_from_pdf('Reports/(3) Kimsuky is targeting an arms manufacturer in Europe. _ LinkedIn.pdf')

In [21]:
text

'Kimsuky is targeting an arms manufacturer in Europe.\nlinkedin.com/pulse/kimsuky-targeting-arms-manufacturer-europe-dmitry-melikov-dquge\nDmitry Melikov\nDmitry Melikov\nCyber threats researcher. Malware researcher.\n2 articles\nJune 7, 2024\nDate of the report (05/24/2024)\nSummary\nOn May 16, 2024, we discovered attempted intrusions targeting organizations that\nproduce weapons components in western Europe. We assess with high confidence that\nthe state-sponsored group known as Kimsuky is behind these attacks. This report details\nthe attacker\'s methods and tools and provides indicators to detect future activity.\nKey Findings:\n1)The threat actor used new espionage tools.\n2)The primary target appears to be an western European weapons manufacturer.\n3)The threat actor used the "General Dynamics" brand, a prominent military contractor, as\na visual lure.\nContext\nNorth Korean state-sponsored threat actors have long targeted weapons-producing\norganizations. At various times, Diffe

# Text Cleaning and Chunking

In [22]:
def CleanText(text):
  cleaned_text = re.sub(r'\s+', ' ', text)    # remove blank spaces
  cleaned_text = re.sub(r'[^a-zA-Z0-9\s.,!?:/()\[\]@_-]+', '', cleaned_text)     # keep only required characters

  return cleaned_text
    
def chunk_sentences(text):
    """Chunk the cleaned text into sentences using spaCy."""
    doc = nlp(text)  # Process the text with spaCy
    sentences = [sent.text.strip() for sent in doc.sents]  # Extract sentences
    return [s for s in sentences if s]  # Filter out empty strings

In [23]:
cleaned_text = CleanText(text)
clean=chunk_sentences(cleaned_text)

In [24]:
clean

['Kimsuky is targeting an arms manufacturer in Europe.',
 'linkedin.com/pulse/kimsuky-targeting-arms-manufacturer-europe-dmitry-melikov-dquge',
 'Dmitry Melikov Dmitry Melikov Cyber threats researcher.',
 'Malware researcher.',
 '2 articles June 7, 2024 Date of the report (05/24/2024) Summary On May 16, 2024, we discovered attempted intrusions targeting organizations that produce weapons components in western Europe.',
 'We assess with high confidence that the state-sponsored group known as Kimsuky is behind these attacks.',
 'This report details the attackers methods and tools and provides indicators to detect future activity.',
 'Key Findings: 1)The threat actor used new espionage tools.',
 '2)The primary target appears to be an western European weapons manufacturer.',
 '3)The threat actor used the General Dynamics brand, a prominent military contractor, as a visual lure.',
 'Context North Korean state-sponsored threat actors have long targeted weapons-producing organizations.',
 'At

# Topic Modelling

In [25]:
class StemTokenizer:
    def __init__(self):
        self.stemmer = PorterStemmer()

    def __call__(self, doc):
        return [self.stemmer.stem(t) for t in word_tokenize(doc)]

def create_pos_patterns():
    """Define POS patterns for the PartOfSpeech representation model."""
    return [
        [{'POS': 'ADJ'}, {'POS': 'NOUN'}],  # e.g., "malicious software"
        [{'POS': 'NOUN'}],                  # e.g., "malware", "ransomware"
        [{'POS': 'VERB'}, {'POS': 'NOUN'}], # e.g., "exploit vulnerability"
        [{'POS': 'NOUN'}, {'POS': 'NOUN'}], # e.g., "threat actor", "data breach"
        [{'POS': 'PROPN'}],                 # e.g., Specific group names like "APT29"
        [{'POS': 'NOUN'}, {'POS': 'PROPN'}] # e.g., "attacks by Lazarus Group"
    ]

def build_representation_model():
    """Build the representation model for BERTopic."""
    pos_patterns = create_pos_patterns()
    return {
        "Main": KeyBERTInspired(),
        "POS": [
            PartOfSpeech("en_core_web_sm", pos_patterns=pos_patterns),
            MaximalMarginalRelevance(diversity=0.4)
        ],
    }

def create_vectorizer():
    """Create a custom CountVectorizer with a stemming tokenizer."""
    return CountVectorizer(
        min_df=1,         # Capture rare words
        max_df=0.95,      # Avoid overly common words
        ngram_range=(1, 2),
        stop_words='english',
        tokenizer=StemTokenizer()
    )

def create_umap_model():
    """Create a UMAP model with adjusted parameters for smaller datasets."""
    return UMAP(
        n_neighbors=3,
        n_components=5,
        min_dist=0.0,
        metric='cosine',
        random_state=42
    )

def create_hdbscan_model():
    """Create an HDBSCAN model with adjusted parameters for smaller datasets."""
    return HDBSCAN(
        min_cluster_size=2,
        min_samples=1,
        metric='euclidean',
        cluster_selection_method='eom',
        prediction_data=True
    )

def build_topic_model(clean_data):
    """
    Build and fit a BERTopic model using the custom components.

    Parameters:
        clean_data (list of str): The preprocessed and cleaned dataset.

    Returns:
        tuple: The fitted BERTopic model, topics, and initial probabilities.
    """
    representation_model = build_representation_model()
    vectorizer_model = create_vectorizer()
    umap_model = create_umap_model()
    hdbscan_model = create_hdbscan_model()

    topic_model = BERTopic(
        nr_topics=None,  # Adjust or set to None for automatic determination
        vectorizer_model=vectorizer_model,
        representation_model=representation_model,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        verbose=True
    )

    # Fit the model
    topic_model.fit(clean_data)

    # Transform the data
    topics, ini_probs = topic_model.transform(clean_data)
    return topic_model, topics, ini_probs

In [26]:
# Build and fit the BERTopic model
topic_model, topics, ini_probs = build_topic_model(clean)

2024-12-03 22:36:33,968 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 22:36:36,294 - BERTopic - Embedding - Completed ✓
2024-12-03 22:36:36,294 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 22:36:36,393 - BERTopic - Dimensionality - Completed ✓
2024-12-03 22:36:36,394 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 22:36:36,404 - BERTopic - Cluster - Completed ✓
2024-12-03 22:36:36,407 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 22:36:38,182 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 22:36:38,303 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 22:36:38,304 - BERTopic - Dimensionality - Completed ✓
2024-12-03 22:36:38,304 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 22:36:38,307 - BERTopic - Cluster - Completed ✓


In [27]:
topics = topic_model.get_representative_docs()

In [28]:
topics

{0: ['6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.',
  '6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.',
  '6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.'],
 1: ['Retrieve the full path of running processes and send it to the C2 server.',
  '4. Establish socket connections to specified IP addresses and report the connection status to the C2 server.',
  'The attacker can use this tool to: 1. Enumerate directories and files, exfiltrating the information to the C2 server.'],
 2: ['The program inscribes itself in the following registry key, ensuring its permanent presence in the system.',
  'Persistence and Communication During startup, the program creates a new service called CacheDB with the startauto parameter.',
  'HKEY_LOCAL_MACHINESoftwareMicrosoftWindowsCurrentVersionRun When establishing the connection for the very first time, the server registers the infected system with a particular unique identif

In [29]:
for topic_id, docs in topics.items():
    print(f"Topic {topic_id}:")
    for doc in docs:
        print(f"- {doc}")
    print("\n")

Topic 0:
- 6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.
- 6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.
- 6/25/24, 3:09 PM (3) Kimsuky is targeting an arms manufacturer in Europe.


Topic 1:
- Retrieve the full path of running processes and send it to the C2 server.
- 4. Establish socket connections to specified IP addresses and report the connection status to the C2 server.
- The attacker can use this tool to: 1. Enumerate directories and files, exfiltrating the information to the C2 server.


Topic 2:
- The program inscribes itself in the following registry key, ensuring its permanent presence in the system.
- Persistence and Communication During startup, the program creates a new service called CacheDB with the startauto parameter.
- HKEY_LOCAL_MACHINESoftwareMicrosoftWindowsCurrentVersionRun When establishing the connection for the very first time, the server registers the infected system with a particular unique identifier.


# IOC Extraction and Soft Tagging

In [8]:
def get_ttp_elements_from_chunks(chunks):
    elements = []
    counts = []
    replaced = []

    print(chunks)

    for chunk in chunks:
        elems, count, replaced_values = TTPelement.GetTTPelements(chunk)
        elements.append(elems)
        counts.append(count)
        replaced.append(replaced_values)
        
    print("Elements:", elements)
    print("Counts:", counts)
    print("Replaced Values:", replaced)

    return elements, counts, replaced

In [None]:
chunks=get_ttp_elements_from_chunks(text)

In [30]:
def process_dict_with_ttp(data):
    processed_results = {}  # Dictionary to store processed results

    for key, sentences in data.items():
        # Ensure sentences is a list; skip if not
        if not isinstance(sentences, list):
            raise ValueError(f"Expected a list of sentences for key '{key}', got {type(sentences)}")

        # Process the sentences using get_ttp_elements_from_chunks
        elements, counts, replaced = get_ttp_elements_from_chunks(sentences)
        
        # Structure the processed results
        processed_results[key] = {
            "Original Sentences": sentences,
            "Elements": elements,
            "Counts": counts,  # Updated key name to "Observables"
            "Replaced Sentences": replaced,
        }

    return processed_results

In [31]:
# Process the dictionary
results = process_dict_with_ttp(topics)

# Display results for each key
for key, result in results.items():
    print(f"Results for Key {key}:\n")
    for i, sentence in enumerate(result["Original Sentences"]):
        print(f"Sentence: {sentence}")
        print(f"  Elements: {result['Elements'][i]}")
        print(f"  Word Count: {result['Counts'][i]}")
        print(f"  Replaced Sentence: {result['Replaced Sentences'][i]}")
        print()


Elements: [{'ipv4': [], 'ipv6': [], 'asn': [], 'domain': [], 'email': [], 'filename': [], 'url': [], 'hash': [], 'filepath': [], 'cve': [], 'regkey': [], 'encodeencryptalgorithms': [], 'communicationprotocol': [], 'dataobject': []}, {'ipv4': [], 'ipv6': [], 'asn': [], 'domain': [], 'email': [], 'filename': [], 'url': [], 'hash': [], 'filepath': [], 'cve': [], 'regkey': [], 'encodeencryptalgorithms': [], 'communicationprotocol': [], 'dataobject': []}, {'ipv4': [], 'ipv6': [], 'asn': [], 'domain': [], 'email': [], 'filename': [], 'url': [], 'hash': [], 'filepath': [], 'cve': [], 'regkey': [], 'encodeencryptalgorithms': [], 'communicationprotocol': [], 'dataobject': []}]
Counts: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
Replaced Values: ['6/25/24, 3:09 pm (3) kimsuky is targeting an arms manufacturer in europe.', '6/25/24, 3:09 pm (3) kimsuky is targeting an arms manufacturer in europe.', '6/25/24, 

# Model

In [8]:
# Load the model, tokenizer, and multilabel binarizer
model_dir = "distilbert-finedtune"

In [9]:
# Load the model
model = DistilBertForSequenceClassification.from_pretrained(model_dir)
model.eval()  # Set to evaluation mode

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [10]:
# Load the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained(model_dir)

In [11]:
# Load the multilabel binarizer
with open("multilabel_binarizer.pkl", "rb") as f:
    multilabel_binarizer = pickle.load(f)

In [12]:
# Load the CSV file to create a label-to-column mapping
csv_path = "Labels.csv"
label_data = pd.read_csv(csv_path)

In [13]:
label_data = label_data.drop(['Unnamed: 0'],axis=1)

In [14]:
# Normalize columns in the CSV
label_data['Tactic-Name'] = label_data['Tactic-Name'].str.strip().str.lower()
label_data['Technique-Name'] = label_data['Technique-Name'].str.strip().str.lower()
label_data['SubTechnique-Name'] = label_data['SubTechnique-Name'].str.strip().str.lower()
label_data['id'] = label_data['id'].str.strip().str.lower()

In [15]:
# Create a label-to-column mapping
tactic_labels = label_data['Tactic-Name'].unique().tolist()
technique_labels = label_data['Technique-Name'].unique().tolist()
sub_technique_labels = label_data['SubTechnique-Name'].unique().tolist()
technique_id_labels = label_data['id'].unique().tolist()

In [16]:
def categorize_label(label):
    label = label.strip().lower()
    if label in tactic_labels:
        return "Tactic Name"
    elif label in technique_labels:
        return "Technique Name"
    elif label in sub_technique_labels:
        return "Sub-Technique Name"
    elif label in technique_id_labels:
        return "Technique ID"
    else:
        return "Unknown"

In [17]:
def analyze_output(structured_output):
    # Normalize all values in structured_output for consistency
    structured_output = {key: [val.strip().lower() for val in value] for key, value in structured_output.items()}
    
    # Check if all four fields are given and valid
    if all(len(structured_output[key]) > 0 for key in structured_output):
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            # print(f"[DEBUG] All four fields match:\n{matching_rows}\n")
            return structured_output
    
    # Handle cases with three fields given
    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Technique Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Tactic Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Tactic-Name'].isin(structured_output['Tactic Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    if len(structured_output['Technique Name']) > 0 and len(structured_output['Sub-Technique Name']) > 0 and len(structured_output['Technique ID']) > 0:
        matching_rows = label_data[(label_data['Technique-Name'].isin(structured_output['Technique Name'])) &
                                   (label_data['SubTechnique-Name'].isin(structured_output['Sub-Technique Name'])) &
                                   (label_data['id'].isin(structured_output['Technique ID']))]
        if not matching_rows.empty:
            return find_missing_labels_from_rows(structured_output, matching_rows)

    # Handle cases with two fields given
    valid_pairs = []
    matching_rows_dict = []
    keys = list(structured_output.keys())
    for i in range(len(keys)):
        for j in range(len(keys)):
            if i != j:
                key1, key2 = keys[i], keys[j]
                values1, values2 = structured_output[key1], structured_output[key2]
                for value1 in values1:
                    for value2 in values2:
                        if key1 == "Tactic Name" and key2 == "Technique Name":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['Technique-Name'] == value2)]
                        elif key1 == "Tactic Name" and key2 == "Sub-Technique Name":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['SubTechnique-Name'] == value2)]
                        elif key1 == "Tactic Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['Tactic-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        elif key1 == "Technique Name" and key2 == "Sub-Technique Name":
                            matching_rows = label_data[(label_data['Technique-Name'] == value1) &
                                                       (label_data['SubTechnique-Name'] == value2)]
                        elif key1 == "Technique Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['Technique-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        elif key1 == "Sub-Technique Name" and key2 == "Technique ID":
                            matching_rows = label_data[(label_data['SubTechnique-Name'] == value1) &
                                                       (label_data['id'] == value2)]
                        else:
                            matching_rows = pd.DataFrame()
                        
                        if not matching_rows.empty:
                            valid_pairs.append((key1, value1, key2, value2))
                            matching_rows_dict.append(matching_rows)

    # Debug: Print valid pairs formed
    # print(f"[DEBUG] Valid pairs formed: {valid_pairs}\n")
    
    # Create new outputs for each valid pair and find missing labels
    merged_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
    for idx, pair in enumerate(valid_pairs):
        key1, value1, key2, value2 = pair
        matching_rows = matching_rows_dict[idx]
        temp_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
        temp_output[key1].append(value1)
        temp_output[key2].append(value2)
        completed_output = find_missing_labels_from_rows(temp_output, matching_rows)
        for key in merged_output:
            merged_output[key].extend([val for val in completed_output[key] if val not in merged_output[key]])
    
    # Handle cases with a single field given
    for key in structured_output:
        if len(structured_output[key]) > 0:
            column_name = key.replace(' ', '-')
            if column_name in label_data.columns:
                matching_rows = label_data[label_data[column_name].isin(structured_output[key])]
                if not matching_rows.empty:
                    completed_output = find_missing_labels_from_rows(structured_output, matching_rows)
                    for key in merged_output:
                        merged_output[key].extend([val for val in completed_output[key] if val not in merged_output[key]])
    
    return merged_output

In [18]:
def find_missing_labels_from_rows(filtered_output, matching_rows):
    # Normalize all values in filtered_output for consistency
    filtered_output = {key: [val.strip().lower() for val in value] for key, value in filtered_output.items()}

    # Filter rows that match all non-empty fields in filtered_output
    condition = pd.Series(True, index=matching_rows.index)
    for key in filtered_output:
        if filtered_output[key]:
            column_name = None
            if key == "Tactic Name":
                column_name = 'Tactic-Name'
            elif key == "Technique Name":
                column_name = 'Technique-Name'
            elif key == "Sub-Technique Name":
                column_name = 'SubTechnique-Name'
            elif key == "Technique ID":
                column_name = 'id'

            if column_name:
                condition &= matching_rows[column_name].isin(filtered_output[key])

    filtered_matching_rows = matching_rows[condition]

    # Debug: Print filtered matching rows
    # print(f"[DEBUG] Filtered matching rows based on existing output:\n{filtered_matching_rows}\n")

    # Find missing labels based on the filtered output using filtered matching rows
    if not filtered_matching_rows.empty:
        for _, row in filtered_matching_rows.iterrows():
            if len(filtered_output["Tactic Name"]) > 0 and len(filtered_output["Technique Name"]) > 0:
                if len(filtered_output["Sub-Technique Name"]) == 0 and pd.notna(row['SubTechnique-Name']):
                    filtered_output["Sub-Technique Name"].append(row['SubTechnique-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

            if len(filtered_output["Tactic Name"]) > 0 and len(filtered_output["Sub-Technique Name"]) > 0:
                if len(filtered_output["Technique Name"]) == 0 and pd.notna(row['Technique-Name']):
                    filtered_output["Technique Name"].append(row['Technique-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

            if len(filtered_output["Technique Name"]) > 0 and len(filtered_output["Sub-Technique Name"]) > 0:
                if len(filtered_output["Tactic Name"]) == 0 and pd.notna(row['Tactic-Name']):
                    filtered_output["Tactic Name"].append(row['Tactic-Name'])
                if len(filtered_output["Technique ID"]) == 0 and pd.notna(row['id']):
                    filtered_output["Technique ID"].append(row['id'])

    return filtered_output

In [19]:
def predict(text):   
    # Tokenize the input text
    encoding = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    
    # Move to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    encoding = {key: value.to(device) for key, value in encoding.items()}

    # Perform inference
    with torch.no_grad():
        outputs = model(**encoding)
        logits = outputs.logits

    # Apply sigmoid to get probabilities
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits).cpu().numpy()

    # Set threshold and get predictions
    threshold = 0.3
    preds = (probs >= threshold).astype(int)

    # Transform predictions back to original labels
    labels = multilabel_binarizer.inverse_transform(preds)

    #Debug: Print the model output
    # print(f"[DEBUG] Model output labels: {labels}\n")

    # Prepare structured output
    structured_output = {"Tactic Name": [], "Technique Name": [], "Sub-Technique Name": [], "Technique ID": []}
    for label_set in labels:
        for label in label_set:
            category = categorize_label(label)
            if category != "Unknown":
                structured_output[category].append(label)
    
    # Analyze the output to filter valid pairs and find missing labels
    final_output = analyze_output(structured_output)

    # Append the original text as a chunk in the final output
    final_output["Chunk"] = [text]

    return final_output

In [None]:
# Example usage
if __name__ == "__main__":
    test_text = "Phishing impersonating governmental entities as the spreading method. In some campaigns, particularly those involving financial attacks, the group impersonates banking institutions."
    predictions = predict(test_text)
    for label_type, value in predictions.items():
        print(f"{label_type}: {', '.join(value)}")

In [None]:
# Example usage
if __name__ == "__main__":
    test_text = "Sophisticated cyberattacks frequently involve phishing campaigns, where attackers utilize malicious URLs and email addresses to deceive victims."
    predictions = predict(test_text)
    for label_type, value in predictions.items():
        print(f"{label_type}: {', '.join(value)}")

In [None]:
predictions

# Final stix bundles)


In [20]:
def CreateStixIndicator(IOCtype, IOCvalue):
    # Define a mapping for IOC types to their STIX patterns
    ioc_mapping = {
        "ipv4": "[ipv4-addr:value = '{}']",
        "ipv6": "[ipv6-addr:value = '{}']",
        "asn": "[autonomous-system:number = '{}']",
        "domain": "[domain-name:value = '{}']",
        "email": "[email-addr:value = '{}']",
        "filename": "[file:name = '{}']",
        "url": "[url:value = '{}']",
        "hash": "[file:hash = '{}']",
        "filepath": "[file:file_path = '{}']",
        "cve": "[vulnerability:cve = '{}']",
        "regkey": "[windows-registry-key:key = '{}']",
        "encodeencryptalgorithms": "[crypto-algorithm:name = '{}']",
        "communicationprotocol": "[network-traffic:protocols[*] = '{}']",
        "dataobject": "[artifact:payload_bin = '{}']"
    }

    # Ensure the IOC type is valid
    if IOCtype not in ioc_mapping:
        raise ValueError(f"Unsupported IOC type: {IOCtype}")

    # Replace backslashes in IOCvalue and format the pattern
    pattern_val = ioc_mapping[IOCtype].format(IOCvalue.replace('\\', '/'))

    # Create the STIX Indicator
    indicator = Indicator(
        name=IOCtype,
        description=f"Extracted {IOCtype}",
        indicator_types=["malicious-activity"],
        pattern=pattern_val,
        labels=["malicious"],
        pattern_type="stix"
    )

    return indicator

In [21]:
def return_stix_objects(counts, tactic_name, technique_name, sub_tech_name, technique_id, iocs, chunks, do_include_non_ioc=False):
    logging.basicConfig(level=logging.INFO)

    indicators_stix = []
    relationships_stix = []
    attack_patterns_stix = []

    for i in range(len(counts)):
        # Use provided values or fallback to defaults
        tactic = tactic_name[i] if i < len(tactic_name) and tactic_name[i] else "Unknown Tactic"
        technique = technique_name[i] if i < len(technique_name) and technique_name[i] else "Unknown Technique"
        sub_tech = sub_tech_name[i] if i < len(sub_tech_name) and sub_tech_name[i] else "No Sub-Technique"
        tech_id = technique_id[i] if i < len(technique_id) and technique_id[i] else "TXXXX"
        ioc = iocs[i] if i < len(iocs) and iocs[i] else {}
        description = chunks[i] if i < len(chunks) and chunks[i] else "No description available."

        # Normalize tactic names (handle both single and multiple tactics)
        tactic_names = tactic if isinstance(tactic, list) else [tactic]

        for tactic in tactic_names:
            # Create the AttackPattern STIX object
            phase_name = tactic.lower().replace(' ', '-').replace('_', '-')
            attack_pattern = AttackPattern(
                name=f"{technique} - {sub_tech}",
                description=description,
                external_references=[{
                    "source_name": "mitre-attack",
                    "external_id": tech_id
                }],
                kill_chain_phases=[{
                    "kill_chain_name": "mitre-attack",
                    "phase_name": phase_name
                }],
                aliases=[technique, sub_tech]
            )
            attack_patterns_stix.append(attack_pattern)

            # Handle IOCs if present
            if ioc:
                for ioc_type, ioc_list in ioc.items():
                    for ioc_value in ioc_list:
                        try:
                            indicator = CreateStixIndicator(ioc_type, str(ioc_value))
                            indicators_stix.append(indicator)

                            # Create relationship between Indicator and AttackPattern
                            relationship = Relationship(
                                source_ref=indicator.id,
                                target_ref=attack_pattern.id,
                                relationship_type="indicates",
                                description=f"Indicator ({ioc_value}) is associated with Attack Pattern ({technique} - {sub_tech}).",
                                start_time=datetime.now(),
                                allow_custom=True
                            )
                            relationships_stix.append(relationship)
                        except Exception as e:
                            logging.error(f"Error creating indicator for IOC ({ioc_value}): {e}")
            elif do_include_non_ioc:
                # Include a "No IOC available" relationship if configured
                logging.info(f"No IOCs found for chunk {i}. Creating placeholder relationship.")
                relationship = Relationship(
                    source_ref=None,
                    target_ref=attack_pattern.id,
                    relationship_type="indicates",
                    description="No IOC available.",
                    start_time=datetime.now(),
                    allow_custom=True
                )
                relationships_stix.append(relationship)

    return indicators_stix, relationships_stix, attack_patterns_stix

In [22]:
def make_stix(indicators, relationships, attack_patterns, clean_text, report_name, labels=None, published=None):
    logging.basicConfig(level=logging.INFO)

    # Default values for optional parameters
    labels = labels or []
    labels.append("threat-report")
    published = published or datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    try:
        # Validate inputs
        if not all(isinstance(obj_list, (list, tuple)) for obj_list in [indicators, relationships, attack_patterns]):
            raise ValueError("Indicators, relationships, and attack patterns must be iterable (e.g., list or tuple).")

        # Collect all object IDs
        all_ids = [obj.id for obj in indicators + relationships + attack_patterns]

        # Log a warning if no objects exist
        if not all_ids:
            logging.warning(f"No STIX objects to include in the report '{report_name}'. Creating an empty bundle.")
            return Bundle(objects=[])

        # Create the report
        report = Report(
            name=report_name,
            description=clean_text,
            published=published,
            object_refs=all_ids,
            labels=labels,
        )

        # Collect all objects into a bundle
        objects = indicators + attack_patterns + relationships + [report]
        bundle = Bundle(objects=objects)

        return bundle

    except Exception as e:
        logging.error(f"An error occurred while creating the STIX bundle for report '{report_name}': {e}")
        return Bundle(objects=[])

# Main Workflow Code

In [None]:
chunks = [
    """
    The APT group launched a watering hole attack targeting specific industry websites to deliver malware. The IP address 45.33.32.156 was 
    identified as hosting the malicious content, alongside the domain compromised-site.com. Visitors were redirected to this domain and 
    prompted to download a fake software update named UpdateInstaller.exe, which contained malware. The malware would establish persistence 
    by creating registry keys and communicating with the C2 server at 203.0.113.60 using HTTPS.
    """,

    """
    In an escalation of their tactics, the same APT group deployed ransomware via malvertising on popular websites. Users who clicked on the ad were redirected to a download page 
    hosted at malicious-ads.io, where the ransomware executable ransom_payload_v3.2.exe was served. Once executed, the ransomware 
    encrypted files on the victim’s machine and displayed a ransom note directing the victim to contact the attacker via email at 
    support@ransom.com. During encryption, the ransomware communicated with IP addresses like 185.60.218.35 and 192.168.0.1, sending 
    encrypted session keys and victim data.
    """,

    """
    The group also launched brute-force attacks targeting SSH servers within specific critical infrastructure organizations. IP addresses such as 103.21.244.1 and 102.51.145.2 were detected 
    repeatedly attempting login attempts. Once access was obtained, the attackers deployed a reverse shell to 198.51.100.50 over port 443, 
    allowing them to control the compromised system. Further reconnaissance commands were executed to gather system information, which was 
    then exfiltrated to the command and control server. The tactic of initial access through brute force and persistence through remote 
    access tools was identified.
    """,

    """
    Phishing emails were distributed with the subject line “Account Security Notice” to lure users into clicking a malicious link. The emails, 
    sent from noreply@security-alerts.com, claimed to be from the user's bank and included a link to verify account details. The link directed 
    users to http://phishing-site.com/login, a site that mimicked the bank's login page. This page harvested login credentials, which were then 
    used in unauthorized access attempts on financial systems. This is another technique in the APT group's playbook, showing tactics of credential access through phishing and credential dumping.
    """,

    """
    A supply chain attack was discovered, revealing that the APT group compromised a legitimate software vendor’s update server. They injected malware 
    into the update package hosted on update.vendor-software.com. End users who downloaded the update unknowingly installed the malware, which 
    created a backdoor to a remote IP address 203.0.113.45. This backdoor allowed attackers to execute arbitrary commands and exfiltrate data 
    from infected systems. The tactics of lateral movement and exfiltration through command and control were observed.
    """,

    """
    Cybercriminals within the same APT group also targeted e-commerce platforms with SQL injection attacks, attempting to extract credit card details from databases. The 
    attackers operated from IP addresses 198.51.100.88 and 102.51.67.30. Upon successful extraction, the data was encrypted and exfiltrated to 
    a remote server at 203.0.113.77. The exfiltration utilized SSL to secure the data transfer, making detection more challenging. Techniques 
    of data exfiltration and defense evasion were identified in this attack.
    """,

    """
    The group distributed a banking trojan through spam campaigns that claimed to be from a well-known financial institution. The email urged recipients 
    to download a “security patch” from http://safe-banking-update.com/patch.exe, which was actually the trojan. Once installed, the trojan 
    collected browser session data, including saved passwords and autofill information, and exfiltrated this data to the server at 45.33.32.111. 
    The tactics of credential access and data collection were associated with this campaign.
    """,

    """
    Finally, a botnet campaign linked to this APT group was identified, leveraging IoT devices for launching DDoS attacks against critical services. The devices connected to IP addresses 198.51.100.20 and 
    192.0.2.12, forming a network of compromised systems. The botnet periodically sent commands to these IPs to execute coordinated DDoS attacks 
    against high-profile targets. The tactic of impact through service disruption was noted, along with persistence through IoT device compromise.
    """
]

In [None]:
# Parent directory for all STIX reports
base_output_folder = "stix_reports_1"
os.makedirs(base_output_folder, exist_ok=True)

In [None]:
def main_workflow(chunks):
    # Step 1: Extract IoCs and perform soft tagging for all chunks
    elements_list, counts_list, replaced_chunks = get_ttp_elements_from_chunks(chunks)
    
    for idx, replaced_chunk in enumerate(replaced_chunks):
        # Create a unique output folder for each report
        report_folder = os.path.join(base_output_folder, f"report_chunk_{idx+1}")
        os.makedirs(report_folder, exist_ok=True)
        
        # Extract corresponding elements and counts
        elements = elements_list[idx]
        counts = counts_list[idx]
        original_chunk = chunks[idx]
        
        # Step 2: Pass the replaced chunk to the model for prediction
        predictions = predict(replaced_chunk)
        
        # Extract predictions data
        tactic_names = predictions.get('Tactic Name', [])
        technique_names = predictions.get('Technique Name', [])
        sub_tech_names = predictions.get('Sub-Technique Name', [])
        technique_ids = predictions.get('Technique ID', [])
        
        # Adjust list lengths to be equal
        max_len = max(len(tactic_names), len(technique_names), len(sub_tech_names), len(technique_ids))
        tactic_names = (tactic_names or ["Unknown"]) * max_len if len(tactic_names) == 0 else tactic_names * (max_len // len(tactic_names))
        technique_names = (technique_names or ["Unknown"]) * max_len if len(technique_names) == 0 else technique_names * (max_len // len(technique_names))
        sub_tech_names = (sub_tech_names or ["Unknown"]) * max_len if len(sub_tech_names) == 0 else sub_tech_names * (max_len // len(sub_tech_names))
        technique_ids = (technique_ids or ["Unknown"]) * max_len if len(technique_ids) == 0 else technique_ids * (max_len // len(technique_ids))
        
        # Ensure lists are now equal in length
        tactic_names = tactic_names[:max_len]
        technique_names = technique_names[:max_len]
        sub_tech_names = sub_tech_names[:max_len]
        technique_ids = technique_ids[:max_len]
        
        # Step 3: Generate STIX indicators, relationships, and attack patterns for each prediction pair
        iocs = [elements]  # elements is a dictionary of IoCs for this chunk
        counts_list_for_stix = [counts]  # counts is a list of counts for this chunk
        chunks_for_stix = [original_chunk]  # Use the original chunk for STIX description
        do_include_non_ioc = False
        
        # Iterate over each prediction set and generate STIX objects
        for i, (tactic, technique, sub_technique, tech_id) in enumerate(
                zip(tactic_names, technique_names, sub_tech_names, technique_ids)):
            
            # Generate STIX objects
            indicators, relationships, attack_patterns = return_stix_objects(
                counts_list_for_stix,
                [tactic],
                [technique],
                [sub_technique],
                [tech_id],
                iocs,
                chunks_for_stix,
                do_include_non_ioc
            )
            
            # Step 4: Generate the STIX bundle and save as JSON
            report_name = f"report_chunk_{idx+1}_prediction_{i+1}"
            bundle = make_stix(indicators, relationships, attack_patterns, original_chunk, report_name)
            
            # Save the bundle to a JSON file in the unique report folder
            stix_file_path = os.path.join(report_folder, f"{report_name}.json")
            with open(stix_file_path, "w") as f:
                json.dump(json.loads(bundle.serialize()), f, indent=4)
            
            print(f"STIX file saved: {stix_file_path}")

# Main

In [23]:
def main_workflow(pdf_path, base_output_folder):
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Processing {pdf_path} and saving results in {base_output_folder}")

    # Step 0: Extract and Clean Text
    logging.info("Step 0: Extracting text from PDF...")
    raw_text = extract_text_from_pdf(pdf_path)

    logging.info("Step 0: Cleaning the extracted text...")
    clean_text = CleanText(raw_text)

    # Step 1: Chunk the cleaned text into sentences
    logging.info("Step 1: Chunking the cleaned text into sentences...")
    chunks = chunk_sentences(clean_text)

    # Step 2: Build a Topic Model
    logging.info("Step 2: Building the topic model...")
    topic_model, topics, ini_probs = build_topic_model(chunks)

    # Step 3: Fetch representative documents
    logging.info("Step 3: Fetching representative documents for each topic...")
    representative_docs = topic_model.get_representative_docs()

    # Step 4: Process topics using IoC extraction
    logging.info("Step 4: Processing topics for IoC extraction...")
    processed_topics = process_dict_with_ttp(representative_docs)

    # Step 5: Process each topic and generate reports
    logging.info("Step 5: Generating reports...")
    for topic_id, result in processed_topics.items():
        topic_sentences = result["Original Sentences"]
        elements_list = result["Elements"]
        counts_list = result["Counts"]
        replaced_chunks = result["Replaced Sentences"]

        # Create parent folder for the topic
        topic_folder = os.path.join(base_output_folder, f"topic_{topic_id}")
        os.makedirs(topic_folder, exist_ok=True)

        for idx, replaced_chunk in enumerate(replaced_chunks):
            chunk_folder = os.path.join(topic_folder, f"chunk_{idx + 1}")
            os.makedirs(chunk_folder, exist_ok=True)

            logging.info(f"Processing chunk {idx + 1}/{len(replaced_chunks)} of topic {topic_id}...")

            # Step 6: Predict TTPs
            predictions = predict(replaced_chunk)
            if not predictions:
                logging.warning(f"No predictions available for chunk {idx + 1} of topic {topic_id}. Skipping.")
                continue

            # Extract predictions data
            tactic_names = predictions.get('Tactic Name', ["Unknown"])
            technique_names = predictions.get('Technique Name', ["Unknown"])
            sub_tech_names = predictions.get('Sub-Technique Name', ["Unknown"])
            technique_ids = predictions.get('Technique ID', ["Unknown"])

            # Ensure consistent list lengths for predictions
            max_len = max(len(tactic_names), len(technique_names), len(sub_tech_names), len(technique_ids))
            tactic_names = (tactic_names or ["Unknown"]) * max_len
            technique_names = (technique_names or ["Unknown"]) * max_len
            sub_tech_names = (sub_tech_names or ["Unknown"]) * max_len
            technique_ids = (technique_ids or ["Unknown"]) * max_len

            # Step 7: Generate STIX Objects
            iocs = [elements_list[idx]]
            counts_list_for_stix = [counts_list[idx]]
            chunks_for_stix = [replaced_chunk]
            do_include_non_ioc = False

            for i, (tactic, technique, sub_technique, tech_id) in enumerate(zip(tactic_names, technique_names, sub_tech_names, technique_ids)):
                try:
                    indicators, relationships, attack_patterns = return_stix_objects(
                        counts_list_for_stix,
                        [tactic],
                        [technique],
                        [sub_technique],
                        [tech_id],
                        iocs,
                        chunks_for_stix,
                        do_include_non_ioc
                    )

                    # Skip if no objects were created
                    if not (indicators or relationships or attack_patterns):
                        logging.warning(f"No STIX objects created for chunk {idx + 1}, prediction {i + 1}. Skipping.")
                        continue

                    # Create STIX bundle
                    report_name = f"prediction_{i + 1}"
                    bundle = make_stix(indicators, relationships, attack_patterns, replaced_chunk, report_name)

                    if bundle is None:
                        logging.warning(f"STIX bundle creation failed for chunk {idx + 1}, prediction {i + 1}.")
                        continue

                    # Save the STIX bundle
                    stix_file_path = os.path.join(chunk_folder, f"{report_name}.json")
                    with open(stix_file_path, "w") as f:
                        json.dump(json.loads(bundle.serialize()), f, indent=4)

                    logging.info(f"Saved STIX file: {stix_file_path}")

                except Exception as e:
                    logging.error(f"Error processing chunk {idx + 1}, prediction {i + 1}: {e}")

    logging.info("Workflow completed successfully!")

In [None]:
# Run main workflow
if __name__ == "__main__":
     main_workflow("aptreport.pdf", 'results')

In [24]:
def process_reports(reports_folder, results_base_folder):
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),  # Log to console
            logging.FileHandler("processing.log", mode="w"),  # Log to file
        ],
    )

    # Create the base results folder if it doesn't exist
    os.makedirs(results_base_folder, exist_ok=True)

    logging.info(f"Starting PDF processing from folder: {reports_folder}")
    logging.info(f"Results will be saved in: {results_base_folder}\n")

    processed_files = 0
    skipped_files = 0
    errored_files = []
    total_files = len([f for f in os.listdir(reports_folder) if f.endswith(".pdf")])

    # Start time tracking
    start_time = time.time()

    # Iterate through each file in the Reports folder
    for report_file in os.listdir(reports_folder):
        if report_file.endswith(".pdf"):  # Process only PDF files
            if processed_files >= 100:  # Stop after processing 100 files
                break

            input_path = os.path.join(reports_folder, report_file)
            report_name = os.path.splitext(report_file)[0]  # Remove the file extension
            output_folder = os.path.join(results_base_folder, report_name)

            # Create a folder for each report's results
            os.makedirs(output_folder, exist_ok=True)

            logging.info(f"Processing file: {report_file}")
            logging.info(f"Input Path: {input_path}")
            logging.info(f"Output Folder: {output_folder}\n")

            try:
                # Call the main workflow for each PDF
                main_workflow(input_path, output_folder)
                processed_files += 1
                logging.info(f"Finished processing: {report_file}\n")
            except Exception as e:
                errored_files.append(report_file)
                logging.error(f"Error processing file {report_file}: {e}\n")

        else:
            skipped_files += 1
            logging.warning(f"Skipping non-PDF file: {report_file}")

    # Time taken to process first 100 reports
    end_time = time.time()
    time_taken = end_time - start_time

    # Summary
    logging.info("PDF processing completed.")
    logging.info(f"Processed files: {processed_files}")
    logging.info(f"Skipped files: {skipped_files}")
    if errored_files:
        logging.error(f"Errored files: {len(errored_files)}")
        for file in errored_files:
            logging.error(f" - {file}")

    logging.info(f"Time taken to process first 100 reports: {time_taken:.2f} seconds.")
    logging.info("All tasks have been completed.\n")

In [25]:
if __name__ == "__main__":
    # Default folder paths
    reports_folder = "Reports"
    results_base_folder = "Results_Topic_Sentences"

    # Check if the reports folder exists
    if not os.path.exists(reports_folder):
        print(f"Error: Reports folder '{reports_folder}' does not exist.")
        sys.exit(1)

    # Call the processing function
    process_reports(reports_folder, results_base_folder)

2024-12-03 06:18:25,433 - INFO - Starting PDF processing from folder: Reports
2024-12-03 06:18:25,434 - INFO - Results will be saved in: Results_Topic_Sentences

2024-12-03 06:18:25,439 - INFO - Processing file: Technical-threat-report-Arid-Viper-April-2021.pdf
2024-12-03 06:18:25,439 - INFO - Input Path: Reports/Technical-threat-report-Arid-Viper-April-2021.pdf
2024-12-03 06:18:25,440 - INFO - Output Folder: Results_Topic_Sentences/Technical-threat-report-Arid-Viper-April-2021

2024-12-03 06:18:25,440 - INFO - Processing Reports/Technical-threat-report-Arid-Viper-April-2021.pdf and saving results in Results_Topic_Sentences/Technical-threat-report-Arid-Viper-April-2021
2024-12-03 06:18:25,441 - INFO - Step 0: Extracting text from PDF...
2024-12-03 06:18:25,500 - INFO - Step 0: Cleaning the extracted text...
2024-12-03 06:18:25,506 - INFO - Step 1: Chunking the cleaned text into sentences...
2024-12-03 06:18:26,593 - INFO - Step 2: Building the topic model...


2024-12-03 06:18:27,053 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:18:27,055 - INFO - Use pytorch device_name: cuda
2024-12-03 06:18:27,056 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:18:30,035 - BERTopic - Embedding - Completed ✓
2024-12-03 06:18:30,035 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:18:34,707 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:18:34,708 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:18:34,730 - BERTopic - Cluster - Completed ✓
2024-12-03 06:18:34,733 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:18:40,297 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:18:40,650 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:18:40,652 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:18:40,652 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:18:40,659 - BERTopic - Cluster - Completed ✓


2024-12-03 06:18:40,660 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:18:40,660 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:18:40,802 - INFO - Step 5: Generating reports...
2024-12-03 06:18:40,803 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:18:40,881 - INFO - Saved STIX file: Results_Topic_Sentences/Technical-threat-report-Arid-Viper-April-2021/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:18:40,882 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:18:40,928 - INFO - Saved STIX file: Results_Topic_Sentences/Technical-threat-report-Arid-Viper-April-2021/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:18:40,929 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:18:40,943 - INFO - Saved STIX file: Results_Topic_Sentences/Technical-threat-report-Arid-Viper-April-2021/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:18:40,947 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:18:41,068

2024-12-03 06:18:57,180 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:18:57,182 - INFO - Use pytorch device_name: cuda
2024-12-03 06:18:57,183 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:18:59,715 - BERTopic - Embedding - Completed ✓
2024-12-03 06:18:59,715 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:18:59,880 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:18:59,881 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:18:59,897 - BERTopic - Cluster - Completed ✓
2024-12-03 06:18:59,899 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:19:03,914 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:19:04,154 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:19:04,155 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:04,155 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:19:04,160 - BERTopic - Cluster - Completed ✓


2024-12-03 06:19:04,160 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:19:04,161 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:19:04,251 - INFO - Step 5: Generating reports...
2024-12-03 06:19:04,252 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:19:04,347 - INFO - Saved STIX file: Results_Topic_Sentences/2022 Adversary Infrastructure Report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:19:04,350 - INFO - Saved STIX file: Results_Topic_Sentences/2022 Adversary Infrastructure Report/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:19:04,352 - INFO - Saved STIX file: Results_Topic_Sentences/2022 Adversary Infrastructure Report/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:19:04,353 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:19:04,434 - INFO - Saved STIX file: Results_Topic_Sentences/2022 Adversary Infrastructure Report/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:19:04,436 - INFO - Saved ST

2024-12-03 06:19:22,290 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:19:22,292 - INFO - Use pytorch device_name: cuda
2024-12-03 06:19:22,293 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:19:24,664 - BERTopic - Embedding - Completed ✓
2024-12-03 06:19:24,665 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:19:25,642 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:25,643 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:19:25,699 - BERTopic - Cluster - Completed ✓
2024-12-03 06:19:25,702 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:19:39,234 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:19:40,047 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:19:40,050 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:19:40,050 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:19:40,066 - BERTopic - Cluster - Completed ✓


2024-12-03 06:19:40,067 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:19:40,067 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:19:40,378 - INFO - Step 5: Generating reports...
2024-12-03 06:19:40,379 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:19:40,469 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2023/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:19:40,470 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2023/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:19:40,471 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:19:40,484 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2023/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:19:40,486 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2023/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:19:40,486 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:19:40,500 - INFO - Saved STIX file: Result

2024-12-03 06:20:17,088 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:20:17,091 - INFO - Use pytorch device_name: cuda
2024-12-03 06:20:17,091 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:20:19,411 - BERTopic - Embedding - Completed ✓
2024-12-03 06:20:19,412 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:20:19,625 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:19,626 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:20:19,646 - BERTopic - Cluster - Completed ✓
2024-12-03 06:20:19,648 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:20:24,306 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:20:24,626 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:20:24,629 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:24,630 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:20:24,636 - BERTopic - Cluster - Completed ✓


2024-12-03 06:20:24,637 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:20:24,637 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:20:24,758 - INFO - Step 5: Generating reports...
2024-12-03 06:20:24,759 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:20:24,772 - INFO - Saved STIX file: Results_Topic_Sentences/The-Desert-Falcons-targeted-attacks/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:20:24,774 - INFO - Saved STIX file: Results_Topic_Sentences/The-Desert-Falcons-targeted-attacks/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:20:24,775 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:20:24,793 - INFO - Saved STIX file: Results_Topic_Sentences/The-Desert-Falcons-targeted-attacks/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:20:24,797 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:20:24,861 - INFO - Saved STIX file: Results_Topic_Sentences/The-Desert-Falcons-targeted-attacks/topic_-1/ch

2024-12-03 06:20:40,202 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:20:40,204 - INFO - Use pytorch device_name: cuda
2024-12-03 06:20:40,205 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:20:42,494 - BERTopic - Embedding - Completed ✓
2024-12-03 06:20:42,496 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:20:42,724 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:42,726 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:20:42,745 - BERTopic - Cluster - Completed ✓
2024-12-03 06:20:42,748 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:20:47,529 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:20:47,847 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:20:47,849 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:20:47,851 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:20:47,857 - BERTopic - Cluster - Completed ✓


2024-12-03 06:20:47,858 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:20:47,858 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:20:48,008 - INFO - Step 5: Generating reports...
2024-12-03 06:20:48,009 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:20:48,062 - INFO - Saved STIX file: Results_Topic_Sentences/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:20:48,063 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:20:48,161 - INFO - Saved STIX file: Results_Topic_Sentences/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:20:48,164 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:20:48,207 - INFO - Saved STIX file: Results_Topic_Sentences/A_year_of_Russian_hybrid_warfare_in_Ukraine_MS_Threat_Intelligence/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:20:48,208 - IN

2024-12-03 06:21:01,952 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:01,954 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:01,955 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:21:04,245 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:04,246 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:04,509 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:04,511 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:04,535 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:04,538 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:09,951 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:21:10,297 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:10,300 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:10,302 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:10,309 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:10,310 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:10,311 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:10,457 - INFO - Step 5: Generating reports...
2024-12-03 06:21:10,458 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:21:10,490 - INFO - Saved STIX file: Results_Topic_Sentences/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:10,492 - INFO - Saved STIX file: Results_Topic_Sentences/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:21:10,493 - INFO - Saved STIX file: Results_Topic_Sentences/aa23-347a-russian-foreign-intelligence-service-svr-exploiting-jetbrains-teamcity-cve-globally/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:21:10,494 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:21

2024-12-03 06:21:28,527 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:28,529 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:28,530 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:30,771 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:30,771 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:30,804 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:30,804 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:30,809 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:30,812 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:31,417 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:31,460 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:31,461 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:31,461 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:31,463 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:31,463 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:31,464 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:31,475 - INFO - Step 5: Generating reports...
2024-12-03 06:21:31,476 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:21:31,667 - INFO - Saved STIX file: Results_Topic_Sentences/APT37.blog/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:21:31,670 - INFO - Saved STIX file: Results_Topic_Sentences/APT37.blog/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:21:31,670 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:21:31,852 - INFO - Saved STIX file: Results_Topic_Sentences/APT37.blog/topic_0/chunk_1/prediction_1.json
2024-12-03 06:21:31,853 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:21:31,868 - INFO - Saved STIX file: Results_Topic_Sentences/APT37.blog/topic_0/chunk_2/prediction_1.json
2024-12-03 06:21:31,868 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 0

2024-12-03 06:21:33,635 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:33,638 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:33,639 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:35,801 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:35,802 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:35,821 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:35,822 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:35,826 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:35,828 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:36,182 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:21:36,211 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:36,211 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:36,212 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:36,213 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:36,213 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:36,214 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:36,221 - INFO - Step 5: Generating reports...
2024-12-03 06:21:36,221 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:21:36,308 - INFO - Saved STIX file: Results_Topic_Sentences/Dust_Storm_Infographic_v4/topic_0/chunk_1/prediction_1.json
2024-12-03 06:21:36,310 - INFO - Saved STIX file: Results_Topic_Sentences/Dust_Storm_Infographic_v4/topic_0/chunk_1/prediction_2.json
2024-12-03 06:21:36,311 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:21:36,356 - INFO - Saved STIX file: Results_Topic_Sentences/Dust_Storm_Infographic_v4/topic_0/chunk_2/prediction_1.json
2024-12-03 06:21:36,357 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:21:36,369 - INFO - Saved STIX file: Results_Topic_Sentences/Dust_Storm_Infographic_v4/topic_0/chunk_3/prediction_1.json
2024-12-03 06:21:36,370

2024-12-03 06:21:39,179 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:21:39,181 - INFO - Use pytorch device_name: cuda
2024-12-03 06:21:39,181 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:21:41,518 - BERTopic - Embedding - Completed ✓
2024-12-03 06:21:41,519 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:21:42,093 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:42,094 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:21:42,131 - BERTopic - Cluster - Completed ✓
2024-12-03 06:21:42,134 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:21:52,200 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:21:52,752 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:21:52,754 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:21:52,754 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:21:52,765 - BERTopic - Cluster - Completed ✓


2024-12-03 06:21:52,766 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:21:52,766 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:21:52,955 - INFO - Step 5: Generating reports...
2024-12-03 06:21:52,956 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:21:52,962 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:21:52,977 - INFO - Saved STIX file: Results_Topic_Sentences/Bartholomew-GuerreroSaade-VB2016/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:21:52,979 - INFO - Saved STIX file: Results_Topic_Sentences/Bartholomew-GuerreroSaade-VB2016/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:21:52,979 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:21:52,993 - INFO - Saved STIX file: Results_Topic_Sentences/Bartholomew-GuerreroSaade-VB2016/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:21:52,994 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:21:53,000 - INFO - Processing chunk 2/3 of topic

2024-12-03 06:22:23,205 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:23,208 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:23,208 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:25,431 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:25,432 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:25,542 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:25,543 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:25,554 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:25,556 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:28,107 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:28,274 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:28,275 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:28,275 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:28,278 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:28,279 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:28,279 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:28,334 - INFO - Step 5: Generating reports...
2024-12-03 06:22:28,334 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:22:28,349 - INFO - Saved STIX file: Results_Topic_Sentences/callisto-group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:28,349 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:22:28,510 - INFO - Saved STIX file: Results_Topic_Sentences/callisto-group/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:22:28,513 - INFO - Saved STIX file: Results_Topic_Sentences/callisto-group/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:22:28,517 - INFO - Saved STIX file: Results_Topic_Sentences/callisto-group/topic_-1/chunk_2/prediction_3.json
2024-12-03 06:22:28,519 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:22:28,598 - INFO - Saved STIX file: Results_To

2024-12-03 06:22:36,443 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:36,445 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:36,445 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:22:38,607 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:38,608 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:38,666 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:38,667 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:38,674 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:38,676 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:40,095 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:22:40,214 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:40,215 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:40,216 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:40,219 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:40,220 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:40,220 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:40,271 - INFO - Step 5: Generating reports...
2024-12-03 06:22:40,272 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:22:40,372 - INFO - Saved STIX file: Results_Topic_Sentences/Musical Chairs Playing Tetris/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:40,373 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:22:40,403 - INFO - Saved STIX file: Results_Topic_Sentences/Musical Chairs Playing Tetris/topic_0/chunk_1/prediction_1.json
2024-12-03 06:22:40,410 - INFO - Saved STIX file: Results_Topic_Sentences/Musical Chairs Playing Tetris/topic_0/chunk_1/prediction_2.json
2024-12-03 06:22:40,412 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:22:40,450 - INFO - Saved STIX file: Results_Topic_Sentences/Musical Chairs Playing Tetris/topic_0/chunk_2/prediction_1.json
2024-

2024-12-03 06:22:47,029 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:22:47,031 - INFO - Use pytorch device_name: cuda
2024-12-03 06:22:47,031 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:49,204 - BERTopic - Embedding - Completed ✓
2024-12-03 06:22:49,208 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:22:49,330 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:49,330 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:22:49,342 - BERTopic - Cluster - Completed ✓
2024-12-03 06:22:49,344 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:22:52,409 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:22:52,604 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:22:52,605 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:22:52,606 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:22:52,609 - BERTopic - Cluster - Completed ✓


2024-12-03 06:22:52,610 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:22:52,610 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:22:52,687 - INFO - Step 5: Generating reports...
2024-12-03 06:22:52,689 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:22:52,703 - INFO - Saved STIX file: Results_Topic_Sentences/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support Espionage Activities/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:22:52,704 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:22:52,728 - INFO - Saved STIX file: Results_Topic_Sentences/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support Espionage Activities/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:22:52,729 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:22:52,751 - INFO - Saved STIX file: Results_Topic_Sentences/BlueDelta Exploits Ukrainian Government Roundcube Mail Servers to Support 

2024-12-03 06:23:01,799 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:01,802 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:01,804 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:23:04,343 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:04,344 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:05,312 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:05,314 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:05,370 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:05,373 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:23:19,223 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/23 [00:00<?, ?it/s]

2024-12-03 06:23:19,990 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:23:19,993 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:19,993 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:23:20,008 - BERTopic - Cluster - Completed ✓


2024-12-03 06:23:20,008 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:23:20,009 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:23:20,305 - INFO - Step 5: Generating reports...
2024-12-03 06:23:20,306 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:23:20,318 - INFO - Saved STIX file: Results_Topic_Sentences/ESET_Threat_Report_Q32020/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:23:20,319 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:23:20,354 - INFO - Saved STIX file: Results_Topic_Sentences/ESET_Threat_Report_Q32020/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:23:20,356 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:23:20,371 - INFO - Saved STIX file: Results_Topic_Sentences/ESET_Threat_Report_Q32020/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:23:20,373 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:23:20,394 - INFO - Saved STIX file: Results_Topic_Sentences/ESET_Thre

2024-12-03 06:23:57,159 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:23:57,163 - INFO - Use pytorch device_name: cuda
2024-12-03 06:23:57,163 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:23:59,401 - BERTopic - Embedding - Completed ✓
2024-12-03 06:23:59,402 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:23:59,826 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:23:59,826 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:23:59,845 - BERTopic - Cluster - Completed ✓
2024-12-03 06:23:59,847 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:03,926 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:24:04,233 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:04,235 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:04,235 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:04,240 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:04,241 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:04,242 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:04,353 - INFO - Step 5: Generating reports...
2024-12-03 06:24:04,354 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:24:04,367 - INFO - Saved STIX file: Results_Topic_Sentences/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:24:04,370 - INFO - Saved STIX file: Results_Topic_Sentences/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:24:04,371 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:24:04,468 - INFO - Saved STIX file: Results_Topic_Sentences/xHunt Campaign_ New BumbleBee Webshell and SSH Tunnels Used for Lateral Movement/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:24:04,469 - INFO - Processing chunk 3/3 o

2024-12-03 06:24:19,426 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:24:19,428 - INFO - Use pytorch device_name: cuda
2024-12-03 06:24:19,428 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:24:21,614 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:21,616 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:21,691 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:21,692 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:21,702 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:21,705 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:23,521 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:24:23,658 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:23,661 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:23,661 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:23,665 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:23,666 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:23,667 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:23,709 - INFO - Step 5: Generating reports...
2024-12-03 06:24:23,710 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:24:24,005 - INFO - Saved STIX file: Results_Topic_Sentences/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:24,006 - INFO - Saved STIX file: Results_Topic_Sentences/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_2.json
2024-12-03 06:24:24,008 - INFO - Saved STIX file: Results_Topic_Sentences/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_3.json
2024-12-03 06:24:24,009 - INFO - Saved STIX file: Results_Topic_Sentences/Iranian Fileless Attack Infiltrates Israeli Organizations/topic_0/chunk_1/prediction_4.json
2024-12-03 06:24:24,010 - INFO 

2024-12-03 06:24:28,940 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:24:28,943 - INFO - Use pytorch device_name: cuda
2024-12-03 06:24:28,943 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:24:31,285 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:31,287 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:31,314 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:31,315 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:31,319 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:31,321 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:31,529 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:24:31,560 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:31,561 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:31,561 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:31,563 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:31,564 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:31,564 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:31,569 - INFO - Step 5: Generating reports...
2024-12-03 06:24:31,570 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:24:31,682 - INFO - Saved STIX file: Results_Topic_Sentences/US23-Heiding-Devicing-and-Detecting-Phishing/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:31,683 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:24:31,795 - INFO - Saved STIX file: Results_Topic_Sentences/US23-Heiding-Devicing-and-Detecting-Phishing/topic_0/chunk_2/prediction_1.json
2024-12-03 06:24:31,796 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:24:31,909 - INFO - Saved STIX file: Results_Topic_Sentences/US23-Heiding-Devicing-and-Detecting-Phishing/topic_0/chunk_3/prediction_1.json
2024-12-03 06:24:31,910 - INFO - Processing chunk 1/3 of topic 1...
2024-12-03 06:24:32,201 - INFO -

2024-12-03 06:24:34,340 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:24:34,342 - INFO - Use pytorch device_name: cuda
2024-12-03 06:24:34,343 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:24:36,591 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:36,591 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:36,742 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:36,743 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:36,760 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:36,762 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:41,062 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:24:41,340 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:41,343 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:41,344 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:41,350 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:41,351 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:41,352 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:41,470 - INFO - Step 5: Generating reports...
2024-12-03 06:24:41,472 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:24:41,486 - INFO - Saved STIX file: Results_Topic_Sentences/RedFoxtrot_group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:24:41,487 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:24:41,593 - INFO - Saved STIX file: Results_Topic_Sentences/RedFoxtrot_group/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:24:41,594 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:24:41,708 - INFO - Saved STIX file: Results_Topic_Sentences/RedFoxtrot_group/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:41,709 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:24:41,763 - INFO - Saved STIX file: Results_Topic_Sentences/RedFoxtrot_group/topic_0/chunk_2/predi

2024-12-03 06:24:54,086 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:24:54,089 - INFO - Use pytorch device_name: cuda
2024-12-03 06:24:54,089 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:24:56,275 - BERTopic - Embedding - Completed ✓
2024-12-03 06:24:56,276 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:24:56,375 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:56,375 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:24:56,386 - BERTopic - Cluster - Completed ✓
2024-12-03 06:24:56,388 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:24:59,007 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:24:59,186 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:24:59,187 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:24:59,188 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:24:59,191 - BERTopic - Cluster - Completed ✓


2024-12-03 06:24:59,192 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:24:59,194 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:24:59,264 - INFO - Step 5: Generating reports...
2024-12-03 06:24:59,265 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:24:59,273 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:24:59,289 - INFO - Saved STIX file: Results_Topic_Sentences/cta-cn-2024-0624/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:24:59,290 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:24:59,341 - INFO - Saved STIX file: Results_Topic_Sentences/cta-cn-2024-0624/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:24:59,342 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:24:59,393 - INFO - Saved STIX file: Results_Topic_Sentences/cta-cn-2024-0624/topic_0/chunk_1/prediction_1.json
2024-12-03 06:24:59,394 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:24:59,401 - INFO - Processing

2024-12-03 06:25:05,676 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:25:05,678 - INFO - Use pytorch device_name: cuda
2024-12-03 06:25:05,679 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:25:07,947 - BERTopic - Embedding - Completed ✓
2024-12-03 06:25:07,949 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:25:08,079 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:08,080 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:25:08,093 - BERTopic - Cluster - Completed ✓
2024-12-03 06:25:08,096 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:25:11,297 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:25:11,508 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:25:11,510 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:11,510 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:25:11,514 - BERTopic - Cluster - Completed ✓


2024-12-03 06:25:11,515 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:25:11,516 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:25:11,618 - INFO - Step 5: Generating reports...
2024-12-03 06:25:11,619 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:25:11,639 - INFO - Saved STIX file: Results_Topic_Sentences/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:25:11,641 - INFO - Saved STIX file: Results_Topic_Sentences/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:25:11,643 - INFO - Saved STIX file: Results_Topic_Sentences/Iron Tiger APT Updates Toolkit With Evolved SysUpdate Malware/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:25:11,644 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:25:11,765 - INFO - Saved STIX file: Results_Topic_Sentences/Iron Tiger APT Updates Toolkit With Ev

2024-12-03 06:25:22,697 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:25:22,699 - INFO - Use pytorch device_name: cuda
2024-12-03 06:25:22,700 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

2024-12-03 06:25:25,030 - BERTopic - Embedding - Completed ✓
2024-12-03 06:25:25,032 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:25:25,521 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:25,522 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:25:25,554 - BERTopic - Cluster - Completed ✓
2024-12-03 06:25:25,560 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:25:34,058 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

2024-12-03 06:25:34,583 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:25:34,585 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:25:34,588 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:25:34,598 - BERTopic - Cluster - Completed ✓


2024-12-03 06:25:34,599 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:25:34,599 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:25:34,816 - INFO - Step 5: Generating reports...
2024-12-03 06:25:34,817 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:25:34,834 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-apt38-2018-web_v4/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:25:34,835 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-apt38-2018-web_v4/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:25:34,839 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:25:34,972 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-apt38-2018-web_v4/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:25:34,974 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-apt38-2018-web_v4/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:25:34,979 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:25:35,162 - INFO -

2024-12-03 06:26:03,693 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:26:03,697 - INFO - Use pytorch device_name: cuda
2024-12-03 06:26:03,698 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/34 [00:00<?, ?it/s]

2024-12-03 06:26:06,203 - BERTopic - Embedding - Completed ✓
2024-12-03 06:26:06,204 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:26:08,031 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:08,032 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:26:08,112 - BERTopic - Cluster - Completed ✓
2024-12-03 06:26:08,114 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:26:26,442 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/34 [00:00<?, ?it/s]

2024-12-03 06:26:27,568 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:26:27,571 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:26:27,573 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:26:27,595 - BERTopic - Cluster - Completed ✓


2024-12-03 06:26:27,598 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:26:27,598 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:26:28,073 - INFO - Step 5: Generating reports...
2024-12-03 06:26:28,073 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:26:28,088 - INFO - Saved STIX file: Results_Topic_Sentences/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:26:28,089 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:26:28,186 - INFO - Saved STIX file: Results_Topic_Sentences/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:26:28,189 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:26:28,205 - INFO - Saved STIX file: Results_Topic_Sentences/Kaspersky_Lab_crouching_yeti_appendixes_eng_final/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:26:28,207 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 

2024-12-03 06:27:29,992 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:27:29,994 - INFO - Use pytorch device_name: cuda
2024-12-03 06:27:29,995 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:27:32,661 - BERTopic - Embedding - Completed ✓
2024-12-03 06:27:32,662 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:27:32,979 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:32,981 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:27:33,003 - BERTopic - Cluster - Completed ✓
2024-12-03 06:27:33,005 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:27:36,746 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:27:37,050 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:27:37,056 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:37,056 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:27:37,063 - BERTopic - Cluster - Completed ✓


2024-12-03 06:27:37,064 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:27:37,064 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:27:37,135 - INFO - Step 5: Generating reports...
2024-12-03 06:27:37,137 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:27:37,382 - INFO - Saved STIX file: Results_Topic_Sentences/20151028_codeblue_apt-en/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:27:37,384 - INFO - Saved STIX file: Results_Topic_Sentences/20151028_codeblue_apt-en/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:27:37,387 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:27:37,458 - INFO - Saved STIX file: Results_Topic_Sentences/20151028_codeblue_apt-en/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:27:37,458 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:27:37,572 - INFO - Saved STIX file: Results_Topic_Sentences/20151028_codeblue_apt-en/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:27:37,

2024-12-03 06:27:54,462 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:27:54,464 - INFO - Use pytorch device_name: cuda
2024-12-03 06:27:54,465 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:27:56,796 - BERTopic - Embedding - Completed ✓
2024-12-03 06:27:56,796 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:27:57,357 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:27:57,357 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:27:57,395 - BERTopic - Cluster - Completed ✓
2024-12-03 06:27:57,397 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:28:06,917 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:28:07,485 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:28:07,486 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:28:07,487 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:28:07,497 - BERTopic - Cluster - Completed ✓


2024-12-03 06:28:07,497 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:28:07,498 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:28:07,715 - INFO - Step 5: Generating reports...
2024-12-03 06:28:07,715 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:28:07,895 - INFO - Saved STIX file: Results_Topic_Sentences/ICS_eng/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:28:07,900 - INFO - Saved STIX file: Results_Topic_Sentences/ICS_eng/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:28:07,900 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:28:07,915 - INFO - Saved STIX file: Results_Topic_Sentences/ICS_eng/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:28:07,916 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:28:07,933 - INFO - Saved STIX file: Results_Topic_Sentences/ICS_eng/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:28:07,934 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:28:07,

2024-12-03 06:28:41,790 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:28:41,793 - INFO - Use pytorch device_name: cuda
2024-12-03 06:28:41,794 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:28:44,006 - BERTopic - Embedding - Completed ✓
2024-12-03 06:28:44,007 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:28:44,122 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:28:44,122 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:28:44,136 - BERTopic - Cluster - Completed ✓
2024-12-03 06:28:44,138 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:28:46,981 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:28:47,173 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:28:47,176 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:28:47,176 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:28:47,181 - BERTopic - Cluster - Completed ✓


2024-12-03 06:28:47,182 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:28:47,183 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:28:47,246 - INFO - Step 5: Generating reports...
2024-12-03 06:28:47,247 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:28:47,262 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1007 - Shamoon/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:28:47,263 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:28:47,277 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1007 - Shamoon/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:28:47,278 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:28:47,283 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:28:47,311 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1007 - Shamoon/topic_0/chunk_1/prediction_1.json
2024-12-03 06:28:47,313 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1007 - Shamoon/topic_0/chun

2024-12-03 06:28:58,770 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:28:58,772 - INFO - Use pytorch device_name: cuda
2024-12-03 06:28:58,772 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:29:01,080 - BERTopic - Embedding - Completed ✓
2024-12-03 06:29:01,081 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:29:01,606 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:01,607 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:29:01,647 - BERTopic - Cluster - Completed ✓
2024-12-03 06:29:01,650 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:29:11,431 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 06:29:11,993 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:29:11,996 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:11,997 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:29:12,007 - BERTopic - Cluster - Completed ✓


2024-12-03 06:29:12,008 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:29:12,008 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:29:12,206 - INFO - Step 5: Generating reports...
2024-12-03 06:29:12,207 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:29:12,255 - INFO - Saved STIX file: Results_Topic_Sentences/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:29:12,255 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:29:12,367 - INFO - Saved STIX file: Results_Topic_Sentences/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:29:12,368 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:29:12,411 - INFO - Saved STIX file: Results_Topic_Sentences/2022-Blockchain-Security-and-AML-Analysis-Annual-Report(EN)/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:29:12,412 - INFO - Processing chunk

2024-12-03 06:29:41,685 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:29:41,688 - INFO - Use pytorch device_name: cuda
2024-12-03 06:29:41,688 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

2024-12-03 06:29:43,927 - BERTopic - Embedding - Completed ✓
2024-12-03 06:29:43,928 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:29:44,233 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:44,234 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:29:44,258 - BERTopic - Cluster - Completed ✓
2024-12-03 06:29:44,261 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:29:49,234 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

2024-12-03 06:29:49,536 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:29:49,538 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:29:49,538 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:29:49,545 - BERTopic - Cluster - Completed ✓


2024-12-03 06:29:49,546 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:29:49,546 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:29:49,680 - INFO - Step 5: Generating reports...
2024-12-03 06:29:49,680 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:29:49,696 - INFO - Saved STIX file: Results_Topic_Sentences/20231013_Lazarus_OP.Dream_Magic/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:29:49,697 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:29:49,703 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:29:49,803 - INFO - Saved STIX file: Results_Topic_Sentences/20231013_Lazarus_OP.Dream_Magic/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:29:49,804 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:29:49,811 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:29:49,838 - INFO - Saved STIX file: Results_Topic_Sentences/20231013_Lazarus_OP.Dream_Magic/topic_0/chunk_2/prediction_1.jso

2024-12-03 06:30:17,069 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:17,071 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:17,072 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:19,277 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:19,278 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:19,383 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:19,385 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:19,398 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:19,400 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:21,789 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:21,933 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:21,935 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:21,935 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:21,938 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:21,939 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:21,941 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:21,988 - INFO - Step 5: Generating reports...
2024-12-03 06:30:21,989 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:30:22,037 - INFO - Saved STIX file: Results_Topic_Sentences/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:22,037 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:30:22,123 - INFO - Saved STIX file: Results_Topic_Sentences/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:30:22,124 - INFO - Saved STIX file: Results_Topic_Sentences/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:30:22,125 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:30:22,216 - INFO - Saved STIX file: Results_Topic_Sentences/Sahel-Gabon-Coup-Playbook-PDF/topic_-1/chunk_3/prediction_1.json


2024-12-03 06:30:28,349 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:28,352 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:28,352 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:30:30,595 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:30,596 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:30,648 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:30,649 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:30,657 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:30,660 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:31,967 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:30:32,066 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:32,068 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:32,069 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:32,072 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:32,073 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:32,073 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:32,108 - INFO - Step 5: Generating reports...
2024-12-03 06:30:32,109 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:30:32,122 - INFO - Saved STIX file: Results_Topic_Sentences/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_0/chunk_1/prediction_1.json
2024-12-03 06:30:32,123 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:30:32,139 - INFO - Saved STIX file: Results_Topic_Sentences/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_0/chunk_2/prediction_1.json
2024-12-03 06:30:32,140 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:30:32,191 - INFO - Saved STIX file: Results_Topic_Sentences/Inception Attackers Target Europe with Year-old Office Vulnerability/topic_0/chunk_3/prediction_1.json
2024-12-03 06:30:32,194 - IN

2024-12-03 06:30:36,502 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:36,504 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:36,505 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:38,759 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:38,759 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:38,867 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:38,867 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:38,879 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:38,882 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:41,913 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:30:42,086 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:42,089 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:42,092 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:42,097 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:42,098 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:42,098 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:42,159 - INFO - Step 5: Generating reports...
2024-12-03 06:30:42,161 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:30:42,227 - INFO - Saved STIX file: Results_Topic_Sentences/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:42,229 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:30:42,245 - INFO - Saved STIX file: Results_Topic_Sentences/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:30:42,246 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:30:42,346 - INFO - Saved STIX file: Results_Topic_Sentences/Evil Eye Threat Actor Resurfaces with iOS Exploit and Updated Implant/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:30:42

2024-12-03 06:30:49,365 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:30:49,369 - INFO - Use pytorch device_name: cuda
2024-12-03 06:30:49,369 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:30:51,755 - BERTopic - Embedding - Completed ✓
2024-12-03 06:30:51,756 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:30:51,951 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:51,952 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:30:51,971 - BERTopic - Cluster - Completed ✓
2024-12-03 06:30:51,974 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:30:56,104 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:30:56,385 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:30:56,386 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:30:56,387 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:30:56,392 - BERTopic - Cluster - Completed ✓


2024-12-03 06:30:56,392 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:30:56,393 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:30:56,507 - INFO - Step 5: Generating reports...
2024-12-03 06:30:56,508 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:30:56,528 - INFO - Saved STIX file: Results_Topic_Sentences/THOR_ Previously Unseen PlugX Variant Deployed During Microsoft Exchange Server Attacks by PKPLUG Group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:30:56,532 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:30:56,634 - INFO - Saved STIX file: Results_Topic_Sentences/THOR_ Previously Unseen PlugX Variant Deployed During Microsoft Exchange Server Attacks by PKPLUG Group/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:30:56,635 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:30:56,652 - INFO - Saved STIX file: Results_Topic_Sentences/THOR_ Previously Unseen PlugX Variant Deployed During Mi

2024-12-03 06:31:18,859 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:31:18,861 - INFO - Use pytorch device_name: cuda
2024-12-03 06:31:18,862 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/99 [00:00<?, ?it/s]

2024-12-03 06:31:22,550 - BERTopic - Embedding - Completed ✓
2024-12-03 06:31:22,551 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:31:33,857 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:31:33,858 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:31:34,164 - BERTopic - Cluster - Completed ✓
2024-12-03 06:31:34,168 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:32:39,177 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/99 [00:00<?, ?it/s]

2024-12-03 06:32:42,820 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:32:42,828 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:32:42,829 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:32:42,897 - BERTopic - Cluster - Completed ✓


2024-12-03 06:32:42,900 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:32:42,901 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:32:44,693 - INFO - Step 5: Generating reports...
2024-12-03 06:32:44,694 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:32:44,702 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:32:44,717 - INFO - Saved STIX file: Results_Topic_Sentences/A_Threat_Actor_Encyclopedia/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:32:44,719 - INFO - Saved STIX file: Results_Topic_Sentences/A_Threat_Actor_Encyclopedia/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:32:44,719 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:32:44,733 - INFO - Saved STIX file: Results_Topic_Sentences/A_Threat_Actor_Encyclopedia/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:32:44,735 - INFO - Saved STIX file: Results_Topic_Sentences/A_Threat_Actor_Encyclopedia/topic_-1/chunk_3/prediction_2.json
2024-12-

2024-12-03 06:35:14,151 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:14,153 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:14,154 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:35:16,479 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:16,480 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:16,597 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:16,597 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:16,610 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:16,612 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:19,428 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:35:19,611 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:19,612 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:19,613 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:19,617 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:19,618 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:19,620 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:19,685 - INFO - Step 5: Generating reports...
2024-12-03 06:35:19,686 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:35:19,862 - INFO - Saved STIX file: Results_Topic_Sentences/ChessMaster Adds Updated Tools to Its Arsenal/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:19,864 - INFO - Saved STIX file: Results_Topic_Sentences/ChessMaster Adds Updated Tools to Its Arsenal/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:35:19,866 - INFO - Saved STIX file: Results_Topic_Sentences/ChessMaster Adds Updated Tools to Its Arsenal/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:35:19,866 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:35:19,882 - INFO - Saved STIX file: Results_Topic_Sentences/ChessMaster Adds Updated Tools to Its Arsenal/topic_-1/chunk_2/prediction_1.json
2024-

2024-12-03 06:35:27,869 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:27,872 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:27,876 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:35:30,110 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:30,111 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:30,216 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:30,217 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:30,229 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:30,231 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:32,680 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:35:32,849 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:32,850 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:32,850 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:32,853 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:32,854 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:32,854 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:32,944 - INFO - Step 5: Generating reports...
2024-12-03 06:35:32,945 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:35:32,969 - INFO - Saved STIX file: Results_Topic_Sentences/[S2W LAB] Analysis of Clop Ransomware suspiciously related to the Recent Incident (English)/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:32,970 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:35:32,977 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:35:33,085 - INFO - Saved STIX file: Results_Topic_Sentences/[S2W LAB] Analysis of Clop Ransomware suspiciously related to the Recent Incident (English)/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:35:33,086 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:35:33,101 - INFO - Saved STIX file: Results_Topic_Sentences/[S2W LAB] An

2024-12-03 06:35:41,658 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:35:41,661 - INFO - Use pytorch device_name: cuda
2024-12-03 06:35:41,661 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:35:43,940 - BERTopic - Embedding - Completed ✓
2024-12-03 06:35:43,940 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:35:44,192 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:44,193 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:35:44,225 - BERTopic - Cluster - Completed ✓
2024-12-03 06:35:44,228 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:35:50,042 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:35:50,392 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:35:50,395 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:35:50,395 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:35:50,401 - BERTopic - Cluster - Completed ✓


2024-12-03 06:35:50,402 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:35:50,403 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:35:50,535 - INFO - Step 5: Generating reports...
2024-12-03 06:35:50,536 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:35:50,582 - INFO - Saved STIX file: Results_Topic_Sentences/swift_bae_report_Follow-The Money/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:35:50,583 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:35:50,630 - INFO - Saved STIX file: Results_Topic_Sentences/swift_bae_report_Follow-The Money/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:35:50,631 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:35:50,773 - INFO - Saved STIX file: Results_Topic_Sentences/swift_bae_report_Follow-The Money/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:35:50,775 - INFO - Saved STIX file: Results_Topic_Sentences/swift_bae_report_Follow-The Money/topic_-1/chunk_3/pr

2024-12-03 06:36:06,390 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:06,392 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:06,393 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:36:08,707 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:08,709 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:08,942 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:08,943 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:08,963 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:08,965 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:13,732 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:36:14,021 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:14,023 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:14,024 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:14,030 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:14,031 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:14,031 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:14,146 - INFO - Step 5: Generating reports...
2024-12-03 06:36:14,147 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:36:14,278 - INFO - Saved STIX file: Results_Topic_Sentences/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:14,280 - INFO - Saved STIX file: Results_Topic_Sentences/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:36:14,280 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:36:14,331 - INFO - Saved STIX file: Results_Topic_Sentences/Operation Soft Cell_ A Worldwide Campaign Against Telecommunications Providers/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:36:14,333 - INFO - Saved STIX file: Results_Top

2024-12-03 06:36:29,225 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:29,227 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:29,228 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:36:31,823 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:31,823 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:32,011 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:32,012 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:32,030 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:32,032 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:36:36,208 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:36:36,464 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:36:36,466 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:36,466 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:36:36,471 - BERTopic - Cluster - Completed ✓


2024-12-03 06:36:36,472 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:36:36,473 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:36:36,572 - INFO - Step 5: Generating reports...
2024-12-03 06:36:36,573 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:36:36,586 - INFO - Saved STIX file: Results_Topic_Sentences/wp-global-energy-cyberattacks-night-dragon/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:36:36,586 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:36:36,712 - INFO - Saved STIX file: Results_Topic_Sentences/wp-global-energy-cyberattacks-night-dragon/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:36:36,714 - INFO - Saved STIX file: Results_Topic_Sentences/wp-global-energy-cyberattacks-night-dragon/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:36:36,714 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:36:36,780 - INFO - Saved STIX file: Results_Topic_Sentences/wp-global-energy-cyberatta

2024-12-03 06:36:50,369 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:36:50,372 - INFO - Use pytorch device_name: cuda
2024-12-03 06:36:50,373 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:36:52,683 - BERTopic - Embedding - Completed ✓
2024-12-03 06:36:52,686 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:36:53,104 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:36:53,105 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:36:53,139 - BERTopic - Cluster - Completed ✓
2024-12-03 06:36:53,142 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:37:01,582 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:37:02,085 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:37:02,091 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:02,092 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:37:02,101 - BERTopic - Cluster - Completed ✓


2024-12-03 06:37:02,101 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:37:02,102 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:37:02,306 - INFO - Step 5: Generating reports...
2024-12-03 06:37:02,307 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:37:02,471 - INFO - Saved STIX file: Results_Topic_Sentences/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:37:02,473 - INFO - Saved STIX file: Results_Topic_Sentences/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:37:02,474 - INFO - Saved STIX file: Results_Topic_Sentences/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:37:02,476 - INFO - Saved STIX file: Results_Topic_Sentences/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_4.json
2024-12-03 06:37:02,477 - INFO - Saved STIX file: Results_Topic_Sentences/chinese_darkweb_analysis/topic_-1/chunk_1/prediction_5.json
2024-12-03 06:37:02,477 

2024-12-03 06:37:22,621 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:37:22,624 - INFO - Use pytorch device_name: cuda
2024-12-03 06:37:22,625 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:37:25,256 - BERTopic - Embedding - Completed ✓
2024-12-03 06:37:25,257 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:37:25,399 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:25,400 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:37:25,414 - BERTopic - Cluster - Completed ✓
2024-12-03 06:37:25,416 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:37:28,829 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:37:29,063 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:37:29,065 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:29,066 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:37:29,071 - BERTopic - Cluster - Completed ✓


2024-12-03 06:37:29,072 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:37:29,074 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:37:29,175 - INFO - Step 5: Generating reports...
2024-12-03 06:37:29,176 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:37:29,332 - INFO - Saved STIX file: Results_Topic_Sentences/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:37:29,337 - INFO - Saved STIX file: Results_Topic_Sentences/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:37:29,338 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:37:29,540 - INFO - Saved STIX file: Results_Topic_Sentences/In Pursuit of Optical Fibers and Troop Intel_ Targeted Attack Distributes PlugX in Russia _ Proofpoint/topic_-1/chunk_2/predict

2024-12-03 06:37:40,430 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:37:40,432 - INFO - Use pytorch device_name: cuda
2024-12-03 06:37:40,432 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:37:43,071 - BERTopic - Embedding - Completed ✓
2024-12-03 06:37:43,072 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:37:43,288 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:43,289 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:37:43,308 - BERTopic - Cluster - Completed ✓
2024-12-03 06:37:43,310 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:37:47,709 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:37:48,004 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:37:48,008 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:37:48,010 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:37:48,016 - BERTopic - Cluster - Completed ✓


2024-12-03 06:37:48,017 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:37:48,018 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:37:48,101 - INFO - Step 5: Generating reports...
2024-12-03 06:37:48,102 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:37:48,216 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-china-chopper/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:37:48,217 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:37:48,377 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-china-chopper/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:37:48,383 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-china-chopper/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:37:48,385 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:37:48,405 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-china-chopper/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:37:48,406 - INFO - Processing chun

2024-12-03 06:38:05,289 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:38:05,293 - INFO - Use pytorch device_name: cuda
2024-12-03 06:38:05,294 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

2024-12-03 06:38:07,618 - BERTopic - Embedding - Completed ✓
2024-12-03 06:38:07,619 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:38:08,338 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:08,338 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:38:08,383 - BERTopic - Cluster - Completed ✓
2024-12-03 06:38:08,386 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:38:19,128 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

2024-12-03 06:38:19,752 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:38:19,754 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:19,754 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:38:19,765 - BERTopic - Cluster - Completed ✓


2024-12-03 06:38:19,766 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:38:19,767 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:38:20,004 - INFO - Step 5: Generating reports...
2024-12-03 06:38:20,005 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:38:20,083 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted_Attacks_Lense_NGO/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:38:20,084 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:38:20,097 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted_Attacks_Lense_NGO/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:38:20,098 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted_Attacks_Lense_NGO/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:38:20,099 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:38:20,111 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted_Attacks_Lense_NGO/topic_-1/chunk_3/prediction_1.json
2024-12-03 0

2024-12-03 06:38:53,677 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:38:53,679 - INFO - Use pytorch device_name: cuda
2024-12-03 06:38:53,680 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/20 [00:00<?, ?it/s]

2024-12-03 06:38:56,041 - BERTopic - Embedding - Completed ✓
2024-12-03 06:38:56,042 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:38:56,827 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:38:56,828 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:38:56,877 - BERTopic - Cluster - Completed ✓
2024-12-03 06:38:56,880 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:10,509 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/20 [00:00<?, ?it/s]

2024-12-03 06:39:11,432 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:11,437 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:11,438 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:11,455 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:11,456 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:11,457 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:11,912 - INFO - Step 5: Generating reports...
2024-12-03 06:39:11,913 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:39:11,928 - INFO - Saved STIX file: Results_Topic_Sentences/Offensive-Cyber-Capabilities-Proliferation-Report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:11,930 - INFO - Saved STIX file: Results_Topic_Sentences/Offensive-Cyber-Capabilities-Proliferation-Report/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:39:11,930 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:39:11,996 - INFO - Saved STIX file: Results_Topic_Sentences/Offensive-Cyber-Capabilities-Proliferation-Report/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:39:11,997 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:39:12,011 - INFO - Saved STIX file: Results_Topic_Sentences/Offen

2024-12-03 06:39:47,005 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:39:47,007 - INFO - Use pytorch device_name: cuda
2024-12-03 06:39:47,008 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:39:49,211 - BERTopic - Embedding - Completed ✓
2024-12-03 06:39:49,217 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:39:49,442 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:49,443 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:39:49,461 - BERTopic - Cluster - Completed ✓
2024-12-03 06:39:49,464 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:39:52,463 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:39:52,675 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:39:52,676 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:39:52,676 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:39:52,680 - BERTopic - Cluster - Completed ✓


2024-12-03 06:39:52,681 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:39:52,684 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:39:52,768 - INFO - Step 5: Generating reports...
2024-12-03 06:39:52,769 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:39:52,869 - INFO - Saved STIX file: Results_Topic_Sentences/Hunting-Libyan-Scorpions-EN/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:39:52,869 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:39:52,875 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:39:52,894 - INFO - Saved STIX file: Results_Topic_Sentences/Hunting-Libyan-Scorpions-EN/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:39:52,894 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:39:53,117 - INFO - Saved STIX file: Results_Topic_Sentences/Hunting-Libyan-Scorpions-EN/topic_0/chunk_1/prediction_1.json
2024-12-03 06:39:53,122 - INFO - Saved STIX file: Results_Topic_Sentences/Hunt

2024-12-03 06:40:01,388 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:01,390 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:01,390 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:40:03,930 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:03,934 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:03,986 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:03,987 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:03,995 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:03,997 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:05,125 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:40:05,214 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:05,217 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:05,218 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:05,221 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:05,221 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:05,223 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:05,252 - INFO - Step 5: Generating reports...
2024-12-03 06:40:05,253 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:40:05,347 - INFO - Saved STIX file: Results_Topic_Sentences/massive-admedia-iframe-javascript-infection.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:05,348 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:40:05,354 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:40:05,468 - INFO - Saved STIX file: Results_Topic_Sentences/massive-admedia-iframe-javascript-infection.html/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:40:05,470 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:40:05,644 - INFO - Saved STIX file: Results_Topic_Sentences/massive-admedia-iframe-javascript-infection.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:4

2024-12-03 06:40:10,602 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:10,605 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:10,605 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:40:12,958 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:12,959 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:13,029 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:13,030 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:13,039 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:13,042 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:14,698 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:40:14,820 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:14,821 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:14,821 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:14,823 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:14,824 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:14,824 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:14,860 - INFO - Step 5: Generating reports...
2024-12-03 06:40:14,860 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:40:14,932 - INFO - Saved STIX file: Results_Topic_Sentences/JSAC2020_3_takai_jp/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:14,933 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:40:15,028 - INFO - Saved STIX file: Results_Topic_Sentences/JSAC2020_3_takai_jp/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:40:15,029 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:40:15,138 - INFO - Saved STIX file: Results_Topic_Sentences/JSAC2020_3_takai_jp/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:40:15,139 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:40:15,154 - INFO - Saved STIX file: Results_Topic_Sentences/JSAC2020_3_takai_jp/topic_0

2024-12-03 06:40:23,228 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:23,230 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:23,231 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:40:25,467 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:25,467 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:25,538 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:25,538 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:25,547 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:25,549 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:27,310 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:40:27,430 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:27,433 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:27,434 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:27,437 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:27,438 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:27,440 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:27,480 - INFO - Step 5: Generating reports...
2024-12-03 06:40:27,481 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:40:27,495 - INFO - Saved STIX file: Results_Topic_Sentences/MysterySnail attacks with Windows zero-day _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:27,496 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:40:27,509 - INFO - Saved STIX file: Results_Topic_Sentences/MysterySnail attacks with Windows zero-day _ Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:40:27,510 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:40:27,526 - INFO - Saved STIX file: Results_Topic_Sentences/MysterySnail attacks with Windows zero-day _ Securelist/topic_0/chunk_2/prediction_1.json
2024-12-03 06:40:27,526 - INFO - Processing chunk 3/3 of topic 0.

2024-12-03 06:40:33,466 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:40:33,469 - INFO - Use pytorch device_name: cuda
2024-12-03 06:40:33,469 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/24 [00:00<?, ?it/s]

2024-12-03 06:40:35,839 - BERTopic - Embedding - Completed ✓
2024-12-03 06:40:35,840 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:40:36,901 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:36,902 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:40:36,960 - BERTopic - Cluster - Completed ✓
2024-12-03 06:40:36,963 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:40:48,825 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/24 [00:00<?, ?it/s]

2024-12-03 06:40:49,557 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:40:49,560 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:40:49,560 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:40:49,576 - BERTopic - Cluster - Completed ✓


2024-12-03 06:40:49,577 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:40:49,579 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:40:49,806 - INFO - Step 5: Generating reports...
2024-12-03 06:40:49,807 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:40:49,823 - INFO - Saved STIX file: Results_Topic_Sentences/C5_APT_SKHack/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:40:49,824 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:40:49,839 - INFO - Saved STIX file: Results_Topic_Sentences/C5_APT_SKHack/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:40:49,839 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:40:49,934 - INFO - Saved STIX file: Results_Topic_Sentences/C5_APT_SKHack/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:40:49,935 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:40:50,044 - INFO - Saved STIX file: Results_Topic_Sentences/C5_APT_SKHack/topic_0/chunk_1/prediction_1.js

2024-12-03 06:41:44,194 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:41:44,198 - INFO - Use pytorch device_name: cuda
2024-12-03 06:41:44,198 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:41:46,589 - BERTopic - Embedding - Completed ✓
2024-12-03 06:41:46,590 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:41:47,518 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:47,519 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:41:47,570 - BERTopic - Cluster - Completed ✓
2024-12-03 06:41:47,573 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:41:57,476 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:41:58,075 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:41:58,077 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:41:58,078 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:41:58,091 - BERTopic - Cluster - Completed ✓


2024-12-03 06:41:58,092 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:41:58,093 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:41:58,321 - INFO - Step 5: Generating reports...
2024-12-03 06:41:58,322 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:41:58,335 - INFO - Saved STIX file: Results_Topic_Sentences/Talos_Cobalt_Strike/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:41:58,335 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:41:58,347 - INFO - Saved STIX file: Results_Topic_Sentences/Talos_Cobalt_Strike/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:41:58,349 - INFO - Saved STIX file: Results_Topic_Sentences/Talos_Cobalt_Strike/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:41:58,350 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:41:58,355 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:41:58,542 - INFO - Saved STIX file: Results_Topic_Sentences/Talos_Cobalt_Strike/topic_0

2024-12-03 06:42:32,582 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:42:32,584 - INFO - Use pytorch device_name: cuda
2024-12-03 06:42:32,585 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:42:34,759 - BERTopic - Embedding - Completed ✓
2024-12-03 06:42:34,760 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:42:34,900 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:34,901 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:42:34,916 - BERTopic - Cluster - Completed ✓
2024-12-03 06:42:34,918 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:42:37,976 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:42:38,173 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:42:38,174 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:38,175 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:42:38,180 - BERTopic - Cluster - Completed ✓


2024-12-03 06:42:38,180 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:42:38,180 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:42:38,252 - INFO - Step 5: Generating reports...
2024-12-03 06:42:38,253 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:42:38,268 - INFO - Saved STIX file: Results_Topic_Sentences/sophos-rotten-tomato-campaign/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:42:38,269 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:42:38,284 - INFO - Saved STIX file: Results_Topic_Sentences/sophos-rotten-tomato-campaign/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:42:38,284 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:42:38,344 - INFO - Saved STIX file: Results_Topic_Sentences/sophos-rotten-tomato-campaign/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:42:38,346 - INFO - Saved STIX file: Results_Topic_Sentences/sophos-rotten-tomato-campaign/topic_-1/chunk_3/prediction_2.json


2024-12-03 06:42:47,309 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:42:47,312 - INFO - Use pytorch device_name: cuda
2024-12-03 06:42:47,312 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:42:49,515 - BERTopic - Embedding - Completed ✓
2024-12-03 06:42:49,516 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:42:49,676 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:49,677 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:42:49,691 - BERTopic - Cluster - Completed ✓
2024-12-03 06:42:49,694 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:42:53,053 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:42:53,284 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:42:53,285 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:42:53,285 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:42:53,290 - BERTopic - Cluster - Completed ✓


2024-12-03 06:42:53,290 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:42:53,291 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:42:53,378 - INFO - Step 5: Generating reports...
2024-12-03 06:42:53,379 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:42:53,401 - INFO - Saved STIX file: Results_Topic_Sentences/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:42:53,402 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:42:53,417 - INFO - Saved STIX file: Results_Topic_Sentences/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:42:53,417 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:42:53,449 - INFO - Saved STIX file: Results_Topic_Sentences/Cryptocurrency-Miners-XMRig-Based-CoinMiner-by-Blue-Mockingbird-Group/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:42:53

2024-12-03 06:43:02,768 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:43:02,770 - INFO - Use pytorch device_name: cuda
2024-12-03 06:43:02,771 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:43:05,209 - BERTopic - Embedding - Completed ✓
2024-12-03 06:43:05,209 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:43:06,076 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:43:06,077 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:43:06,128 - BERTopic - Cluster - Completed ✓
2024-12-03 06:43:06,133 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:43:15,177 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

2024-12-03 06:43:15,699 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:43:15,702 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:43:15,703 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:43:15,717 - BERTopic - Cluster - Completed ✓


2024-12-03 06:43:15,718 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:43:15,719 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:43:15,816 - INFO - Step 5: Generating reports...
2024-12-03 06:43:15,817 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:43:15,834 - INFO - Saved STIX file: Results_Topic_Sentences/Secrets_of_the_Comfoo_Masters/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:43:15,836 - INFO - Saved STIX file: Results_Topic_Sentences/Secrets_of_the_Comfoo_Masters/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:43:15,837 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:43:16,020 - INFO - Saved STIX file: Results_Topic_Sentences/Secrets_of_the_Comfoo_Masters/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:43:16,021 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:43:16,037 - INFO - Saved STIX file: Results_Topic_Sentences/Secrets_of_the_Comfoo_Masters/topic_-1/chunk_3/prediction_1.json


2024-12-03 06:44:12,915 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:12,917 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:12,920 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:15,141 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:15,143 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:15,241 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:15,242 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:15,255 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:15,257 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:17,859 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:44:18,043 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:18,045 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:18,046 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:18,049 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:18,050 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:18,050 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:18,127 - INFO - Step 5: Generating reports...
2024-12-03 06:44:18,128 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:44:18,266 - INFO - Saved STIX file: Results_Topic_Sentences/TAG-74/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:18,267 - INFO - Saved STIX file: Results_Topic_Sentences/TAG-74/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:44:18,269 - INFO - Saved STIX file: Results_Topic_Sentences/TAG-74/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:44:18,270 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:44:18,289 - INFO - Saved STIX file: Results_Topic_Sentences/TAG-74/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:44:18,290 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:44:18,305 - INFO - Saved STIX file: Results_Topic_Sentences/TAG-74/topic_-1/ch

2024-12-03 06:44:27,855 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:27,857 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:27,857 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:44:30,198 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:30,200 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:30,439 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:30,439 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:30,457 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:30,460 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:35,035 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 06:44:35,333 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:35,336 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:35,336 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:35,343 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:35,344 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:35,345 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:35,451 - INFO - Step 5: Generating reports...
2024-12-03 06:44:35,453 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:44:35,711 - INFO - Saved STIX file: Results_Topic_Sentences/tenable-cloud-risk-report-2024/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:35,713 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:44:35,823 - INFO - Saved STIX file: Results_Topic_Sentences/tenable-cloud-risk-report-2024/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:44:35,824 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:44:35,847 - INFO - Saved STIX file: Results_Topic_Sentences/tenable-cloud-risk-report-2024/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:44:35,849 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:44:35,861 - INFO - Saved STIX file: Results_Topic_Sent

2024-12-03 06:44:49,725 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:49,728 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:49,728 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:44:52,012 - BERTopic - Embedding - Completed ✓
2024-12-03 06:44:52,012 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:44:52,062 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:52,063 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:44:52,070 - BERTopic - Cluster - Completed ✓
2024-12-03 06:44:52,072 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:44:53,218 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:44:53,306 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:44:53,309 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:44:53,311 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:44:53,315 - BERTopic - Cluster - Completed ✓


2024-12-03 06:44:53,316 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:44:53,320 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:44:53,347 - INFO - Step 5: Generating reports...
2024-12-03 06:44:53,348 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:44:53,363 - INFO - Saved STIX file: Results_Topic_Sentences/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:44:53,366 - INFO - Saved STIX file: Results_Topic_Sentences/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:44:53,367 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:44:53,476 - INFO - Saved STIX file: Results_Topic_Sentences/Tracking Tick Through Recent Campaigns Targeting East Asia/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:44:53,478 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:44:53,675 - INFO - Saved STIX file: Res

2024-12-03 06:44:59,407 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:44:59,409 - INFO - Use pytorch device_name: cuda
2024-12-03 06:44:59,410 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

2024-12-03 06:45:01,699 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:01,700 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:02,317 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:02,318 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:02,357 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:02,360 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:12,402 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

2024-12-03 06:45:13,002 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:13,005 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:13,005 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:13,018 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:13,019 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:13,020 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:13,245 - INFO - Step 5: Generating reports...
2024-12-03 06:45:13,245 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:45:13,259 - INFO - Saved STIX file: Results_Topic_Sentences/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:13,261 - INFO - Saved STIX file: Results_Topic_Sentences/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:45:13,261 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:45:13,384 - INFO - Saved STIX file: Results_Topic_Sentences/Group5_ Syria and the Iranian Connection - The Citizen Lab/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:45:13,385 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:45:13,398 - INFO - Saved STIX file: Re

2024-12-03 06:45:35,673 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:35,675 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:35,675 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:45:37,851 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:37,852 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:37,959 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:37,960 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:37,971 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:37,974 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:40,474 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:45:40,635 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:40,636 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:40,637 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:40,640 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:40,641 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:40,641 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:40,692 - INFO - Step 5: Generating reports...
2024-12-03 06:45:40,692 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:45:40,712 - INFO - Saved STIX file: Results_Topic_Sentences/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:40,712 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:45:40,818 - INFO - Saved STIX file: Results_Topic_Sentences/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:45:40,818 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:45:40,830 - INFO - Saved STIX file: Results_Topic_Sentences/hidden-cobra-targets-turkish-financial-sector-new-bankshot-implant/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:45:40,831 - IN

2024-12-03 06:45:48,610 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:45:48,612 - INFO - Use pytorch device_name: cuda
2024-12-03 06:45:48,613 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:45:50,932 - BERTopic - Embedding - Completed ✓
2024-12-03 06:45:50,933 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:45:51,215 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:51,215 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:45:51,238 - BERTopic - Cluster - Completed ✓
2024-12-03 06:45:51,241 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:45:56,522 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 06:45:56,847 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:45:56,848 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:45:56,849 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:45:56,855 - BERTopic - Cluster - Completed ✓


2024-12-03 06:45:56,856 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:45:56,856 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:45:56,987 - INFO - Step 5: Generating reports...
2024-12-03 06:45:56,988 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:45:57,001 - INFO - Saved STIX file: Results_Topic_Sentences/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:45:57,008 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:45:57,124 - INFO - Saved STIX file: Results_Topic_Sentences/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:45:57,125 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:45:57,132 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:45:57,145 - INFO - Saved STIX file: Results_Topic_Sentences/Lazarus Group Recruitment_ Threat Hunters vs Head Hunters/topic_0/chunk_1/predic

2024-12-03 06:46:17,596 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:46:17,599 - INFO - Use pytorch device_name: cuda
2024-12-03 06:46:17,600 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:46:19,795 - BERTopic - Embedding - Completed ✓
2024-12-03 06:46:19,796 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:46:19,832 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:19,833 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:46:19,839 - BERTopic - Cluster - Completed ✓
2024-12-03 06:46:19,841 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:46:20,894 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:46:20,988 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:46:20,991 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:20,992 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:46:20,994 - BERTopic - Cluster - Completed ✓


2024-12-03 06:46:20,995 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:46:20,995 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:46:21,030 - INFO - Step 5: Generating reports...
2024-12-03 06:46:21,031 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:46:21,048 - INFO - Saved STIX file: Results_Topic_Sentences/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:46:21,049 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:46:21,064 - INFO - Saved STIX file: Results_Topic_Sentences/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_0/chunk_2/prediction_1.json
2024-12-03 06:46:21,066 - INFO - Saved STIX file: Results_Topic_Sentences/down-the-h-w0rm-hole-with-houdinis-rat.html/topic_0/chunk_2/prediction_2.json
2024-12-03 06:46:21,067 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:46:21,082 - INFO - Saved STIX file: Results_Topic_Sentences/down-the-h-w0rm-hole-with-hou

2024-12-03 06:46:24,799 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:46:24,801 - INFO - Use pytorch device_name: cuda
2024-12-03 06:46:24,801 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:46:27,047 - BERTopic - Embedding - Completed ✓
2024-12-03 06:46:27,047 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:46:27,144 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:27,145 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:46:27,157 - BERTopic - Cluster - Completed ✓
2024-12-03 06:46:27,159 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:46:30,177 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:46:30,391 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:46:30,393 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:30,393 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:46:30,397 - BERTopic - Cluster - Completed ✓


2024-12-03 06:46:30,399 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:46:30,400 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:46:30,501 - INFO - Step 5: Generating reports...
2024-12-03 06:46:30,502 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:46:30,517 - INFO - Saved STIX file: Results_Topic_Sentences/tropic-trooper-targets-taiwanese/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:46:30,518 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:46:30,535 - INFO - Saved STIX file: Results_Topic_Sentences/tropic-trooper-targets-taiwanese/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:46:30,536 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:46:30,742 - INFO - Saved STIX file: Results_Topic_Sentences/tropic-trooper-targets-taiwanese/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:46:30,746 - INFO - Saved STIX file: Results_Topic_Sentences/tropic-trooper-targets-taiwanese/topic_-1/chunk_3/predic

2024-12-03 06:46:38,449 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:46:38,452 - INFO - Use pytorch device_name: cuda
2024-12-03 06:46:38,456 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:46:40,676 - BERTopic - Embedding - Completed ✓
2024-12-03 06:46:40,677 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:46:40,885 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:40,886 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:46:40,905 - BERTopic - Cluster - Completed ✓
2024-12-03 06:46:40,908 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:46:45,013 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:46:45,246 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:46:45,248 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:46:45,248 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:46:45,253 - BERTopic - Cluster - Completed ✓


2024-12-03 06:46:45,253 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:46:45,254 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:46:45,325 - INFO - Step 5: Generating reports...
2024-12-03 06:46:45,326 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:46:45,345 - INFO - Saved STIX file: Results_Topic_Sentences/20230620_threat_inteligence_report_apt37_macos/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:46:45,346 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:46:45,351 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:46:45,459 - INFO - Saved STIX file: Results_Topic_Sentences/20230620_threat_inteligence_report_apt37_macos/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:46:45,460 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:46:45,472 - INFO - Saved STIX file: Results_Topic_Sentences/20230620_threat_inteligence_report_apt37_macos/topic_0/chunk_1/prediction_1.json
2024-12-03 06:46:45,4

2024-12-03 06:47:05,079 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:05,082 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:05,083 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:47:07,328 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:07,330 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:07,481 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:07,482 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:07,497 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:07,500 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:10,816 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:47:11,006 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:11,008 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:11,009 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:11,014 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:11,015 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:11,015 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:11,083 - INFO - Step 5: Generating reports...
2024-12-03 06:47:11,084 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:47:11,098 - INFO - Saved STIX file: Results_Topic_Sentences/MDM_India/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:11,098 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:47:11,209 - INFO - Saved STIX file: Results_Topic_Sentences/MDM_India/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:47:11,210 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:47:11,557 - INFO - Saved STIX file: Results_Topic_Sentences/MDM_India/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:47:11,560 - INFO - Saved STIX file: Results_Topic_Sentences/MDM_India/topic_-1/chunk_3/prediction_2.json
2024-12-03 06:47:11,562 - INFO - Saved STIX file: Results_Topic_Sentences/MDM_In

2024-12-03 06:47:24,629 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:47:24,632 - INFO - Use pytorch device_name: cuda
2024-12-03 06:47:24,632 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:47:27,232 - BERTopic - Embedding - Completed ✓
2024-12-03 06:47:27,233 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:47:28,629 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:28,630 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:47:28,698 - BERTopic - Cluster - Completed ✓
2024-12-03 06:47:28,702 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:47:41,835 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 06:47:42,808 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:47:42,811 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:47:42,812 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:47:42,830 - BERTopic - Cluster - Completed ✓


2024-12-03 06:47:42,831 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:47:42,833 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:47:43,270 - INFO - Step 5: Generating reports...
2024-12-03 06:47:43,272 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:47:43,286 - INFO - Saved STIX file: Results_Topic_Sentences/UAC0006_FC.pdf/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:47:43,287 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:47:43,301 - INFO - Saved STIX file: Results_Topic_Sentences/UAC0006_FC.pdf/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:47:43,302 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:47:43,411 - INFO - Saved STIX file: Results_Topic_Sentences/UAC0006_FC.pdf/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:47:43,412 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:47:43,426 - INFO - Saved STIX file: Results_Topic_Sentences/UAC0006_FC.pdf/topic_0/chunk_1/prediction_

2024-12-03 06:48:31,555 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:48:31,558 - INFO - Use pytorch device_name: cuda
2024-12-03 06:48:31,559 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

2024-12-03 06:48:33,922 - BERTopic - Embedding - Completed ✓
2024-12-03 06:48:33,923 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:48:35,377 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:48:35,378 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:48:35,434 - BERTopic - Cluster - Completed ✓
2024-12-03 06:48:35,437 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:48:46,455 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

2024-12-03 06:48:47,168 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:48:47,171 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:48:47,171 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:48:47,194 - BERTopic - Cluster - Completed ✓


2024-12-03 06:48:47,195 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:48:47,196 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:48:47,429 - INFO - Step 5: Generating reports...
2024-12-03 06:48:47,430 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:48:47,538 - INFO - Saved STIX file: Results_Topic_Sentences/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:48:47,541 - INFO - Saved STIX file: Results_Topic_Sentences/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:48:47,542 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:48:47,649 - INFO - Saved STIX file: Results_Topic_Sentences/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:48:47,652 - INFO - Saved STIX file: Results_Topic_Sentences/ClearSky-Fox-Kitten-Campaign-v1/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:48:47,653 - INFO - Processing chunk 3/3 of topi

2024-12-03 06:49:32,696 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:49:32,699 - INFO - Use pytorch device_name: cuda
2024-12-03 06:49:32,699 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:49:34,878 - BERTopic - Embedding - Completed ✓
2024-12-03 06:49:34,879 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:49:34,940 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:49:34,940 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:49:34,948 - BERTopic - Cluster - Completed ✓
2024-12-03 06:49:34,951 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:49:36,784 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:49:36,877 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:49:36,878 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:49:36,879 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:49:36,881 - BERTopic - Cluster - Completed ✓


2024-12-03 06:49:36,881 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:49:36,882 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:49:36,912 - INFO - Step 5: Generating reports...
2024-12-03 06:49:36,912 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:49:37,103 - INFO - Saved STIX file: Results_Topic_Sentences/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:49:37,105 - INFO - Saved STIX file: Results_Topic_Sentences/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:49:37,105 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:49:37,119 - INFO - Saved STIX file: Results_Topic_Sentences/PowerShell-based Backdoor Found in Turkey Strikingly Similar to MuddyWater Tools/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:49:37,121 - INFO - Saved STIX file: Resul

2024-12-03 06:49:42,847 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:49:42,850 - INFO - Use pytorch device_name: cuda
2024-12-03 06:49:42,850 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:49:45,094 - BERTopic - Embedding - Completed ✓
2024-12-03 06:49:45,094 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:49:45,431 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:49:45,432 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:49:45,459 - BERTopic - Cluster - Completed ✓
2024-12-03 06:49:45,462 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:49:51,590 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:49:51,998 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:49:52,000 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:49:52,000 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:49:52,007 - BERTopic - Cluster - Completed ✓


2024-12-03 06:49:52,008 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:49:52,009 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:49:52,173 - INFO - Step 5: Generating reports...
2024-12-03 06:49:52,173 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:49:52,186 - INFO - Saved STIX file: Results_Topic_Sentences/CERTFR-2021-CTI-009/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:49:52,187 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:49:52,298 - INFO - Saved STIX file: Results_Topic_Sentences/CERTFR-2021-CTI-009/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:49:52,299 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:49:52,345 - INFO - Saved STIX file: Results_Topic_Sentences/CERTFR-2021-CTI-009/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:49:52,345 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:49:52,503 - INFO - Saved STIX file: Results_Topic_Sentences/CERTFR-2021-CTI-009/topic_0

2024-12-03 06:50:19,775 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:50:19,777 - INFO - Use pytorch device_name: cuda
2024-12-03 06:50:19,778 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:50:22,607 - BERTopic - Embedding - Completed ✓
2024-12-03 06:50:22,607 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:50:22,683 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:22,683 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:50:22,693 - BERTopic - Cluster - Completed ✓
2024-12-03 06:50:22,696 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:50:24,283 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:50:24,397 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:50:24,402 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:24,403 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:50:24,405 - BERTopic - Cluster - Completed ✓


2024-12-03 06:50:24,406 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:50:24,406 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:50:24,443 - INFO - Step 5: Generating reports...
2024-12-03 06:50:24,444 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:50:24,539 - INFO - Saved STIX file: Results_Topic_Sentences/The Naikon APT - Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:50:24,539 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:50:24,551 - INFO - Saved STIX file: Results_Topic_Sentences/The Naikon APT - Securelist/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:50:24,552 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:50:24,558 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:50:24,646 - INFO - Saved STIX file: Results_Topic_Sentences/The Naikon APT - Securelist/topic_0/chunk_1/prediction_1.json
2024-12-03 06:50:24,648 - INFO - Saved STIX file: Results_Topic_Sentences/The 

2024-12-03 06:50:29,580 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:50:29,582 - INFO - Use pytorch device_name: cuda
2024-12-03 06:50:29,583 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:50:31,744 - BERTopic - Embedding - Completed ✓
2024-12-03 06:50:31,745 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:50:31,795 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:31,796 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:50:31,804 - BERTopic - Cluster - Completed ✓
2024-12-03 06:50:31,806 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:50:33,094 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:50:33,188 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:50:33,191 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:33,192 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:50:33,195 - BERTopic - Cluster - Completed ✓


2024-12-03 06:50:33,195 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:50:33,196 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:50:33,231 - INFO - Step 5: Generating reports...
2024-12-03 06:50:33,232 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:50:33,248 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1010 - njRAT The Saga Continues/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:50:33,249 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:50:33,348 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1010 - njRAT The Saga Continues/topic_0/chunk_1/prediction_1.json
2024-12-03 06:50:33,348 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:50:33,617 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1010 - njRAT The Saga Continues/topic_0/chunk_2/prediction_1.json
2024-12-03 06:50:33,622 - INFO - Saved STIX file: Results_Topic_Sentences/FTA 1010 - njRAT The Saga Continues/topic_0/chunk_2

2024-12-03 06:50:37,711 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:50:37,714 - INFO - Use pytorch device_name: cuda
2024-12-03 06:50:37,714 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:50:39,955 - BERTopic - Embedding - Completed ✓
2024-12-03 06:50:39,956 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:50:39,998 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:40,000 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:50:40,007 - BERTopic - Cluster - Completed ✓
2024-12-03 06:50:40,009 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:50:41,149 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:50:41,240 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:50:41,241 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:41,242 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:50:41,244 - BERTopic - Cluster - Completed ✓


2024-12-03 06:50:41,244 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:50:41,248 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:50:41,287 - INFO - Step 5: Generating reports...
2024-12-03 06:50:41,289 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:50:41,386 - INFO - Saved STIX file: Results_Topic_Sentences/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:50:41,387 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:50:41,566 - INFO - Saved STIX file: Results_Topic_Sentences/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_0/chunk_1/prediction_1.json
2024-12-03 06:50:41,567 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:50:41,615 - INFO - Saved STIX file: Results_Topic_Sentences/Down the H-W0rm Hole with Houdini's RAT - Threat Geek/topic_0/chunk_2/prediction_1.json
2024-12-03 06:50:41,617 - INFO - Processing chunk 3/3 of topic 0...
202

2024-12-03 06:50:44,794 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:50:44,797 - INFO - Use pytorch device_name: cuda
2024-12-03 06:50:44,798 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:50:47,143 - BERTopic - Embedding - Completed ✓
2024-12-03 06:50:47,143 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:50:47,251 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:47,252 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:50:47,264 - BERTopic - Cluster - Completed ✓
2024-12-03 06:50:47,266 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:50:49,729 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:50:49,891 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:50:49,892 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:50:49,892 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:50:49,896 - BERTopic - Cluster - Completed ✓


2024-12-03 06:50:49,896 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:50:49,897 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:50:49,957 - INFO - Step 5: Generating reports...
2024-12-03 06:50:49,958 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:50:49,979 - INFO - Saved STIX file: Results_Topic_Sentences/Operation_Honeybee/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:50:49,979 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:50:50,053 - INFO - Saved STIX file: Results_Topic_Sentences/Operation_Honeybee/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:50:50,056 - INFO - Saved STIX file: Results_Topic_Sentences/Operation_Honeybee/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:50:50,058 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:50:50,086 - INFO - Saved STIX file: Results_Topic_Sentences/Operation_Honeybee/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:50:50,088 - INFO - Processing 

2024-12-03 06:50:58,027 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:50:58,029 - INFO - Use pytorch device_name: cuda
2024-12-03 06:50:58,030 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:51:00,228 - BERTopic - Embedding - Completed ✓
2024-12-03 06:51:00,228 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:51:00,280 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:51:00,281 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:51:00,288 - BERTopic - Cluster - Completed ✓
2024-12-03 06:51:00,290 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:51:01,537 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 06:51:01,640 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:51:01,642 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:51:01,642 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:51:01,644 - BERTopic - Cluster - Completed ✓


2024-12-03 06:51:01,645 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:51:01,645 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:51:01,676 - INFO - Step 5: Generating reports...
2024-12-03 06:51:01,678 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:51:01,692 - INFO - Saved STIX file: Results_Topic_Sentences/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:51:01,696 - INFO - Saved STIX file: Results_Topic_Sentences/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:51:01,697 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:51:01,703 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:51:01,723 - INFO - Saved STIX file: Results_Topic_Sentences/ScarCruft continues to evolve, introduces Bluetooth harvester _ Securelist/topic_0/chunk_2/prediction_1.json
2024-12

2024-12-03 06:51:07,483 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:51:07,486 - INFO - Use pytorch device_name: cuda
2024-12-03 06:51:07,487 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

2024-12-03 06:51:09,884 - BERTopic - Embedding - Completed ✓
2024-12-03 06:51:09,885 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:51:10,748 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:51:10,749 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:51:10,797 - BERTopic - Cluster - Completed ✓
2024-12-03 06:51:10,800 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:51:22,859 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

2024-12-03 06:51:23,579 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:51:23,581 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:51:23,581 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:51:23,595 - BERTopic - Cluster - Completed ✓


2024-12-03 06:51:23,596 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:51:23,597 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:51:23,876 - INFO - Step 5: Generating reports...
2024-12-03 06:51:23,877 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:51:23,893 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2022/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:51:23,894 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:51:24,151 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2022/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:51:24,152 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:51:24,159 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:51:24,171 - INFO - Saved STIX file: Results_Topic_Sentences/MacMalware_2022/topic_0/chunk_1/prediction_1.json
2024-12-03 06:51:24,172 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:51:24,184 - INFO - Saved STIX fi

2024-12-03 06:51:59,697 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:51:59,699 - INFO - Use pytorch device_name: cuda
2024-12-03 06:51:59,700 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:52:01,907 - BERTopic - Embedding - Completed ✓
2024-12-03 06:52:01,908 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:52:01,958 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:52:01,958 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:52:01,965 - BERTopic - Cluster - Completed ✓
2024-12-03 06:52:01,967 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:52:03,483 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:52:03,561 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:52:03,562 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:52:03,562 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:52:03,564 - BERTopic - Cluster - Completed ✓


2024-12-03 06:52:03,564 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:52:03,565 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:52:03,588 - INFO - Step 5: Generating reports...
2024-12-03 06:52:03,589 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:52:03,602 - INFO - Saved STIX file: Results_Topic_Sentences/C2_Communication_of_ThreatNeedle/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:52:03,603 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:52:03,615 - INFO - Saved STIX file: Results_Topic_Sentences/C2_Communication_of_ThreatNeedle/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:52:03,616 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:52:03,695 - INFO - Saved STIX file: Results_Topic_Sentences/C2_Communication_of_ThreatNeedle/topic_0/chunk_1/prediction_1.json
2024-12-03 06:52:03,697 - INFO - Saved STIX file: Results_Topic_Sentences/C2_Communication_of_ThreatNeedle/topic_0/chunk_1/predictio

2024-12-03 06:52:09,613 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:52:09,616 - INFO - Use pytorch device_name: cuda
2024-12-03 06:52:09,617 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/19 [00:00<?, ?it/s]

2024-12-03 06:52:11,909 - BERTopic - Embedding - Completed ✓
2024-12-03 06:52:11,910 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:52:12,620 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:52:12,621 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:52:12,669 - BERTopic - Cluster - Completed ✓
2024-12-03 06:52:12,672 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:52:23,777 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/19 [00:00<?, ?it/s]

2024-12-03 06:52:24,433 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:52:24,435 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:52:24,437 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:52:24,453 - BERTopic - Cluster - Completed ✓


2024-12-03 06:52:24,454 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:52:24,455 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:52:24,734 - INFO - Step 5: Generating reports...
2024-12-03 06:52:24,735 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:52:24,755 - INFO - Saved STIX file: Results_Topic_Sentences/packrat-report/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:52:24,757 - INFO - Saved STIX file: Results_Topic_Sentences/packrat-report/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:52:24,758 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:52:24,772 - INFO - Saved STIX file: Results_Topic_Sentences/packrat-report/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:52:24,772 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:52:24,917 - INFO - Saved STIX file: Results_Topic_Sentences/packrat-report/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:52:24,920 - INFO - Saved STIX file: Results_To

2024-12-03 06:52:57,343 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:52:57,345 - INFO - Use pytorch device_name: cuda
2024-12-03 06:52:57,345 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:52:59,605 - BERTopic - Embedding - Completed ✓
2024-12-03 06:52:59,605 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:52:59,919 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:52:59,919 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:52:59,946 - BERTopic - Cluster - Completed ✓
2024-12-03 06:52:59,948 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:53:05,882 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:53:06,238 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:53:06,244 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:06,245 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:53:06,260 - BERTopic - Cluster - Completed ✓


2024-12-03 06:53:06,260 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:53:06,261 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:53:06,436 - INFO - Step 5: Generating reports...
2024-12-03 06:53:06,437 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:53:06,457 - INFO - Saved STIX file: Results_Topic_Sentences/eset-sednit-part3/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:53:06,458 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:53:06,477 - INFO - Saved STIX file: Results_Topic_Sentences/eset-sednit-part3/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:53:06,478 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:53:06,493 - INFO - Saved STIX file: Results_Topic_Sentences/eset-sednit-part3/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:53:06,494 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:53:06,658 - INFO - Saved STIX file: Results_Topic_Sentences/eset-sednit-part3/topic_0/chunk_1

2024-12-03 06:53:28,005 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:53:28,007 - INFO - Use pytorch device_name: cuda
2024-12-03 06:53:28,007 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:53:30,136 - BERTopic - Embedding - Completed ✓
2024-12-03 06:53:30,137 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:53:30,162 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:30,163 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:53:30,167 - BERTopic - Cluster - Completed ✓
2024-12-03 06:53:30,169 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:53:30,615 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:53:30,654 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:53:30,655 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:30,655 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:53:30,656 - BERTopic - Cluster - Completed ✓


2024-12-03 06:53:30,657 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:53:30,657 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:53:30,670 - INFO - Step 5: Generating reports...
2024-12-03 06:53:30,671 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:53:30,852 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:53:30,854 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:53:30,856 - INFO - Saved STIX file: Results_Topic_Sentences/Targeted Attack on Indian Ministry of External Affairs using Crimson RAT/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:53:30,857 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:53:30,869 - INFO - Saved STIX file: Results_Topic_Sentences/Target

2024-12-03 06:53:32,099 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:53:32,101 - INFO - Use pytorch device_name: cuda
2024-12-03 06:53:32,101 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:53:34,308 - BERTopic - Embedding - Completed ✓
2024-12-03 06:53:34,309 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:53:34,404 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:34,404 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:53:34,416 - BERTopic - Cluster - Completed ✓
2024-12-03 06:53:34,418 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:53:36,619 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:53:36,766 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:53:36,767 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:36,768 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:53:36,771 - BERTopic - Cluster - Completed ✓


2024-12-03 06:53:36,771 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:53:36,772 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:53:36,838 - INFO - Step 5: Generating reports...
2024-12-03 06:53:36,839 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:53:36,853 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:53:36,854 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:53:36,856 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea's Lazarus APT leverages Windows Update client, GitHub in latest campaign _ Malwarebytes Labs/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:53:36,856 - INFO - Processing chunk 2/

2024-12-03 06:53:44,111 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:53:44,113 - INFO - Use pytorch device_name: cuda
2024-12-03 06:53:44,114 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:53:46,670 - BERTopic - Embedding - Completed ✓
2024-12-03 06:53:46,670 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:53:46,723 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:46,723 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:53:46,731 - BERTopic - Cluster - Completed ✓
2024-12-03 06:53:46,733 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:53:48,110 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:53:48,188 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:53:48,189 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:48,189 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:53:48,191 - BERTopic - Cluster - Completed ✓


2024-12-03 06:53:48,192 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:53:48,193 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:53:48,220 - INFO - Step 5: Generating reports...
2024-12-03 06:53:48,221 - INFO - Processing chunk 1/1 of topic -1...
2024-12-03 06:53:48,227 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:53:48,273 - INFO - Saved STIX file: Results_Topic_Sentences/APT40 Examining a China-Nexus Espionage Actor  APT40 Examining a China-Nexus Espionage Actor/topic_0/chunk_1/prediction_1.json
2024-12-03 06:53:48,274 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:53:48,286 - INFO - Saved STIX file: Results_Topic_Sentences/APT40 Examining a China-Nexus Espionage Actor  APT40 Examining a China-Nexus Espionage Actor/topic_0/chunk_2/prediction_1.json
2024-12-03 06:53:48,286 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:53:48,332 - INFO - Saved STIX file: Results_Topic_Sentences/APT40 Examinin

2024-12-03 06:53:51,321 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:53:51,323 - INFO - Use pytorch device_name: cuda
2024-12-03 06:53:51,324 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:53:53,873 - BERTopic - Embedding - Completed ✓
2024-12-03 06:53:53,874 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:53:53,933 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:53,934 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:53:53,943 - BERTopic - Cluster - Completed ✓
2024-12-03 06:53:53,945 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:53:55,361 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:53:55,435 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:53:55,436 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:53:55,436 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:53:55,438 - BERTopic - Cluster - Completed ✓


2024-12-03 06:53:55,438 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:53:55,439 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:53:55,466 - INFO - Step 5: Generating reports...
2024-12-03 06:53:55,467 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:53:55,484 - INFO - Saved STIX file: Results_Topic_Sentences/spear_phishing_techn.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:53:55,485 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:53:55,500 - INFO - Saved STIX file: Results_Topic_Sentences/spear_phishing_techn.html/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:53:55,502 - INFO - Saved STIX file: Results_Topic_Sentences/spear_phishing_techn.html/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:53:55,503 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:53:55,522 - INFO - Saved STIX file: Results_Topic_Sentences/spear_phishing_techn.html/topic_0/chunk_1/prediction_1.json
2024-12-03 06:53:5

2024-12-03 06:53:59,925 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:53:59,928 - INFO - Use pytorch device_name: cuda
2024-12-03 06:53:59,929 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:54:02,263 - BERTopic - Embedding - Completed ✓
2024-12-03 06:54:02,263 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:54:02,663 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:02,663 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:54:02,692 - BERTopic - Cluster - Completed ✓
2024-12-03 06:54:02,695 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:54:09,940 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/13 [00:00<?, ?it/s]

2024-12-03 06:54:10,361 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:54:10,363 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:10,363 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:54:10,372 - BERTopic - Cluster - Completed ✓


2024-12-03 06:54:10,373 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:54:10,373 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:54:10,535 - INFO - Step 5: Generating reports...
2024-12-03 06:54:10,536 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:54:10,549 - INFO - Saved STIX file: Results_Topic_Sentences/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:54:10,549 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:54:10,555 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:54:10,617 - INFO - Saved STIX file: Results_Topic_Sentences/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:54:10,619 - INFO - Saved STIX file: Results_Topic_Sentences/Yoroi_Cybersecurity_Annual_-Security_Report_2020-ENGLISH_rMT-FINAL-1s/topic_-1/chunk_3/prediction_2.json
2024-12-03 06:54:10

2024-12-03 06:54:31,190 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:54:31,192 - INFO - Use pytorch device_name: cuda
2024-12-03 06:54:31,192 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:54:33,431 - BERTopic - Embedding - Completed ✓
2024-12-03 06:54:33,432 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:54:33,480 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:33,481 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:54:33,489 - BERTopic - Cluster - Completed ✓
2024-12-03 06:54:33,491 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:54:34,821 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 06:54:34,915 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:54:34,917 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:34,917 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:54:34,920 - BERTopic - Cluster - Completed ✓


2024-12-03 06:54:34,921 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:54:34,921 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:54:34,961 - INFO - Step 5: Generating reports...
2024-12-03 06:54:34,962 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:54:34,968 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:54:35,033 - INFO - Saved STIX file: Results_Topic_Sentences/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_0/chunk_2/prediction_1.json
2024-12-03 06:54:35,034 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:54:35,052 - INFO - Saved STIX file: Results_Topic_Sentences/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_0/chunk_3/prediction_1.json
2024-12-03 06:54:35,053 - INFO - Processing chunk 1/3 of topic 1...
2024-12-03 06:54:35,237 - INFO - Saved STIX file: Results_Topic_Sentences/Global_APT_Research_Report_for_the_first_half_of_2021-360/topic_1/chunk_1/prediction_

2024-12-03 06:54:41,514 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:54:41,516 - INFO - Use pytorch device_name: cuda
2024-12-03 06:54:41,516 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:54:43,846 - BERTopic - Embedding - Completed ✓
2024-12-03 06:54:43,849 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:54:43,936 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:43,936 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:54:43,947 - BERTopic - Cluster - Completed ✓
2024-12-03 06:54:43,950 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:54:46,609 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:54:46,782 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:54:46,784 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:46,784 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:54:46,787 - BERTopic - Cluster - Completed ✓


2024-12-03 06:54:46,788 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:54:46,788 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:54:46,883 - INFO - Step 5: Generating reports...
2024-12-03 06:54:46,884 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:54:46,890 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:54:47,166 - INFO - Saved STIX file: Results_Topic_Sentences/CSA_TTPs-of-Indicted-APT40-Actors-Associated-with-China-MSS-Hainan-State-Security-Department/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:54:47,170 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:54:47,276 - INFO - Saved STIX file: Results_Topic_Sentences/CSA_TTPs-of-Indicted-APT40-Actors-Associated-with-China-MSS-Hainan-State-Security-Department/topic_0/chunk_1/prediction_1.json
2024-12-03 06:54:47,277 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:54:47,461 - INFO - Saved STIX file: Results_Topic_Sentences/CSA_TTPs-of-

2024-12-03 06:54:56,665 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:54:56,667 - INFO - Use pytorch device_name: cuda
2024-12-03 06:54:56,668 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:54:58,903 - BERTopic - Embedding - Completed ✓
2024-12-03 06:54:58,903 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:54:59,084 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:54:59,085 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:54:59,103 - BERTopic - Cluster - Completed ✓
2024-12-03 06:54:59,105 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:55:03,355 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

2024-12-03 06:55:03,612 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:55:03,613 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:55:03,614 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:55:03,619 - BERTopic - Cluster - Completed ✓


2024-12-03 06:55:03,620 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:55:03,620 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:55:03,728 - INFO - Step 5: Generating reports...
2024-12-03 06:55:03,729 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:55:03,911 - INFO - Saved STIX file: Results_Topic_Sentences/The zero-day exploits of Operation WizardOpium _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:55:03,913 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:55:03,942 - INFO - Saved STIX file: Results_Topic_Sentences/The zero-day exploits of Operation WizardOpium _ Securelist/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:55:03,945 - INFO - Saved STIX file: Results_Topic_Sentences/The zero-day exploits of Operation WizardOpium _ Securelist/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:55:03,946 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:55:04,045 - INFO - Saved STIX file:

2024-12-03 06:55:19,030 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:55:19,032 - INFO - Use pytorch device_name: cuda
2024-12-03 06:55:19,032 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/44 [00:00<?, ?it/s]

2024-12-03 06:55:21,571 - BERTopic - Embedding - Completed ✓
2024-12-03 06:55:21,572 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:55:24,191 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:55:24,192 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:55:24,305 - BERTopic - Cluster - Completed ✓
2024-12-03 06:55:24,309 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:55:50,613 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/44 [00:00<?, ?it/s]

2024-12-03 06:55:52,154 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:55:52,159 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:55:52,159 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:55:52,189 - BERTopic - Cluster - Completed ✓


2024-12-03 06:55:52,190 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:55:52,191 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:55:52,759 - INFO - Step 5: Generating reports...
2024-12-03 06:55:52,760 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:55:52,846 - INFO - Saved STIX file: Results_Topic_Sentences/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:55:52,847 - INFO - Saved STIX file: Results_Topic_Sentences/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:55:52,848 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:55:53,002 - INFO - Saved STIX file: Results_Topic_Sentences/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:55:53,003 - INFO - Saved STIX file: Results_Topic_Sentences/FINDING BEACONS IN THE DARK 1650728751599/topic_-1/chunk_2/prediction_2.json
2024-12-03 06:55:53,0

2024-12-03 06:57:18,207 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:57:18,209 - INFO - Use pytorch device_name: cuda
2024-12-03 06:57:18,214 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/31 [00:00<?, ?it/s]

2024-12-03 06:57:20,598 - BERTopic - Embedding - Completed ✓
2024-12-03 06:57:20,599 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:57:22,073 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:57:22,074 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:57:22,151 - BERTopic - Cluster - Completed ✓
2024-12-03 06:57:22,154 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:57:39,346 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/31 [00:00<?, ?it/s]

2024-12-03 06:57:40,344 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:57:40,347 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:57:40,348 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:57:40,376 - BERTopic - Cluster - Completed ✓


2024-12-03 06:57:40,377 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:57:40,379 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:57:40,697 - INFO - Step 5: Generating reports...
2024-12-03 06:57:40,698 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:57:40,819 - INFO - Saved STIX file: Results_Topic_Sentences/001/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:57:40,824 - INFO - Saved STIX file: Results_Topic_Sentences/001/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:57:40,827 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:57:40,900 - INFO - Saved STIX file: Results_Topic_Sentences/001/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:57:40,902 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:57:40,924 - INFO - Saved STIX file: Results_Topic_Sentences/001/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:57:40,926 - INFO - Saved STIX file: Results_Topic_Sentences/001/topic_-1/chunk_3/predictio

2024-12-03 06:58:42,769 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:58:42,771 - INFO - Use pytorch device_name: cuda
2024-12-03 06:58:42,771 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:58:45,162 - BERTopic - Embedding - Completed ✓
2024-12-03 06:58:45,163 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:58:45,250 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:58:45,251 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:58:45,262 - BERTopic - Cluster - Completed ✓
2024-12-03 06:58:45,265 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:58:47,624 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:58:47,779 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:58:47,781 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:58:47,781 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:58:47,785 - BERTopic - Cluster - Completed ✓


2024-12-03 06:58:47,785 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:58:47,786 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:58:47,839 - INFO - Step 5: Generating reports...
2024-12-03 06:58:47,840 - INFO - Processing chunk 1/2 of topic -1...
2024-12-03 06:58:47,945 - INFO - Saved STIX file: Results_Topic_Sentences/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:58:47,947 - INFO - Saved STIX file: Results_Topic_Sentences/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_2.json
2024-12-03 06:58:47,949 - INFO - Saved STIX file: Results_Topic_Sentences/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_1/prediction_3.json
2024-12-03 06:58:47,949 - INFO - Processing chunk 2/2 of topic -1...
2024-12-03 06:58:47,978 - INFO - Saved STIX file: Results_Topic_Sentences/targeted-attack-in-middle-east-by-apt34.html/topic_-1/chunk_2/prediction_1.json
2024-12-0

2024-12-03 06:58:56,064 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:58:56,067 - INFO - Use pytorch device_name: cuda
2024-12-03 06:58:56,068 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:58:58,277 - BERTopic - Embedding - Completed ✓
2024-12-03 06:58:58,278 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:58:58,307 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:58:58,308 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:58:58,313 - BERTopic - Cluster - Completed ✓
2024-12-03 06:58:58,316 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:58:58,977 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-12-03 06:58:59,020 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:58:59,022 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:58:59,022 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:58:59,024 - BERTopic - Cluster - Completed ✓


2024-12-03 06:58:59,024 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:58:59,025 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:58:59,037 - INFO - Step 5: Generating reports...
2024-12-03 06:58:59,038 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:58:59,073 - INFO - Saved STIX file: Results_Topic_Sentences/blog_whos-who-in-the-zoo/topic_0/chunk_1/prediction_1.json
2024-12-03 06:58:59,075 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:58:59,152 - INFO - Saved STIX file: Results_Topic_Sentences/blog_whos-who-in-the-zoo/topic_0/chunk_2/prediction_1.json
2024-12-03 06:58:59,154 - INFO - Saved STIX file: Results_Topic_Sentences/blog_whos-who-in-the-zoo/topic_0/chunk_2/prediction_2.json
2024-12-03 06:58:59,155 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:58:59,204 - INFO - Saved STIX file: Results_Topic_Sentences/blog_whos-who-in-the-zoo/topic_0/chunk_3/prediction_1.json
2024-12-03 06:58:59,205 - I

2024-12-03 06:59:02,107 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:59:02,110 - INFO - Use pytorch device_name: cuda
2024-12-03 06:59:02,110 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:59:04,373 - BERTopic - Embedding - Completed ✓
2024-12-03 06:59:04,373 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:59:04,481 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:04,481 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:59:04,493 - BERTopic - Cluster - Completed ✓
2024-12-03 06:59:04,495 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:59:07,052 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 06:59:07,221 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:59:07,222 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:07,223 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:59:07,226 - BERTopic - Cluster - Completed ✓


2024-12-03 06:59:07,227 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:59:07,227 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:59:07,318 - INFO - Step 5: Generating reports...
2024-12-03 06:59:07,318 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:59:07,335 - INFO - Saved STIX file: Results_Topic_Sentences/Vicious Panda_ The COVID Campaign - Check Point Research/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:59:07,335 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:59:07,349 - INFO - Saved STIX file: Results_Topic_Sentences/Vicious Panda_ The COVID Campaign - Check Point Research/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:59:07,350 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:59:07,477 - INFO - Saved STIX file: Results_Topic_Sentences/Vicious Panda_ The COVID Campaign - Check Point Research/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:59:07,478 - INFO - Processing chunk 1/3 of t

2024-12-03 06:59:14,471 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:59:14,474 - INFO - Use pytorch device_name: cuda
2024-12-03 06:59:14,475 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:59:16,755 - BERTopic - Embedding - Completed ✓
2024-12-03 06:59:16,756 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:59:16,936 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:16,937 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:59:16,952 - BERTopic - Cluster - Completed ✓
2024-12-03 06:59:16,955 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:59:21,064 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 06:59:21,306 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:59:21,308 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:21,308 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:59:21,312 - BERTopic - Cluster - Completed ✓


2024-12-03 06:59:21,313 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:59:21,314 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:59:21,418 - INFO - Step 5: Generating reports...
2024-12-03 06:59:21,419 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:59:21,434 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea’s Cyber Strategy/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:59:21,434 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:59:21,447 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea’s Cyber Strategy/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:59:21,448 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:59:21,495 - INFO - Saved STIX file: Results_Topic_Sentences/North Korea’s Cyber Strategy/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:59:21,496 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:59:21,764 - INFO - Saved STIX file: Results_Topic_Sentences/

2024-12-03 06:59:32,574 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:59:32,577 - INFO - Use pytorch device_name: cuda
2024-12-03 06:59:32,577 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:59:34,755 - BERTopic - Embedding - Completed ✓
2024-12-03 06:59:34,756 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:59:34,873 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:34,875 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:59:34,889 - BERTopic - Cluster - Completed ✓
2024-12-03 06:59:34,892 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:59:37,539 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

2024-12-03 06:59:37,705 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:59:37,706 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:37,706 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:59:37,711 - BERTopic - Cluster - Completed ✓


2024-12-03 06:59:37,713 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:59:37,714 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:59:37,787 - INFO - Step 5: Generating reports...
2024-12-03 06:59:37,788 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:59:37,795 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 06:59:37,900 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-southeast-asia-threat-landscape/topic_0/chunk_2/prediction_1.json
2024-12-03 06:59:37,901 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 06:59:37,907 - INFO - Processing chunk 1/3 of topic 1...
2024-12-03 06:59:37,924 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-southeast-asia-threat-landscape/topic_1/chunk_1/prediction_1.json
2024-12-03 06:59:37,925 - INFO - Processing chunk 2/3 of topic 1...
2024-12-03 06:59:37,938 - INFO - Saved STIX file: Results_Topic_Sentences/rpt-southeast-asia-threat-landscape/topic_1/chunk_2/predictio

2024-12-03 06:59:46,029 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 06:59:46,032 - INFO - Use pytorch device_name: cuda
2024-12-03 06:59:46,032 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:59:48,306 - BERTopic - Embedding - Completed ✓
2024-12-03 06:59:48,307 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 06:59:48,655 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:48,656 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 06:59:48,688 - BERTopic - Cluster - Completed ✓
2024-12-03 06:59:48,690 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 06:59:54,761 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

2024-12-03 06:59:55,142 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 06:59:55,144 - BERTopic - Dimensionality - Completed ✓
2024-12-03 06:59:55,144 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 06:59:55,151 - BERTopic - Cluster - Completed ✓


2024-12-03 06:59:55,152 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 06:59:55,153 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 06:59:55,283 - INFO - Step 5: Generating reports...
2024-12-03 06:59:55,284 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 06:59:55,301 - INFO - Saved STIX file: Results_Topic_Sentences/cta-2023-0727-1/topic_-1/chunk_1/prediction_1.json
2024-12-03 06:59:55,302 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 06:59:55,399 - INFO - Saved STIX file: Results_Topic_Sentences/cta-2023-0727-1/topic_-1/chunk_2/prediction_1.json
2024-12-03 06:59:55,400 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 06:59:55,412 - INFO - Saved STIX file: Results_Topic_Sentences/cta-2023-0727-1/topic_-1/chunk_3/prediction_1.json
2024-12-03 06:59:55,413 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 06:59:55,630 - INFO - Saved STIX file: Results_Topic_Sentences/cta-2023-0727-1/topic_0/chunk_1/predict

2024-12-03 07:00:22,293 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:00:22,295 - INFO - Use pytorch device_name: cuda
2024-12-03 07:00:22,296 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 07:00:24,616 - BERTopic - Embedding - Completed ✓
2024-12-03 07:00:24,617 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:00:24,843 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:24,843 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:00:24,862 - BERTopic - Cluster - Completed ✓
2024-12-03 07:00:24,865 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:00:29,540 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

2024-12-03 07:00:29,840 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:00:29,842 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:29,842 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:00:29,849 - BERTopic - Cluster - Completed ✓


2024-12-03 07:00:29,849 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:00:29,851 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:00:29,973 - INFO - Step 5: Generating reports...
2024-12-03 07:00:29,974 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:00:29,988 - INFO - Saved STIX file: Results_Topic_Sentences/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:00:29,990 - INFO - Saved STIX file: Results_Topic_Sentences/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_-1/chunk_1/prediction_2.json
2024-12-03 07:00:29,991 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:00:30,100 - INFO - Saved STIX file: Results_Topic_Sentences/tech-brief-untangling-the-patchwork-cyberespionage-group/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:00:30,101 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:00:30,115 - INFO - Saved STIX file: Results_

2024-12-03 07:00:42,263 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:00:42,265 - INFO - Use pytorch device_name: cuda
2024-12-03 07:00:42,266 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 07:00:44,603 - BERTopic - Embedding - Completed ✓
2024-12-03 07:00:44,603 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:00:44,667 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:44,667 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:00:44,676 - BERTopic - Cluster - Completed ✓
2024-12-03 07:00:44,678 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:00:46,174 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 07:00:46,288 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:00:46,290 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:46,290 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:00:46,294 - BERTopic - Cluster - Completed ✓


2024-12-03 07:00:46,295 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:00:46,295 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:00:46,329 - INFO - Step 5: Generating reports...
2024-12-03 07:00:46,330 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:00:46,344 - INFO - Saved STIX file: Results_Topic_Sentences/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:00:46,345 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:00:46,358 - INFO - Saved STIX file: Results_Topic_Sentences/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:00:46,359 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:00:46,375 - INFO - Saved STIX file: Results_Topic_Sentences/APT10 Targeting Japanese Corporations Using Updated TTPs/topic_-1/chunk_3/prediction_1.json
2024-12-03 07:00:46,376 - INFO - Processing chunk 1/3 of t

2024-12-03 07:00:51,852 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:00:51,854 - INFO - Use pytorch device_name: cuda
2024-12-03 07:00:51,855 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 07:00:54,192 - BERTopic - Embedding - Completed ✓
2024-12-03 07:00:54,193 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:00:54,407 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:54,408 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:00:54,428 - BERTopic - Cluster - Completed ✓
2024-12-03 07:00:54,430 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:00:59,552 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2024-12-03 07:00:59,861 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:00:59,862 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:00:59,862 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:00:59,868 - BERTopic - Cluster - Completed ✓


2024-12-03 07:00:59,869 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:00:59,869 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:01:00,043 - INFO - Step 5: Generating reports...
2024-12-03 07:01:00,044 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:01:00,060 - INFO - Saved STIX file: Results_Topic_Sentences/The State of OT:IoT Routers in the Software Supply Chain/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:01:00,062 - INFO - Saved STIX file: Results_Topic_Sentences/The State of OT:IoT Routers in the Software Supply Chain/topic_-1/chunk_1/prediction_2.json
2024-12-03 07:01:00,062 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:01:00,077 - INFO - Saved STIX file: Results_Topic_Sentences/The State of OT:IoT Routers in the Software Supply Chain/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:01:00,078 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:01:00,092 - INFO - Saved STIX file: Results_

2024-12-03 07:01:14,951 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:01:14,955 - INFO - Use pytorch device_name: cuda
2024-12-03 07:01:14,956 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 07:01:17,381 - BERTopic - Embedding - Completed ✓
2024-12-03 07:01:17,382 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:01:17,453 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:01:17,453 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:01:17,462 - BERTopic - Cluster - Completed ✓
2024-12-03 07:01:17,464 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:01:18,918 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-03 07:01:19,013 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:01:19,014 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:01:19,015 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:01:19,017 - BERTopic - Cluster - Completed ✓


2024-12-03 07:01:19,018 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:01:19,018 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:01:19,052 - INFO - Step 5: Generating reports...
2024-12-03 07:01:19,052 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 07:01:19,099 - INFO - Saved STIX file: Results_Topic_Sentences/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_0/chunk_1/prediction_1.json
2024-12-03 07:01:19,100 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 07:01:19,113 - INFO - Saved STIX file: Results_Topic_Sentences/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_0/chunk_2/prediction_1.json
2024-12-03 07:01:19,113 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 07:01:19,126 - INFO - Saved STIX file: Results_Topic_Sentences/2020.11.18_Zooming_into_Darknet_Threats_Targeting_Japanese_Organizations/topic_0/chunk_3/prediction_1.json
2024-12-03 07:01

2024-12-03 07:01:25,000 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:01:25,003 - INFO - Use pytorch device_name: cuda
2024-12-03 07:01:25,004 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 07:01:27,485 - BERTopic - Embedding - Completed ✓
2024-12-03 07:01:27,486 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:01:28,693 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:01:28,694 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:01:28,758 - BERTopic - Cluster - Completed ✓
2024-12-03 07:01:28,760 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:01:42,640 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/27 [00:00<?, ?it/s]

2024-12-03 07:01:43,457 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:01:43,460 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:01:43,460 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:01:43,479 - BERTopic - Cluster - Completed ✓


2024-12-03 07:01:43,480 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:01:43,481 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:01:43,758 - INFO - Step 5: Generating reports...
2024-12-03 07:01:43,760 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:01:43,775 - INFO - Saved STIX file: Results_Topic_Sentences/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:01:43,777 - INFO - Saved STIX file: Results_Topic_Sentences/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_-1/chunk_1/prediction_2.json
2024-12-03 07:01:43,778 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:01:43,792 - INFO - Saved STIX file: Results_Topic_Sentences/Emotet_Exposed_A_Look_Inside_the_Cybercriminal_Supply_Chain/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:01:43,793 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:01:43,808 - INFO - Saved STIX file:

2024-12-03 07:02:37,764 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:02:37,766 - INFO - Use pytorch device_name: cuda
2024-12-03 07:02:37,767 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 07:02:40,006 - BERTopic - Embedding - Completed ✓
2024-12-03 07:02:40,010 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:02:40,718 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:02:40,719 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:02:40,758 - BERTopic - Cluster - Completed ✓
2024-12-03 07:02:40,760 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:02:46,414 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/16 [00:00<?, ?it/s]

2024-12-03 07:02:46,763 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:02:46,765 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:02:46,766 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:02:46,776 - BERTopic - Cluster - Completed ✓


2024-12-03 07:02:46,777 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:02:46,777 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:02:46,834 - INFO - Step 5: Generating reports...
2024-12-03 07:02:46,835 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:02:46,850 - INFO - Saved STIX file: Results_Topic_Sentences/scanv_1/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:02:46,851 - INFO - Saved STIX file: Results_Topic_Sentences/scanv_1/topic_-1/chunk_1/prediction_2.json
2024-12-03 07:02:46,852 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:02:46,947 - INFO - Saved STIX file: Results_Topic_Sentences/scanv_1/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:02:46,948 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:02:47,043 - INFO - Saved STIX file: Results_Topic_Sentences/scanv_1/topic_-1/chunk_3/prediction_1.json
2024-12-03 07:02:47,043 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 07:02:47,

2024-12-03 07:03:28,558 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:03:28,560 - INFO - Use pytorch device_name: cuda
2024-12-03 07:03:28,561 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 07:03:30,781 - BERTopic - Embedding - Completed ✓
2024-12-03 07:03:30,782 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:03:30,833 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:30,833 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:03:30,840 - BERTopic - Cluster - Completed ✓
2024-12-03 07:03:30,842 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:03:31,640 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-03 07:03:31,696 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:03:31,697 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:31,698 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:03:31,699 - BERTopic - Cluster - Completed ✓


2024-12-03 07:03:31,700 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:03:31,701 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:03:31,715 - INFO - Step 5: Generating reports...
2024-12-03 07:03:31,715 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 07:03:31,904 - INFO - Saved STIX file: Results_Topic_Sentences/The_destruction_of_APT3/topic_0/chunk_1/prediction_1.json
2024-12-03 07:03:31,905 - INFO - Saved STIX file: Results_Topic_Sentences/The_destruction_of_APT3/topic_0/chunk_1/prediction_2.json
2024-12-03 07:03:31,906 - INFO - Processing chunk 2/3 of topic 0...
2024-12-03 07:03:31,913 - INFO - Processing chunk 3/3 of topic 0...
2024-12-03 07:03:31,960 - INFO - Saved STIX file: Results_Topic_Sentences/The_destruction_of_APT3/topic_0/chunk_3/prediction_1.json
2024-12-03 07:03:31,961 - INFO - Processing chunk 1/3 of topic 1...
2024-12-03 07:03:32,072 - INFO - Saved STIX file: Results_Topic_Sentences/The_destruction_of_AP

2024-12-03 07:03:35,512 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:03:35,515 - INFO - Use pytorch device_name: cuda
2024-12-03 07:03:35,516 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 07:03:37,716 - BERTopic - Embedding - Completed ✓
2024-12-03 07:03:37,718 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:03:37,826 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:37,828 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:03:37,844 - BERTopic - Cluster - Completed ✓
2024-12-03 07:03:37,846 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:03:40,154 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 07:03:40,288 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:03:40,289 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:40,290 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:03:40,293 - BERTopic - Cluster - Completed ✓


2024-12-03 07:03:40,294 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:03:40,295 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:03:40,338 - INFO - Step 5: Generating reports...
2024-12-03 07:03:40,339 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:03:40,351 - INFO - Saved STIX file: Results_Topic_Sentences/black-ddos/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:03:40,352 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:03:40,365 - INFO - Saved STIX file: Results_Topic_Sentences/black-ddos/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:03:40,366 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:03:40,378 - INFO - Saved STIX file: Results_Topic_Sentences/black-ddos/topic_-1/chunk_3/prediction_1.json
2024-12-03 07:03:40,379 - INFO - Processing chunk 1/3 of topic 0...
2024-12-03 07:03:40,391 - INFO - Saved STIX file: Results_Topic_Sentences/black-ddos/topic_0/chunk_1/prediction_1.json
2024-12-0

2024-12-03 07:03:46,601 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:03:46,603 - INFO - Use pytorch device_name: cuda
2024-12-03 07:03:46,604 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 07:03:48,836 - BERTopic - Embedding - Completed ✓
2024-12-03 07:03:48,837 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:03:48,978 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:48,979 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:03:48,993 - BERTopic - Cluster - Completed ✓
2024-12-03 07:03:48,995 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:03:52,144 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 07:03:52,406 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:03:52,408 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:03:52,409 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:03:52,413 - BERTopic - Cluster - Completed ✓


2024-12-03 07:03:52,414 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:03:52,414 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:03:52,559 - INFO - Step 5: Generating reports...
2024-12-03 07:03:52,560 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:03:52,576 - INFO - Saved STIX file: Results_Topic_Sentences/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:03:52,577 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:03:52,603 - INFO - Saved STIX file: Results_Topic_Sentences/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:03:52,605 - INFO - Saved STIX file: Results_Topic_Sentences/iranian-threat-group-updates-ttps-in-spear-phishing-campaign/topic_-1/chunk_2/prediction_2.json
2024-12-03 07:03:52,606 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:03:52,621 - INFO - Saved STIX fi

2024-12-03 07:04:04,351 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:04:04,353 - INFO - Use pytorch device_name: cuda
2024-12-03 07:04:04,353 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 07:04:06,695 - BERTopic - Embedding - Completed ✓
2024-12-03 07:04:06,696 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:04:06,786 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:04:06,787 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:04:06,798 - BERTopic - Cluster - Completed ✓
2024-12-03 07:04:06,801 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:04:09,267 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-12-03 07:04:09,428 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:04:09,429 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:04:09,429 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:04:09,432 - BERTopic - Cluster - Completed ✓


2024-12-03 07:04:09,433 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:04:09,433 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:04:09,493 - INFO - Step 5: Generating reports...
2024-12-03 07:04:09,494 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:04:09,507 - INFO - Saved STIX file: Results_Topic_Sentences/CosmicStrand_ the discovery of a sophisticated UEFI firmware rootkit _ Securelist/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:04:09,507 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:04:09,619 - INFO - Saved STIX file: Results_Topic_Sentences/CosmicStrand_ the discovery of a sophisticated UEFI firmware rootkit _ Securelist/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:04:09,620 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:04:09,734 - INFO - Saved STIX file: Results_Topic_Sentences/CosmicStrand_ the discovery of a sophisticated UEFI firmware rootkit _ Securelist/topic_-1/chunk_3/p

2024-12-03 07:04:19,210 - BERTopic - Embedding - Transforming documents to embeddings.


2024-12-03 07:04:19,213 - INFO - Use pytorch device_name: cuda
2024-12-03 07:04:19,214 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 07:04:21,471 - BERTopic - Embedding - Completed ✓
2024-12-03 07:04:21,472 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-03 07:04:21,630 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:04:21,631 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-03 07:04:21,648 - BERTopic - Cluster - Completed ✓
2024-12-03 07:04:21,650 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-03 07:04:26,070 - BERTopic - Representation - Completed ✓


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

2024-12-03 07:04:26,314 - BERTopic - Dimensionality - Reducing dimensionality of input embeddings.
2024-12-03 07:04:26,315 - BERTopic - Dimensionality - Completed ✓
2024-12-03 07:04:26,316 - BERTopic - Clustering - Approximating new points with `hdbscan_model`
2024-12-03 07:04:26,320 - BERTopic - Cluster - Completed ✓


2024-12-03 07:04:26,321 - INFO - Step 3: Fetching representative documents for each topic...
2024-12-03 07:04:26,321 - INFO - Step 4: Processing topics for IoC extraction...
2024-12-03 07:04:26,411 - INFO - Step 5: Generating reports...
2024-12-03 07:04:26,413 - INFO - Processing chunk 1/3 of topic -1...
2024-12-03 07:04:26,429 - INFO - Saved STIX file: Results_Topic_Sentences/Weaponizing a Lazarus Group Implant/topic_-1/chunk_1/prediction_1.json
2024-12-03 07:04:26,431 - INFO - Saved STIX file: Results_Topic_Sentences/Weaponizing a Lazarus Group Implant/topic_-1/chunk_1/prediction_2.json
2024-12-03 07:04:26,432 - INFO - Processing chunk 2/3 of topic -1...
2024-12-03 07:04:26,535 - INFO - Saved STIX file: Results_Topic_Sentences/Weaponizing a Lazarus Group Implant/topic_-1/chunk_2/prediction_1.json
2024-12-03 07:04:26,536 - INFO - Processing chunk 3/3 of topic -1...
2024-12-03 07:04:26,549 - INFO - Saved STIX file: Results_Topic_Sentences/Weaponizing a Lazarus Group Implant/topic_-1/ch

# Finish