In [18]:
# Loading libraries

import os
from dotenv import load_dotenv
from huggingface_hub import login
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np
import pandas as pd
from rapidfuzz import fuzz
from codecarbon import EmissionsTracker
from sklearn.model_selection import train_test_split
from utils import *
import optuna
from optuna.samplers import TPESampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sentence_transformers import SentenceTransformer

random_state = 42

In [19]:
# Load variables from a local .env file into the process environment
load_dotenv()

# Read the Hugging Face access token; os.getenv returns None when HF_TOKEN is not set
hf_token = os.getenv("HF_TOKEN")

# Authenticate against the Hugging Face Hub with that token
# NOTE(review): the cell output reports that HF_TOKEN is already the active token,
# so this explicit login may be redundant — confirm before removing
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [20]:
# Path to the Parquet dataset, relative to the notebook's working directory
path = "dataset/leandl_oesnpg_dados.parquet"

# Load the Parquet file into a Pandas DataFrame
df = pd.read_parquet(path)

# Display the DataFrame (the notebook renders a truncated preview of rows and columns)
df

Unnamed: 0,hash_id,tema_id,tema,palavras_chave,uf_tema_info,uf_pesquisador,nome_programa,sigla_entidade_ensino,nome_producao,nome_subtipo_producao,...,descricao_abstract,descricao_keyword,data_titulacao,nome_grau_academico,nome_grande_area_conhecimento,nome_area_conhecimento,nome_subarea_conhecimento,nome_especialidade,modelo_nivel,modelo_explicacao
0,ce4025a58d1cff3d346e96af2e8f2d0185caeaddd78c1b...,1,Agronegócio e Tecnologias de Informação e Comu...,"[agroindústria, assistência técnica e extensão...",ACRE,ACRE,ENSINO DE CIÊNCIAS E MATEMÁTICA,UFAC,AS TECNOLOGIAS DIGITAIS DA INFORMAÇÃO E COMUNI...,DISSERTAÇÃO,...,"THE RESEARCH, DESCRIBED IN THE PRESENT WORK, I...",INFORMATION AND COMMUNICATION TECHNOLOGIES;COM...,2023-10-18 00:00:00,MESTRADO PROFISSIONAL,MULTIDISCIPLINAR,ENSINO,ENSINO DE CIÊNCIAS E MATEMÁTICA,NÃO SE APLICA,BAIXA,A afinidade entre os dados do pesquisador e o ...
1,55982d77d62446fb9f76ae636c99c36d77d5e233ce4863...,1,Agronegócio e Tecnologias de Informação e Comu...,"[agroindústria, assistência técnica e extensão...",ACRE,ACRE,EDUCAÇÃO PROFISSIONAL E TECNOLÓGICA,IFAC,O CURRÍCULO INTEGRADO DO INSTITUTO FEDERAL DO ...,DISSERTAÇÃO,...,THIS STUDY INVESTIGATED THE CURRICULUM OF INTE...,INTEGRATED SECONDARY EDUCATIO;CURRICULAR ORGAN...,2023-09-29 00:00:00,MESTRADO PROFISSIONAL,MULTIDISCIPLINAR,ENSINO,NÃO SE APLICA,NÃO SE APLICA,MEDIA,A afinidade entre os dados do pesquisador e o ...
2,1f7615b9be49f80d289ba7c99eb64a5f8dc387a23518a6...,3,Biodiversidade e Biotecnologia,"[biodiversidade, bioeconomia, biotecnologia, c...",ACRE,ACRE,CIÊNCIAS DA SAÚDE NA AMAZÔNIA OCIDENTAL,UFAC,TENDÊNCIA TEMPORAL E DISTRIBUIÇÃO ESPACIAL DAS...,DISSERTAÇÃO,...,"THE ANALYZES IN THE BIOMES: AMAZON, CAATINGA, ...",EPIDEMIOLOGY;LEISHMANIA;PRAIS-WINSTEN,2023-03-21 00:00:00,MESTRADO,CIÊNCIAS DA SAÚDE,MEDICINA,ANATOMIA PATOLÓGICA E PATOLOGIA CLÍNICA,NÃO SE APLICA,BAIXA,Os dados do pesquisador(a) estão focados na an...
3,3773fcb7084d294753146c9580750eca9b2348b78cc4aa...,3,Biodiversidade e Biotecnologia,"[biodiversidade, bioeconomia, biotecnologia, c...",ACRE,ACRE,GEOGRAFIA,UFAC,MODELAGEM DE BIOMASSA FLORESTAL E CÁLCULO DE C...,DISSERTAÇÃO,...,DURING THE LAST FEW DECADES TROPICAL FORESTS H...,GEDI;REMOTE SENSING;FOREST;MAPPING;BIOMASS;CARBON,2023-08-25 00:00:00,MESTRADO,CIÊNCIAS HUMANAS,GEOGRAFIA,NÃO SE APLICA,NÃO SE APLICA,MEDIA,"A dissertação do pesquisador foca na ""modelage..."
4,07dae75eae08bbf1828e779c70c9d283965fe58880d02f...,3,Biodiversidade e Biotecnologia,"[biodiversidade, bioeconomia, biotecnologia, c...",ACRE,ACRE,CIÊNCIAS AMBIENTAIS,UFAC,ANÁLISE SOCIOECONÔMICA E AMBIENTAL DA CADEIA P...,DISSERTAÇÃO,...,THE AMAZON BIOME HOLDS A PROMINENT POSITION IN...,BURITI;NON-TIMBER FOREST PRODUCTS;COOPERATIVIS...,2023-01-30 00:00:00,MESTRADO,MULTIDISCIPLINAR,CIÊNCIAS AMBIENTAIS,NÃO SE APLICA,NÃO SE APLICA,ALTA,Os dados do pesquisador mostram uma clara afin...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42041,34aa7153bcaefa1132d1117f40f41f8e46781282b23d2f...,467,Transformação Digital,"[automação de processos, cibersegurança, desbu...",TOCANTINS,TOCANTINS,GOVERNANÇA E TRANSFORMAÇÃO DIGITAL,UFT-PALMAS,COMAIS LABS: IMPLANTAÇÃO E TRANSFORMAÇÃO DIGIT...,TESE,...,THIS WORK ADDRESSES THE IMPLEMENTATION AND DIG...,DIGITAL TRANSFORMATION;SOLUTION MODELING;ARTIF...,2023-07-21 00:00:00,DOUTORADO PROFISSIONAL,MULTIDISCIPLINAR,INTERDISCIPLINAR,ENGENHARIA/TECNOLOGIA/GESTÃO,NÃO SE APLICA,ALTA,O título da tese 'COMAIS LABS: IMPLANTAÇÃO E T...
42042,993855663c31514350f2f5d9926141bf24b534bc3486a8...,467,Transformação Digital,"[automação de processos, cibersegurança, desbu...",TOCANTINS,TOCANTINS,PROFNIT - PROPRIEDADE INTELECTUAL E TRANSFERÊN...,UFT-PALMAS,DESENVOLVIMENTO DE JOGO EDUCATIVO SOBRE PROPRI...,DISSERTAÇÃO,...,IT IS A PROTOTYPE FOR THE DESIGN OF A BOARD GA...,BOARD GAME;INTELECTUAL PROPERTY;TECHNOLOGY TRA...,2023-04-04 00:00:00,MESTRADO PROFISSIONAL,CIÊNCIAS SOCIAIS APLICADAS,ADMINISTRAÇÃO,NÃO SE APLICA,NÃO SE APLICA,BAIXA,O tema estratégico 'Transformação Digital' abo...
42043,2e36040512f07996790dd0797c17e54942e82188a53190...,467,Transformação Digital,"[automação de processos, cibersegurança, desbu...",TOCANTINS,TOCANTINS,DESENVOLVIMENTO REGIONAL,UFT-PALMAS,DESAFIOS E OPORTUNIDADES DA IMPLEMENTAÇÃO DA I...,TESE,...,OPEN INNOVATION (AI) HAS BEEN USED AS AN ALTER...,OPEN INNOVATION;LEGAL FRAMEWORK FOR SCIENCE;TE...,2023-04-26 00:00:00,DOUTORADO,CIÊNCIAS SOCIAIS APLICADAS,PLANEJAMENTO URBANO E REGIONAL,NÃO SE APLICA,NÃO SE APLICA,BAIXA,A pesquisa aborda a inovação aberta e o marco ...
42044,562129e7e79c218188529aa81f5300214a922f5b745afc...,467,Transformação Digital,"[automação de processos, cibersegurança, desbu...",TOCANTINS,TOCANTINS,ENSINO EM CIÊNCIAS E SAÚDE,UFT-PALMAS,ECOSSISTEMAS NA COMUNIDADE ACADÊMICA E EMPRESA...,DISSERTAÇÃO,...,"INNOVATION ECOSYSTEMS ARE HOME TO PEOPLE, TECH...",INCUBATOR;INNOVATION;ENTREPRENEURSHIP,2023-10-23 00:00:00,MESTRADO,MULTIDISCIPLINAR,ENSINO,NÃO SE APLICA,NÃO SE APLICA,ALTA,A pesquisa aborda ecossistemas de inovação tec...


In [21]:
# Derive the binary target column "afinidade" from the textual level in "modelo_nivel"
def _to_binary_affinity(level):
    """Map "BAIXA" and "MEDIA" to 0; every other value maps to 1."""
    if level in ["BAIXA", "MEDIA"]:
        return 0
    return 1

df["afinidade"] = df["modelo_nivel"].apply(_to_binary_affinity)

In [22]:
# Build the "perfil" text: production title, a line break, then the abstract.
# Missing values become empty strings so the concatenation never produces NaN.
producao_title = df['nome_producao'].fillna("")
producao_resumo = df["descricao_resumo"].fillna("")
df['perfil'] = producao_title + "\n" + producao_resumo

# Display the "perfil" column
df['perfil']

0        AS TECNOLOGIAS DIGITAIS DA INFORMAÇÃO E COMUNI...
1        O CURRÍCULO INTEGRADO DO INSTITUTO FEDERAL DO ...
2        TENDÊNCIA TEMPORAL E DISTRIBUIÇÃO ESPACIAL DAS...
3        MODELAGEM DE BIOMASSA FLORESTAL E CÁLCULO DE C...
4        ANÁLISE SOCIOECONÔMICA E AMBIENTAL DA CADEIA P...
                               ...                        
42041    COMAIS LABS: IMPLANTAÇÃO E TRANSFORMAÇÃO DIGIT...
42042    DESENVOLVIMENTO DE JOGO EDUCATIVO SOBRE PROPRI...
42043    DESAFIOS E OPORTUNIDADES DA IMPLEMENTAÇÃO DA I...
42044    ECOSSISTEMAS NA COMUNIDADE ACADÊMICA E EMPRESA...
42045    TURISMO SUSTENTÁVEL EM PRAIA DE RIO: PERCEPÇÃO...
Name: perfil, Length: 42046, dtype: object

In [23]:
# Stage 1: keep 70% of the rows for training and hold out the remaining 30%,
# stratified on "afinidade" so the class distribution is preserved
train, temp = train_test_split(
    df, test_size=0.30, stratify=df['afinidade'], random_state=random_state
)

# Stage 2: divide the held-out 30% into validation (1/3 of it = 10% of the total)
# and test (2/3 of it = 20% of the total), again stratified on "afinidade"
val, test = train_test_split(
    temp, test_size=2/3, stratify=temp['afinidade'], random_state=random_state
)

# Report the size of each split
for split_label, split_frame in (("Train", train), ("Val", val), ("Test", test)):
    print(f"{split_label}: {len(split_frame)}")


Train: 29432
Val: 4204
Test: 8410


# String Match

In [7]:
# Output folder for the exact string-match baseline (classification report and CodeCarbon output)
path_string_match = "results/string_match"

# Make sure the folder exists; create_directory is presumably provided by `from utils import *`
# (the cell output shows it reports when the directory already exists)
create_directory(path_string_match)

Directory 'results/string_match' already exists.


In [None]:
# Define a simple string matching function
def string_match(row):
    """Return 1 if the lower-cased 'tema' occurs as a substring of the lower-cased 'perfil', else 0."""
    return int(row['tema'].lower() in row['perfil'].lower())

# Start CodeCarbon tracker to measure carbon emissions during execution
tracker = EmissionsTracker(output_dir=path_string_match, save_to_file=True, project_name="test")
tracker.start()

# Apply the string_match function row by row to the test set.
# `test` was sliced out of `df` by train_test_split, so writing a column into it in place
# can raise SettingWithCopyWarning; `assign` builds a new frame and rebinds the name instead,
# which also keeps this cell safe to re-run.
test = test.assign(pred_string=test.apply(string_match, axis=1))

# Generate classification report comparing true labels ('afinidade')
# against predictions ('pred_string'), and return it as a dictionary
report = classification_report(test['afinidade'], test['pred_string'], output_dict=True)

# Stop the CodeCarbon tracker and store estimated emissions
emissions = tracker.stop()

# Save the classification report to a JSON file
save_dict_to_json(report, os.path.join(path_string_match, "test_classification_report.json"))

print("✅ Results saved in", path_string_match)
print("Estimated emissions:", emissions, "kg CO₂")



✅ Dictionary saved at: results/string_match/classification_report.json
✅ Results saved in results/string_match
Estimated emissions: 5.344569361430485e-07 kg CO₂


# Fuzzy Match

In [9]:
# Output folder for the fuzzy-match baseline (threshold search results, test report, CodeCarbon output)
path_fuzzy_match = "results/fuzzy_match"

# Make sure the folder exists; create_directory is presumably provided by `from utils import *`
# (the cell output shows it reports when the directory already exists)
create_directory(path_fuzzy_match)

Directory 'results/fuzzy_match' already exists.


In [10]:
path_fuzzy_match = "results/fuzzy_match"


def fuzzy_scores(df):
    """Return, per row, rapidfuzz's partial_ratio between the lower-cased 'tema' and 'perfil'.

    The score does not depend on the decision threshold, so it is computed once and
    reused for every candidate threshold.
    """
    return df.apply(
        lambda row: fuzz.partial_ratio(row['tema'].lower(), row['perfil'].lower()),
        axis=1
    )


# Fuzzy prediction function
def fuzzy_predict(df, threshold, scores=None):
    """Return 1 where the 'tema'/'perfil' similarity is >= threshold, otherwise 0.

    Parameters
    ----------
    df : DataFrame with 'tema' and 'perfil' text columns.
    threshold : similarity cut-off on the partial_ratio scale.
    scores : optional precomputed output of fuzzy_scores(df); computed here when omitted.
    """
    if scores is None:
        scores = fuzzy_scores(df)
    return (scores >= threshold).astype(int)


# Combine training and validation sets for threshold tuning
train_val = pd.concat([train, val])

best_threshold = None
# Start below any attainable F1 so a threshold is always selected, even when every
# candidate scores 0 (otherwise best_threshold would stay None and the test step would fail).
best_f1 = -1.0
results_train = {}

# -------------------------
# 🚀 Measuring carbon emissions during threshold search
# -------------------------
tracker_train = EmissionsTracker(
    output_dir=path_fuzzy_match,
    save_to_file=True,
    project_name="train_val"
)
tracker_train.start()

# Similarities are computed a single time; only the cheap comparison is repeated per threshold.
# This also lowers the emissions measured for the search compared with rescoring 20 times.
train_val_scores = fuzzy_scores(train_val)

# Loop over possible thresholds (5, 10, ..., 100)
# NOTE(review): this search uses the binary F1 of the positive class, while the embedding
# pipelines select on macro F1 — confirm which one is intended.
for t in range(5, 101, 5):
    preds = fuzzy_predict(train_val, threshold=t, scores=train_val_scores)
    f1 = f1_score(train_val['afinidade'], preds, zero_division=0)
    results_train[t] = {"f1_score": f1}
    # Update best threshold if a higher F1-score is found
    if f1 > best_f1:
        best_f1 = f1
        best_threshold = t

# Stop emissions tracker and log results
emissions_train = tracker_train.stop()
print(f"🌱 Total carbon emissions (train/validation): {emissions_train:.6f} kg CO₂")

# Save F1-scores for all thresholds and the best one
save_dict_to_json(results_train, f"{path_fuzzy_match}/train_val_f1.json")
save_dict_to_json(
    {"best_threshold": best_threshold, "best_f1": best_f1, "emissions_train": emissions_train},
    f"{path_fuzzy_match}/best_threshold.json"
)

print(f"Best threshold on train+val: {best_threshold} (F1={best_f1:.3f})")

# -------------------------
# 🚀 Measuring carbon emissions on the test set with the best threshold
# -------------------------
tracker_test = EmissionsTracker(
    output_dir=path_fuzzy_match,
    save_to_file=True,
    project_name="test"
)
tracker_test.start()

# Apply the chosen threshold to the test set
test_preds = fuzzy_predict(test, threshold=best_threshold)
report_test = classification_report(test['afinidade'], test_preds, output_dict=True)

# Stop tracker and store emissions
emissions_test = tracker_test.stop()
print(f"🌱 Carbon emissions (test): {emissions_test:.6f} kg CO₂")

# Save test report
save_dict_to_json(report_test, f"{path_fuzzy_match}/test_classification_report.json")

print("✅ Results and emissions saved in", path_fuzzy_match)

🌱 Total carbon emissions (train/validation): 0.000022 kg CO₂
✅ Dictionary saved at: results/fuzzy_match/train_val_f1.json
✅ Dictionary saved at: results/fuzzy_match/best_threshold.json
Best threshold on train+val: 55 (F1=0.385)
🌱 Carbon emissions (test): 0.000001 kg CO₂
✅ Dictionary saved at: results/fuzzy_match/test_classification_report.json
✅ Results and emissions saved in results/fuzzy_match


# BERT

In [24]:
def compute_and_save_embeddings(embedding_model, train, val, test,
                                profile_col="perfil", theme_col="tema", label_col="afinidade"):
    """
    Encode the profile and theme texts of each split and store them as .npy files.

    Files are written under embeddings/<embedding_model with "/" replaced by "_">/ as
    X_profile_<split>.npy, X_theme_<split>.npy and y_<split>.npy. Carbon emissions are
    tracked in two separate CodeCarbon runs: "train_val" (train + val) and "test".

    Returns None without encoding anything when all nine files already exist;
    otherwise returns a dict with "emissions_trainval" and "emissions_test".
    """
    print(f"\n🟢 Checking embeddings for {embedding_model}...")
    embed_path = f"embeddings/{embedding_model.replace('/', '_')}"
    create_directory(embed_path)

    # One profile, theme and label file per split
    expected_files = [
        f"{embed_path}/{prefix}_{split_name}.npy"
        for split_name in ("train", "val", "test")
        for prefix in ("X_profile", "X_theme", "y")
    ]

    # Nothing to do when every expected file is already on disk
    if all(os.path.exists(file_path) for file_path in expected_files):
        print(f"✅ Embeddings already exist in {embed_path}, nothing to do.")
        return None

    model = SentenceTransformer(embedding_model)

    def _encode_split(frame, split_name):
        """Encode both text columns of `frame` and save embeddings plus labels for `split_name`."""
        profile_vectors = model.encode(
            frame[profile_col].tolist(), show_progress_bar=True, normalize_embeddings=True
        )
        theme_vectors = model.encode(
            frame[theme_col].tolist(), show_progress_bar=True, normalize_embeddings=True
        )
        labels = frame[label_col].values

        np.save(f"{embed_path}/X_profile_{split_name}.npy", profile_vectors)
        np.save(f"{embed_path}/X_theme_{split_name}.npy", theme_vectors)
        np.save(f"{embed_path}/y_{split_name}.npy", labels)

    def _encode_with_tracking(project_name, named_splits):
        """Encode the given (split_name, frame) pairs inside one CodeCarbon run; return its emissions."""
        tracker = EmissionsTracker(
            output_dir=embed_path,
            save_to_file=True,
            project_name=project_name
        )
        tracker.start()
        for split_name, frame in named_splits:
            _encode_split(frame, split_name)
        return tracker.stop()

    # 🚀 Training + Validation
    emissions_trainval = _encode_with_tracking("train_val", [("train", train), ("val", val)])
    print(f"🌱 {embedding_model} - Embedding emissions (train+val): {emissions_trainval:.6f} kg CO₂")

    # 🚀 Test
    emissions_test = _encode_with_tracking("test", [("test", test)])
    print(f"🌱 {embedding_model} - Embedding emissions (test): {emissions_test:.6f} kg CO₂")

    return {
        "emissions_trainval": emissions_trainval,
        "emissions_test": emissions_test
    }


def load_embeddings(embedding_model, split):
    """
    Read back the arrays stored for one embedding model and one split.

    split: "train", "val", or "test"
    Returns the tuple (X_profile, X_theme, y) loaded from
    embeddings/<embedding_model with "/" replaced by "_">/.
    """
    embed_path = "embeddings/" + embedding_model.replace("/", "_")
    return tuple(
        np.load(f"{embed_path}/{prefix}_{split}.npy")
        for prefix in ("X_profile", "X_theme", "y")
    )


def combine_embeddings(X_profile, X_theme, method="concatenate"):
    """
    Merge profile and theme embeddings into a single feature matrix.

    Supported values of `method`:
      - "concatenate": profile and theme columns placed side by side (axis=1)
      - "mean": element-wise average of the two arrays
      - "min": element-wise minimum
      - "max": element-wise maximum

    Raises ValueError for any other method name.
    """
    if method == "concatenate":
        return np.concatenate([X_profile, X_theme], axis=1)
    if method == "mean":
        return (X_profile + X_theme) / 2
    if method == "min":
        return np.minimum(X_profile, X_theme)
    if method == "max":
        return np.maximum(X_profile, X_theme)
    raise ValueError(f"Unknown method: {method}")

In [25]:
# Model identifiers of the sentence-embedding models compared in this notebook;
# each one is passed to SentenceTransformer(...) inside compute_and_save_embeddings
embedding_models = [
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 
    "ibm-granite/granite-embedding-278m-multilingual"
]

# Iterate through the list of embedding models
for embedding_model in embedding_models:
    # Generate and save embeddings for train/val/test; the function returns early
    # (without encoding) when all expected .npy files already exist
    compute_and_save_embeddings(embedding_model, train, val, test)


🟢 Checking embeddings for sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2...
Directory 'embeddings/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2' already exists.
✅ Embeddings already exist in embeddings/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2, nothing to do.

🟢 Checking embeddings for sentence-transformers/paraphrase-multilingual-mpnet-base-v2...
Directory 'embeddings/sentence-transformers_paraphrase-multilingual-mpnet-base-v2' already exists.
✅ Embeddings already exist in embeddings/sentence-transformers_paraphrase-multilingual-mpnet-base-v2, nothing to do.

🟢 Checking embeddings for ibm-granite/granite-embedding-278m-multilingual...
Directory 'embeddings/ibm-granite_granite-embedding-278m-multilingual' already exists.
✅ Embeddings already exist in embeddings/ibm-granite_granite-embedding-278m-multilingual, nothing to do.


## BERT Threshold

In [27]:
def _scaled_cosine_similarity(X_a, X_b):
    """Row-wise cosine similarity of two equally shaped 2-D arrays, rescaled from [-1, 1] to [0, 1].

    Rows where either vector has zero norm get a raw similarity of 0 (0.5 after rescaling)
    instead of the NaN a plain division would produce.
    """
    dots = np.sum(X_a * X_b, axis=1)
    norms = np.linalg.norm(X_a, axis=1) * np.linalg.norm(X_b, axis=1)
    # Keep the dtype the plain division would have produced so threshold comparisons are unchanged
    sims = np.divide(
        dots,
        norms,
        out=np.zeros(dots.shape, dtype=np.result_type(dots, norms)),
        where=norms > 0,
    )
    return (sims + 1) / 2


def run_threshold_pipeline(embedding_model, thresholds=range(5, 101, 5)):
    """
    Cosine-similarity threshold baseline for one embedding model.

    Loads the saved train/val/test embeddings, picks the threshold that maximises macro F1
    on train+val, then evaluates that threshold on the test split. Carbon emissions are
    tracked separately for the search ("train_val") and the test evaluation ("test").

    Parameters
    ----------
    embedding_model : model identifier; "/" is replaced by "_" to build folder names.
    thresholds : iterable of percentages; each value is divided by 100 before being
        compared with the rescaled similarity.

    Returns
    -------
    dict with the single key "cosine_threshold" holding the best threshold, the test
    classification report and both emission figures. The same data is also written as
    JSON under results/BERT_threshold/<model>/.
    """
    print(f"\n🟢 Threshold pipeline for {embedding_model}")
    base_path = "results/BERT_threshold"
    model_path = f"{base_path}/{embedding_model.replace('/', '_')}"
    create_directory(model_path)

    # Load embeddings (Xp = profile, Xt = theme, y = labels)
    Xp_train, Xt_train, y_train = load_embeddings(embedding_model, "train")
    Xp_val,   Xt_val,   y_val   = load_embeddings(embedding_model, "val")
    Xp_test,  Xt_test,  y_test  = load_embeddings(embedding_model, "test")

    # Merge train + val to find the best threshold
    Xp_train_val = np.vstack([Xp_train, Xp_val])
    Xt_train_val = np.vstack([Xt_train, Xt_val])
    y_train_val  = np.concatenate([y_train, y_val])

    results_summary = {}

    # ------------------------
    # 🚀 CodeCarbon emissions for training/validation (threshold search)
    # ------------------------
    tracker_train = EmissionsTracker(
        output_dir=model_path,
        save_to_file=True,
        project_name="train_val"
    )
    tracker_train.start()

    # Cosine similarity between profile and theme embeddings, rescaled to [0, 1]
    sims_train_val = _scaled_cosine_similarity(Xp_train_val, Xt_train_val)

    # Start below any attainable F1 so a threshold is always selected; with an initial 0,
    # an all-zero search would leave best_threshold as None and break the test comparison.
    best_threshold, best_f1 = None, -1.0
    results_train_val = {}

    # Search thresholds (5% to 100% with the default range)
    for t in thresholds:
        thr = t / 100  # scale to [0, 1]
        preds = (sims_train_val >= thr).astype(int)
        f1 = f1_score(y_train_val, preds, average="macro", zero_division=0)
        results_train_val[thr] = {"f1": f1}
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = thr

    emissions_train = tracker_train.stop()
    print(f"🌱 Training/validation emissions: {emissions_train:.6f} kg CO₂")
    print(f"⭐ Best threshold: {best_threshold} (F1={best_f1:.3f})")

    # Save train/val results
    save_dict_to_json(results_train_val, f"{model_path}/train_val.json")
    save_dict_to_json(
        {"best_threshold": best_threshold, "best_f1": best_f1, "emissions_train": emissions_train},
        f"{model_path}/best_threshold.json"
    )

    # ------------------------
    # 🚀 CodeCarbon emissions for test (using best threshold)
    # ------------------------
    tracker_test = EmissionsTracker(
        output_dir=model_path,
        save_to_file=True,
        project_name="test"
    )
    tracker_test.start()

    sims_test = _scaled_cosine_similarity(Xp_test, Xt_test)

    preds_test = (sims_test >= best_threshold).astype(int)
    report = classification_report(y_test, preds_test, output_dict=True)

    emissions_test = tracker_test.stop()
    print(f"🌱 Test emissions: {emissions_test:.6f} kg CO₂")

    # Save test results
    results_test = {
        "best_threshold": best_threshold,
        "classification_report": report,
        "emissions_train": emissions_train,
        "emissions_test": emissions_test
    }
    save_dict_to_json(results_test, f"{model_path}/test_report.json")
    save_dict_to_json(report, f"{model_path}/test_classification_report.json")

    results_summary["cosine_threshold"] = results_test
    return results_summary

In [28]:
# Run the cosine-similarity threshold baseline once per embedding model; each call writes
# its JSON results under results/BERT_threshold/<model name with "/" replaced by "_">/
for embedding_model in embedding_models:
    run_threshold_pipeline(embedding_model)


🟢 Threshold pipeline for sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Directory 'results/BERT_threshold/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2' created successfully!
🌱 Training/validation emissions: 0.000002 kg CO₂
⭐ Best threshold: 0.8 (F1=0.547)
✅ Dictionary saved at: results/BERT_threshold/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/train_val.json
✅ Dictionary saved at: results/BERT_threshold/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/best_threshold.json
🌱 Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_threshold/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/test_report.json
✅ Dictionary saved at: results/BERT_threshold/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/test_classification_report.json

🟢 Threshold pipeline for sentence-transformers/paraphrase-multilingual-mpnet-base-v2
Directory 'results/BERT_threshold/sentence-transformers_paraphrase-multilingual-mpnet-b

## BERT Classifiers

In [15]:
# ------------------------
# Settings
# ------------------------
# Number of Optuna trials run for each study (passed to study.optimize)
n_trials = 20
# Root folder for the classifier results; read as a global by run_pipeline_for_embedding
base_path = "results/BERT_classifiers"
create_directory(base_path)

# ------------------------
# Generic evaluation function
# ------------------------
def evaluate_and_return(trial, y_true, y_pred):
    """
    Score one set of predictions and attach the metrics to the Optuna trial.

    Accuracy plus macro-averaged precision, recall and F1 (with zero_division=0) are stored
    as trial user attributes named "accuracy", "precision", "recall" and "f1".
    Returns the macro F1, which is the value the study optimises.
    """
    macro_kwargs = {"average": "macro", "zero_division": 0}
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, **macro_kwargs),
        "recall": recall_score(y_true, y_pred, **macro_kwargs),
        "f1": f1_score(y_true, y_pred, **macro_kwargs),
    }

    # Keep every metric on the trial so it can be exported after the study finishes
    for metric_name, metric_value in metrics.items():
        trial.set_user_attr(metric_name, metric_value)

    return metrics["f1"]

# ------------------------
# Objective functions for each classifier
# ------------------------
def make_objectives(X_train, y_train, X_val, y_val):
    """
    Build one Optuna objective per classifier, all closed over the same train/val data.

    Each objective samples hyperparameters from the trial, fits the classifier on
    (X_train, y_train), predicts X_val and returns the macro F1 computed by
    evaluate_and_return (which also stores the other metrics on the trial).

    Returns a dict mapping the classifier label ("KNN", "LogReg", "DecisionTree",
    "RandomForest", "SVM") to its objective function.
    """
    def objective_knn(trial):
        n_neighbors = trial.suggest_int("n_neighbors", 2, 200)
        weights = trial.suggest_categorical("weights", ["uniform", "distance"])
        metric = trial.suggest_categorical("metric", ["euclidean", "manhattan", "cosine"])
        model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        return evaluate_and_return(trial, y_val, y_pred)

    def objective_logreg(trial):
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
        # the parameter name "C" and its range are unchanged
        C = trial.suggest_float("C", 1e-3, 1e2, log=True)
        model = LogisticRegression(C=C, max_iter=2000, solver="lbfgs", random_state=random_state)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        return evaluate_and_return(trial, y_val, y_pred)

    def objective_dtree(trial):
        max_depth = trial.suggest_int("max_depth", 2, 50)
        criterion = trial.suggest_categorical("criterion", ["gini", "entropy", "log_loss"])
        min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
        model = DecisionTreeClassifier(
            max_depth=max_depth,
            criterion=criterion,
            min_samples_split=min_samples_split,
            random_state=random_state
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        return evaluate_and_return(trial, y_val, y_pred)

    def objective_rf(trial):
        n_estimators = trial.suggest_int("n_estimators", 20, 300)
        criterion = trial.suggest_categorical("criterion", ["gini", "entropy", "log_loss"])
        min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
        max_depth = trial.suggest_int("max_depth", 2, 20)
        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            criterion=criterion,
            min_samples_split=min_samples_split,
            random_state=random_state
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        return evaluate_and_return(trial, y_val, y_pred)

    def objective_svm(trial):
        C = trial.suggest_float("C", 1e-3, 1e2, log=True)
        penalty = trial.suggest_categorical("penalty", ["l1", "l2"])
        # LinearSVC rejects penalty="l1" with the default squared-hinge loss when the dual
        # formulation is used, so force the primal form for l1 and leave the library default
        # untouched for l2.
        svm_kwargs = {"dual": False} if penalty == "l1" else {}
        model = LinearSVC(C=C, penalty=penalty, random_state=random_state, **svm_kwargs)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        return evaluate_and_return(trial, y_val, y_pred)

    return {
        "KNN": objective_knn,
        "LogReg": objective_logreg,
        "DecisionTree": objective_dtree,
        "RandomForest": objective_rf,
        "SVM": objective_svm
    }

Directory 'results/BERT_classifiers' created successfully!


In [16]:
# ------------------------
# Main pipeline per embedding model
# ------------------------
def run_pipeline_for_embedding(embedding_model, methods=("concatenate", "mean", "min", "max")):
    """
    Tune, refit and test every classifier for one embedding model.

    For each combination method in `methods`, the profile and theme embeddings are merged
    with combine_embeddings. For each classifier returned by make_objectives, an Optuna
    study of `n_trials` trials is run on train/val, the best parameters are refit on
    train+val, and the refit model is evaluated on the test split. CodeCarbon tracks the
    tuning ("train_val") and the test prediction ("test") separately.

    Results are written under <base_path>/<model>/<method>/<classifier>/ as train_val.json,
    test_results.json and test_classification_report.json. Reads the module-level
    `base_path`, `n_trials` and `random_state`. Returns None.

    Raises ValueError if make_objectives returns a classifier label that has no
    final-model constructor here.
    """
    print(f"\n🟢 Running pipeline for embedding: {embedding_model}")
    embed_path = f"{base_path}/{embedding_model.replace('/', '_')}"
    create_directory(embed_path)

    # Load embeddings for train, val, test
    Xp_train, Xt_train, y_train = load_embeddings(embedding_model, "train")
    Xp_val,   Xt_val,   y_val   = load_embeddings(embedding_model, "val")
    Xp_test,  Xt_test,  y_test  = load_embeddings(embedding_model, "test")

    # Loop over embedding combination methods
    for method in methods:
        print(f"\n🔀 Testing combination method: {method}")
        method_path = f"{embed_path}/{method}"
        create_directory(method_path)

        # Combine embeddings (profile + theme)
        X_train = combine_embeddings(Xp_train, Xt_train, method=method)
        X_val   = combine_embeddings(Xp_val,   Xt_val,   method=method)
        X_test  = combine_embeddings(Xp_test,  Xt_test,  method=method)

        # Merge train+val for final training after hyperparameter tuning
        X_train_val = np.vstack([X_train, X_val])
        y_train_val = np.concatenate([y_train, y_val])

        # Define objective functions for each classifier
        objectives = make_objectives(X_train, y_train, X_val, y_val)

        # Loop through classifiers
        for name, obj in objectives.items():
            print(f"\n🔎 Running Optuna study for {name} ({method})...")
            model_path = f"{method_path}/{name}"
            create_directory(model_path)

            # 🚀 Track emissions during training/validation
            tracker_train = EmissionsTracker(
                output_dir=model_path, save_to_file=True,
                project_name="train_val"
            )
            tracker_train.start()

            # Same seed for every study so the sampled hyperparameters are reproducible
            sampler = TPESampler(seed=random_state)
            study = optuna.create_study(direction="maximize", sampler=sampler)
            study.optimize(obj, n_trials=n_trials)

            emissions_train = tracker_train.stop()
            print(f"🌱 {name} ({method}) - Train/Validation emissions: {emissions_train:.6f} kg CO₂")

            # Save all trial results
            trials_summary = [
                {
                    "number": t.number,
                    "params": t.params,
                    "accuracy": t.user_attrs.get("accuracy"),
                    "precision": t.user_attrs.get("precision"),
                    "recall": t.user_attrs.get("recall"),
                    "f1": t.user_attrs.get("f1"),
                    "method": method   # log combination method used
                }
                for t in study.trials
            ]
            save_dict_to_json(trials_summary, f"{model_path}/train_val.json")

            # ------------------------
            # Evaluate best trial on test set
            # ------------------------
            best_trial = study.best_trial
            params = best_trial.params

            if name == "KNN":
                model = KNeighborsClassifier(**params)
            elif name == "LogReg":
                model = LogisticRegression(max_iter=2000, solver="lbfgs", random_state=random_state, **params)
            elif name == "DecisionTree":
                model = DecisionTreeClassifier(random_state=random_state, **params)
            elif name == "RandomForest":
                model = RandomForestClassifier(random_state=random_state, **params)
            elif name == "SVM":
                # penalty="l1" with the default squared-hinge loss is only solvable in the
                # primal form; leave the library default in place for l2.
                svm_kwargs = {"dual": False} if params.get("penalty") == "l1" else {}
                model = LinearSVC(random_state=random_state, **svm_kwargs, **params)
            else:
                # Without this branch an unknown label would silently reuse the previous
                # iteration's model (or fail with an unbound name on the first one).
                raise ValueError(f"No final-model constructor defined for classifier '{name}'")

            # Retrain with train+val data
            model.fit(X_train_val, y_train_val)

            # 🚀 Track emissions during test evaluation (prediction only; the refit is not tracked)
            tracker_test = EmissionsTracker(
                output_dir=model_path, save_to_file=True,
                project_name="test"
            )
            tracker_test.start()
            y_pred_test = model.predict(X_test)
            emissions_test = tracker_test.stop()

            print(f"🌱 {name} ({method}) - Test emissions: {emissions_test:.6f} kg CO₂")

            # Compute final test metrics
            report = classification_report(y_test, y_pred_test, output_dict=True)

            results_test = {
                "best_params": params,
                "classification_report": report,
                "emissions_train": emissions_train,
                "emissions_test": emissions_test,
                "method": method
            }
            save_dict_to_json(results_test, f"{model_path}/test_results.json")
            save_dict_to_json(report, f"{model_path}/test_classification_report.json")


In [17]:
# Run the Optuna tuning and test evaluation for every embedding model;
# results are written under the folder configured in `base_path`
for emb in embedding_models:
    run_pipeline_for_embedding(emb)


🟢 Running pipeline for embedding: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2' created successfully!

🔀 Testing combination method: concatenate
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate' created successfully!

🔎 Running Optuna study for KNN (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/KNN' created successfully!


[I 2025-08-26 17:58:27,730] A new study created in memory with name: no-name-a15add4a-98b2-439a-a7ca-2287ca0844a5
[I 2025-08-26 17:58:29,245] Trial 0 finished with value: 0.5875880142660859 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.5875880142660859.
[I 2025-08-26 17:58:32,507] Trial 1 finished with value: 0.649887088143336 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.649887088143336.
[I 2025-08-26 17:58:34,381] Trial 2 finished with value: 0.5674682075575262 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.649887088143336.
[I 2025-08-26 17:58:35,846] Trial 3 finished with value: 0.7337167218897807 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7337167218897807.
[I 2025-08-26 17:58:50,856] Trial 4 finished with value: 0.5784368620440

🌱 KNN (concatenate) - Train/Validation emissions: 0.000094 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/KNN/train_val.json
🌱 KNN (concatenate) - Test emissions: 0.000004 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/LogReg' created successfully!


[I 2025-08-26 18:00:11,526] A new study created in memory with name: no-name-b7b4f48d-59ec-4deb-9f14-b88eb5203d9c
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:00:12,035] Trial 0 finished with value: 0.48797550061203876 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.48797550061203876.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:00:16,625] Trial 1 finished with value: 0.6002601981006912 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6002601981006912.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:00:19,548] Trial 2 finished with value: 0.5780457765553046 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6002601981006912.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:00:21,292] Trial 3 finished with value: 0.5636727858267113 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6002601981006912.
  C = trial.suggest_logunifor

🌱 LogReg (concatenate) - Train/Validation emissions: 0.000092 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/LogReg/train_val.json
🌱 LogReg (concatenate) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/DecisionTree' created successfully!


[I 2025-08-26 18:01:14,004] A new study created in memory with name: no-name-5d6081e2-f09d-47e5-a2d7-829d24191e8f
[I 2025-08-26 18:01:34,000] Trial 0 finished with value: 0.6815667391879404 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6815667391879404.
[I 2025-08-26 18:01:46,964] Trial 1 finished with value: 0.6003035689509032 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6815667391879404.
[I 2025-08-26 18:01:51,218] Trial 2 finished with value: 0.5148760247649438 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6815667391879404.
[I 2025-08-26 18:02:05,151] Trial 3 finished with value: 0.6000648924371828 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6815667391879404.
[I 2025-08-26 18:02:24,844] Trial 4 finished with value: 0.688762

🌱 DecisionTree (concatenate) - Train/Validation emissions: 0.000312 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/DecisionTree/train_val.json
🌱 DecisionTree (concatenate) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/RandomForest' created successfully!


[I 2025-08-26 18:08:54,203] A new study created in memory with name: no-name-f5eaa952-c0b4-469b-86d4-366a68b7d26e
[I 2025-08-26 18:09:11,414] Trial 0 finished with value: 0.5210345572354211 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.5210345572354211.
[I 2025-08-26 18:09:29,471] Trial 1 finished with value: 0.7020160337211307 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7020160337211307.
[I 2025-08-26 18:10:53,627] Trial 2 finished with value: 0.589568147396432 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7020160337211307.
[I 2025-08-26 18:11:32,348] Trial 3 finished with value: 0.5366795510864691 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.70201

🌱 RandomForest (concatenate) - Train/Validation emissions: 0.000978 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/RandomForest/train_val.json
🌱 RandomForest (concatenate) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/SVM' created successfully!


[I 2025-08-26 18:31:14,872] A new study created in memory with name: no-name-1b06dbd1-5a12-4177-a7a4-8bb24bc01478
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:31:20,881] Trial 0 finished with value: 0.5017711876308011 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5017711876308011.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:31:44,267] Trial 1 finished with value: 0.5677512342853877 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5677512342853877.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:31:44,740] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5677512342853877.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:31:57,557] Trial 3 finished with value: 0.5790249052094896 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (concatenate) - Train/Validation emissions: 0.000367 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/SVM/train_val.json




🌱 SVM (concatenate) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/concatenate/SVM/test_classification_report.json

🔀 Testing combination method: mean
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean' created successfully!

🔎 Running Optuna study for KNN (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/KNN' created successfully!


[I 2025-08-26 18:40:28,319] A new study created in memory with name: no-name-706476aa-75eb-4120-85b6-e6f4b4a54f53
[I 2025-08-26 18:40:29,046] Trial 0 finished with value: 0.5833568500974267 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.5833568500974267.
[I 2025-08-26 18:40:30,236] Trial 1 finished with value: 0.6604724025974026 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6604724025974026.
[I 2025-08-26 18:40:31,257] Trial 2 finished with value: 0.5772658493393763 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6604724025974026.
[I 2025-08-26 18:40:32,277] Trial 3 finished with value: 0.7350235708318416 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7350235708318416.
[I 2025-08-26 18:40:37,177] Trial 4 finished with value: 0.5835220627

🌱 KNN (mean) - Train/Validation emissions: 0.000034 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/KNN/train_val.json
🌱 KNN (mean) - Test emissions: 0.000004 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/LogReg' created successfully!


[I 2025-08-26 18:41:17,359] A new study created in memory with name: no-name-603ee718-0e07-41cd-9c64-c5c56ad3731d
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:41:17,742] Trial 0 finished with value: 0.4596013216586913 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.4596013216586913.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:41:19,659] Trial 1 finished with value: 0.5523822851197683 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.5523822851197683.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:41:21,392] Trial 2 finished with value: 0.5284962470882582 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.5523822851197683.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 18:41:22,731] Trial 3 finished with value: 0.516809669028792 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.5523822851197683.
  C = trial.suggest_loguniform("

🌱 LogReg (mean) - Train/Validation emissions: 0.000056 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/LogReg/train_val.json
🌱 LogReg (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/DecisionTree' created successfully!


[I 2025-08-26 18:42:00,259] A new study created in memory with name: no-name-ddc2b0bf-3e28-4259-b9aa-06d63d96060e
[I 2025-08-26 18:42:19,533] Trial 0 finished with value: 0.664079856805714 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.664079856805714.
[I 2025-08-26 18:42:30,736] Trial 1 finished with value: 0.5922586964056871 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.664079856805714.
[I 2025-08-26 18:42:34,317] Trial 2 finished with value: 0.5322420854650433 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.664079856805714.
[I 2025-08-26 18:42:46,544] Trial 3 finished with value: 0.6027127610735047 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.664079856805714.
[I 2025-08-26 18:43:08,110] Trial 4 finished with value: 0.70040357200

🌱 DecisionTree (mean) - Train/Validation emissions: 0.000290 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/DecisionTree/train_val.json
🌱 DecisionTree (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/RandomForest' created successfully!


[I 2025-08-26 18:49:04,287] A new study created in memory with name: no-name-61e11a09-690e-4056-acb6-d2981663c737
[I 2025-08-26 18:49:24,020] Trial 0 finished with value: 0.4880319769780158 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4880319769780158.
[I 2025-08-26 18:49:45,913] Trial 1 finished with value: 0.6734279742809927 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.6734279742809927.
[I 2025-08-26 18:51:25,008] Trial 2 finished with value: 0.5646391829063211 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.6734279742809927.
[I 2025-08-26 18:52:10,614] Trial 3 finished with value: 0.5315738088863287 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.6734

🌱 RandomForest (mean) - Train/Validation emissions: 0.001116 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/RandomForest/train_val.json
🌱 RandomForest (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/SVM' created successfully!


[I 2025-08-26 19:14:33,143] A new study created in memory with name: no-name-d62b9461-972c-4b27-99e3-a2e616a6e8ca
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:14:36,381] Trial 0 finished with value: 0.4786427151379467 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.4786427151379467.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:14:49,046] Trial 1 finished with value: 0.5338726899729271 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5338726899729271.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:14:49,295] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5338726899729271.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:14:53,973] Trial 3 finished with value: 0.5378111516760704 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (mean) - Train/Validation emissions: 0.000166 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/SVM/train_val.json




🌱 SVM (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/mean/SVM/test_classification_report.json

🔀 Testing combination method: min
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min' created successfully!

🔎 Running Optuna study for KNN (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/KNN' created successfully!


[I 2025-08-26 19:18:43,967] A new study created in memory with name: no-name-e33ef856-4c92-4b52-a9dc-f3f18ccc9de8
[I 2025-08-26 19:18:44,691] Trial 0 finished with value: 0.5938337854567932 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.5938337854567932.
[I 2025-08-26 19:18:46,689] Trial 1 finished with value: 0.6487619130442885 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6487619130442885.
[I 2025-08-26 19:18:48,004] Trial 2 finished with value: 0.5644438949606844 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6487619130442885.
[I 2025-08-26 19:18:49,150] Trial 3 finished with value: 0.7347607412932879 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7347607412932879.
[I 2025-08-26 19:18:54,781] Trial 4 finished with value: 0.5735437100

🌱 KNN (min) - Train/Validation emissions: 0.000032 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/KNN/train_val.json
🌱 KNN (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/LogReg' created successfully!


[I 2025-08-26 19:19:28,261] A new study created in memory with name: no-name-538264ba-5a4c-4a82-97c5-6dd80a10962e
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:19:28,791] Trial 0 finished with value: 0.4661317364404362 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.4661317364404362.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:19:30,455] Trial 1 finished with value: 0.5717547943037047 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.5717547943037047.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:19:32,036] Trial 2 finished with value: 0.5566213163255678 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.5717547943037047.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:19:33,086] Trial 3 finished with value: 0.5340365702664336 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.5717547943037047.
  C = trial.suggest_loguniform(

🌱 LogReg (min) - Train/Validation emissions: 0.000046 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/LogReg/train_val.json
🌱 LogReg (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/DecisionTree' created successfully!


[I 2025-08-26 19:20:05,639] A new study created in memory with name: no-name-80387c97-967f-42dc-91a0-c270bc824f0a
[I 2025-08-26 19:20:17,264] Trial 0 finished with value: 0.6782203808740261 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6782203808740261.
[I 2025-08-26 19:20:23,863] Trial 1 finished with value: 0.5959273540094998 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6782203808740261.
[I 2025-08-26 19:20:26,205] Trial 2 finished with value: 0.5233105751282636 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6782203808740261.
[I 2025-08-26 19:20:33,883] Trial 3 finished with value: 0.6167107225698062 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6782203808740261.
[I 2025-08-26 19:20:45,429] Trial 4 finished with value: 0.683928

🌱 DecisionTree (min) - Train/Validation emissions: 0.000198 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/DecisionTree/train_val.json
🌱 DecisionTree (min) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/RandomForest' created successfully!


[I 2025-08-26 19:25:03,724] A new study created in memory with name: no-name-3ea7190b-1176-42d7-bdfb-c6edf9e58688
[I 2025-08-26 19:25:17,213] Trial 0 finished with value: 0.4870940328921268 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4870940328921268.
[I 2025-08-26 19:25:32,083] Trial 1 finished with value: 0.6951524121791622 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.6951524121791622.
[I 2025-08-26 19:26:39,334] Trial 2 finished with value: 0.5752625294675933 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.6951524121791622.
[I 2025-08-26 19:27:10,238] Trial 3 finished with value: 0.5296320356957487 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.6951

🌱 RandomForest (min) - Train/Validation emissions: 0.000794 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/RandomForest/train_val.json
🌱 RandomForest (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/SVM' created successfully!


[I 2025-08-26 19:43:10,840] A new study created in memory with name: no-name-c634ad0d-8db5-474f-97f8-599f2ae2be7a
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:43:15,524] Trial 0 finished with value: 0.48424505398006096 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.48424505398006096.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:43:31,548] Trial 1 finished with value: 0.5418309840924682 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5418309840924682.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:43:31,811] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5418309840924682.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:43:34,257] Trial 3 finished with value: 0.5532740377964824 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best

🌱 SVM (min) - Train/Validation emissions: 0.000169 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/SVM/train_val.json




🌱 SVM (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/min/SVM/test_classification_report.json

🔀 Testing combination method: max
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max' created successfully!

🔎 Running Optuna study for KNN (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/KNN' created successfully!


[I 2025-08-26 19:47:32,794] A new study created in memory with name: no-name-ef1bf4e7-8872-46cd-988f-dee75b4a14a4
[I 2025-08-26 19:47:33,574] Trial 0 finished with value: 0.5894008558263195 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.5894008558263195.
[I 2025-08-26 19:47:34,735] Trial 1 finished with value: 0.6632837140825715 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6632837140825715.
[I 2025-08-26 19:47:35,742] Trial 2 finished with value: 0.5694311588521248 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6632837140825715.
[I 2025-08-26 19:47:36,751] Trial 3 finished with value: 0.7313591459504545 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7313591459504545.
[I 2025-08-26 19:47:41,556] Trial 4 finished with value: 0.5739283544

🌱 KNN (max) - Train/Validation emissions: 0.000033 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/KNN/train_val.json
🌱 KNN (max) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/LogReg' created successfully!


[I 2025-08-26 19:48:17,304] A new study created in memory with name: no-name-f516054b-8e41-4d35-b98a-e073f2c76c99
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:48:17,804] Trial 0 finished with value: 0.46268383643415395 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.46268383643415395.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:48:19,414] Trial 1 finished with value: 0.5726950233616077 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.5726950233616077.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:48:20,167] Trial 2 finished with value: 0.5612036862939292 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.5726950233616077.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 19:48:21,093] Trial 3 finished with value: 0.5353634842310193 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.5726950233616077.
  C = trial.suggest_logunifor

🌱 LogReg (max) - Train/Validation emissions: 0.000042 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/LogReg/train_val.json
🌱 LogReg (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/DecisionTree' created successfully!


[I 2025-08-26 19:48:51,529] A new study created in memory with name: no-name-fff78bf0-a0a6-4b6c-b99c-46aeffea8b49
[I 2025-08-26 19:49:03,097] Trial 0 finished with value: 0.6790471891924381 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6790471891924381.
[I 2025-08-26 19:49:09,709] Trial 1 finished with value: 0.5885839708243932 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6790471891924381.
[I 2025-08-26 19:49:11,990] Trial 2 finished with value: 0.5341517710344406 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6790471891924381.
[I 2025-08-26 19:49:19,173] Trial 3 finished with value: 0.601677204931917 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6790471891924381.
[I 2025-08-26 19:49:30,208] Trial 4 finished with value: 0.6884219

🌱 DecisionTree (max) - Train/Validation emissions: 0.000157 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/DecisionTree/train_val.json
🌱 DecisionTree (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/RandomForest' created successfully!


[I 2025-08-26 19:52:46,077] A new study created in memory with name: no-name-1a2637f4-5d7f-447c-9dd2-0943252cafc2
[I 2025-08-26 19:52:59,054] Trial 0 finished with value: 0.4923792602789636 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4923792602789636.
[I 2025-08-26 19:53:13,274] Trial 1 finished with value: 0.6922194445413538 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.6922194445413538.
[I 2025-08-26 19:54:19,367] Trial 2 finished with value: 0.5861539697846352 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.6922194445413538.
[I 2025-08-26 19:54:48,649] Trial 3 finished with value: 0.5320499073683517 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.6922

🌱 RandomForest (max) - Train/Validation emissions: 0.000538 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/RandomForest/train_val.json
🌱 RandomForest (max) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/SVM' created successfully!


[I 2025-08-26 20:05:33,050] A new study created in memory with name: no-name-69bb26f6-6189-4909-aa4a-8ab0c8ede14b
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:05:38,325] Trial 0 finished with value: 0.4948758056914763 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.4948758056914763.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:05:54,910] Trial 1 finished with value: 0.5409119472780358 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5409119472780358.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:05:55,171] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5409119472780358.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:05:57,905] Trial 3 finished with value: 0.5500746185764096 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (max) - Train/Validation emissions: 0.000199 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/SVM/train_val.json




🌱 SVM (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/max/SVM/test_classification_report.json

🟢 Running pipeline for embedding: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2' created successfully!

🔀 Testing combination method: concatenate
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate' created successfully!

🔎 Running Optuna study for KNN (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/KNN' created successfully!


[I 2025-08-26 20:10:32,496] A new study created in memory with name: no-name-e495f4f8-56d3-46b0-8ce7-4722d9a9a561
[I 2025-08-26 20:10:34,956] Trial 0 finished with value: 0.6197304600442601 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6197304600442601.
[I 2025-08-26 20:10:37,016] Trial 1 finished with value: 0.6903226419667425 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6903226419667425.
[I 2025-08-26 20:10:38,887] Trial 2 finished with value: 0.5907984815547841 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6903226419667425.
[I 2025-08-26 20:10:40,867] Trial 3 finished with value: 0.7378288645865987 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7378288645865987.
[I 2025-08-26 20:11:00,972] Trial 4 finished with value: 0.5990934477

🌱 KNN (concatenate) - Train/Validation emissions: 0.000111 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/KNN/train_val.json
🌱 KNN (concatenate) - Test emissions: 0.000005 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/LogReg' created successfully!


[I 2025-08-26 20:12:31,325] A new study created in memory with name: no-name-9794fe5f-f52e-4d5e-9c24-7a4436f3021c
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:12:32,404] Trial 0 finished with value: 0.5135291179164264 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.5135291179164264.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:12:36,858] Trial 1 finished with value: 0.6225087862896376 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6225087862896376.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:12:39,134] Trial 2 finished with value: 0.6005496816243567 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6225087862896376.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 20:12:40,932] Trial 3 finished with value: 0.5829244271934653 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6225087862896376.
  C = trial.suggest_loguniform(

🌱 LogReg (concatenate) - Train/Validation emissions: 0.000110 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/LogReg/train_val.json
🌱 LogReg (concatenate) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/DecisionTree' created successfully!


[I 2025-08-26 20:13:46,404] A new study created in memory with name: no-name-ec8e7f4b-4cb4-42d9-9cdd-a0377156056e
[I 2025-08-26 20:14:31,521] Trial 0 finished with value: 0.6900991998801642 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6900991998801642.
[I 2025-08-26 20:14:56,971] Trial 1 finished with value: 0.5929494993682023 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6900991998801642.
[I 2025-08-26 20:15:05,842] Trial 2 finished with value: 0.5122690915627017 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6900991998801642.
[I 2025-08-26 20:15:34,210] Trial 3 finished with value: 0.6136763237375253 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6900991998801642.
[I 2025-08-26 20:16:15,491] Trial 4 finished with value: 0.698940

🌱 DecisionTree (concatenate) - Train/Validation emissions: 0.000574 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/DecisionTree/train_val.json
🌱 DecisionTree (concatenate) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/RandomForest' created successfully!


[I 2025-08-26 20:27:23,946] A new study created in memory with name: no-name-164912fa-8b94-4569-a769-93053d31adb9
[I 2025-08-26 20:27:48,525] Trial 0 finished with value: 0.513144580529857 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.513144580529857.
[I 2025-08-26 20:28:15,132] Trial 1 finished with value: 0.7152419074549625 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7152419074549625.
[I 2025-08-26 20:30:17,937] Trial 2 finished with value: 0.6134594732433216 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7152419074549625.
[I 2025-08-26 20:31:14,416] Trial 3 finished with value: 0.5598133640333669 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.715241

🌱 RandomForest (concatenate) - Train/Validation emissions: 0.001509 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/RandomForest/train_val.json
🌱 RandomForest (concatenate) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (concatenate)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/SVM' created successfully!


[I 2025-08-26 21:05:06,170] A new study created in memory with name: no-name-5cc2e4f9-7ff8-4e64-8b40-4832730c40b4
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:05:13,398] Trial 0 finished with value: 0.5139697098154071 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5139697098154071.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:05:49,948] Trial 1 finished with value: 0.5984029539634328 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5984029539634328.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:05:50,938] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5984029539634328.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:06:22,912] Trial 3 finished with value: 0.6093288095007445 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (concatenate) - Train/Validation emissions: 0.000017 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/SVM/train_val.json




🌱 SVM (concatenate) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/concatenate/SVM/test_classification_report.json

🔀 Testing combination method: mean
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean' created successfully!

🔎 Running Optuna study for KNN (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/KNN' created successfully!


[I 2025-08-26 21:23:18,082] A new study created in memory with name: no-name-ce4bed42-c47e-4f93-904f-8271d80db504
[I 2025-08-26 21:23:19,411] Trial 0 finished with value: 0.611928314212972 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.611928314212972.
[I 2025-08-26 21:23:20,675] Trial 1 finished with value: 0.680724688529182 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.680724688529182.
[I 2025-08-26 21:23:21,956] Trial 2 finished with value: 0.596069855626869 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.680724688529182.
[I 2025-08-26 21:23:23,236] Trial 3 finished with value: 0.7401091601973291 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7401091601973291.
[I 2025-08-26 21:23:34,596] Trial 4 finished with value: 0.6065314400096322

🌱 KNN (mean) - Train/Validation emissions: 0.000056 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/KNN/train_val.json
🌱 KNN (mean) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/LogReg' created successfully!


[I 2025-08-26 21:24:26,345] A new study created in memory with name: no-name-d46946e5-4e8c-4b1c-8ecd-b5144e7bcd61
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:24:27,092] Trial 0 finished with value: 0.47343243836819854 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.47343243836819854.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:24:29,678] Trial 1 finished with value: 0.5877617712469381 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.5877617712469381.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:24:30,963] Trial 2 finished with value: 0.561572040894433 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.5877617712469381.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 21:24:32,272] Trial 3 finished with value: 0.5308240773692459 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.5877617712469381.
  C = trial.suggest_loguniform

🌱 LogReg (mean) - Train/Validation emissions: 0.000063 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/LogReg/train_val.json
🌱 LogReg (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/DecisionTree' created successfully!


[I 2025-08-26 21:25:14,778] A new study created in memory with name: no-name-64bba678-4e2e-4e35-b5d8-45b5e0289c2b
[I 2025-08-26 21:25:52,555] Trial 0 finished with value: 0.6788204907240943 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6788204907240943.
[I 2025-08-26 21:26:14,200] Trial 1 finished with value: 0.6129455536892631 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6788204907240943.
[I 2025-08-26 21:26:21,329] Trial 2 finished with value: 0.5362923282157581 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6788204907240943.
[I 2025-08-26 21:26:44,924] Trial 3 finished with value: 0.6256703362975531 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6788204907240943.
[I 2025-08-26 21:27:22,326] Trial 4 finished with value: 0.702407

🌱 DecisionTree (mean) - Train/Validation emissions: 0.000542 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/DecisionTree/train_val.json
🌱 DecisionTree (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/RandomForest' created successfully!


[I 2025-08-26 21:38:16,890] A new study created in memory with name: no-name-4e047291-1924-4269-be0d-e9c341ea2040
[I 2025-08-26 21:38:44,920] Trial 0 finished with value: 0.48353896475060615 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.48353896475060615.
[I 2025-08-26 21:39:16,551] Trial 1 finished with value: 0.6924659025800082 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.6924659025800082.
[I 2025-08-26 21:41:38,107] Trial 2 finished with value: 0.5998135878566513 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.6924659025800082.
[I 2025-08-26 21:42:44,343] Trial 3 finished with value: 0.5565200355652004 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.69

🌱 RandomForest (mean) - Train/Validation emissions: 0.001189 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/RandomForest/train_val.json
🌱 RandomForest (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (mean)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/SVM' created successfully!


[I 2025-08-26 22:06:35,241] A new study created in memory with name: no-name-0fe5c2da-90c5-4dfe-9e31-310ed5a5480b
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:06:38,811] Trial 0 finished with value: 0.49098462632186973 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.49098462632186973.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:06:57,484] Trial 1 finished with value: 0.5555606017713227 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5555606017713227.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:06:57,988] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5555606017713227.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:07:09,736] Trial 3 finished with value: 0.5691712883133009 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best

🌱 SVM (mean) - Train/Validation emissions: 0.000327 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/SVM/train_val.json




🌱 SVM (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/mean/SVM/test_classification_report.json

🔀 Testing combination method: min
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min' created successfully!

🔎 Running Optuna study for KNN (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/KNN' created successfully!


[I 2025-08-26 22:14:53,691] A new study created in memory with name: no-name-5bdc0a29-6c8b-4c47-b3a7-4a809dd5a7d1
[I 2025-08-26 22:14:55,019] Trial 0 finished with value: 0.6136064661503233 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6136064661503233.
[I 2025-08-26 22:14:56,246] Trial 1 finished with value: 0.6864469193611904 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6864469193611904.
[I 2025-08-26 22:14:57,529] Trial 2 finished with value: 0.5935422952725524 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6864469193611904.
[I 2025-08-26 22:14:58,819] Trial 3 finished with value: 0.735264483627204 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.735264483627204.
[I 2025-08-26 22:15:09,566] Trial 4 finished with value: 0.595698969393

🌱 KNN (min) - Train/Validation emissions: 0.000054 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/KNN/train_val.json
🌱 KNN (min) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/LogReg' created successfully!


[I 2025-08-26 22:15:59,751] A new study created in memory with name: no-name-83bbbc5e-f86d-4289-9b72-e115f937d6f6
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:16:00,401] Trial 0 finished with value: 0.48672750633463596 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.48672750633463596.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:16:03,299] Trial 1 finished with value: 0.6293063574524472 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6293063574524472.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:16:05,269] Trial 2 finished with value: 0.6160899956287772 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6293063574524472.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:16:06,756] Trial 3 finished with value: 0.584101272439943 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6293063574524472.
  C = trial.suggest_loguniform

🌱 LogReg (min) - Train/Validation emissions: 0.000062 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/LogReg/train_val.json
🌱 LogReg (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/DecisionTree' created successfully!


[I 2025-08-26 22:16:45,780] A new study created in memory with name: no-name-07d59796-3c3e-453d-9d4d-2c5c70e1fa6d
[I 2025-08-26 22:17:10,581] Trial 0 finished with value: 0.6732702342409117 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6732702342409117.
[I 2025-08-26 22:17:24,376] Trial 1 finished with value: 0.5965539984909756 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6732702342409117.
[I 2025-08-26 22:17:29,054] Trial 2 finished with value: 0.5061234440136926 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6732702342409117.
[I 2025-08-26 22:17:44,146] Trial 3 finished with value: 0.6125439779111818 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6732702342409117.
[I 2025-08-26 22:18:06,703] Trial 4 finished with value: 0.704035

🌱 DecisionTree (min) - Train/Validation emissions: 0.000330 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/DecisionTree/train_val.json
🌱 DecisionTree (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/RandomForest' created successfully!


[I 2025-08-26 22:24:45,493] A new study created in memory with name: no-name-5f35bdfe-7bb0-4ca1-bc2d-5247704d7700
[I 2025-08-26 22:25:04,231] Trial 0 finished with value: 0.4781278074581373 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4781278074581373.
[I 2025-08-26 22:25:25,951] Trial 1 finished with value: 0.7103258152209352 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7103258152209352.
[I 2025-08-26 22:27:00,264] Trial 2 finished with value: 0.6082627319197274 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7103258152209352.
[I 2025-08-26 22:27:43,075] Trial 3 finished with value: 0.5533696076260374 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.7103

🌱 RandomForest (min) - Train/Validation emissions: 0.001031 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/RandomForest/train_val.json
🌱 RandomForest (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (min)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/SVM' created successfully!


[I 2025-08-26 22:48:18,061] A new study created in memory with name: no-name-1803ebfc-93e6-4a65-9d5a-21d81dbe4711
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:48:24,284] Trial 0 finished with value: 0.5199655646523518 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5199655646523518.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:48:57,983] Trial 1 finished with value: 0.6107232647930322 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.6107232647930322.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:48:58,494] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.6107232647930322.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:49:05,932] Trial 3 finished with value: 0.6117120000295557 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (min) - Train/Validation emissions: 0.000402 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/SVM/train_val.json




🌱 SVM (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/min/SVM/test_classification_report.json

🔀 Testing combination method: max
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max' created successfully!

🔎 Running Optuna study for KNN (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/KNN' created successfully!


[I 2025-08-26 22:58:14,500] A new study created in memory with name: no-name-c7c5a713-e10f-4925-af43-6d21bdb7ee51
[I 2025-08-26 22:58:15,794] Trial 0 finished with value: 0.6241156805728642 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6241156805728642.
[I 2025-08-26 22:58:17,062] Trial 1 finished with value: 0.6878582207161519 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6878582207161519.
[I 2025-08-26 22:58:18,508] Trial 2 finished with value: 0.591730370320373 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.6878582207161519.
[I 2025-08-26 22:58:19,782] Trial 3 finished with value: 0.7383786792769492 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7383786792769492.
[I 2025-08-26 22:58:31,090] Trial 4 finished with value: 0.59627674272

🌱 KNN (max) - Train/Validation emissions: 0.000055 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/KNN/train_val.json
🌱 KNN (max) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/LogReg' created successfully!


[I 2025-08-26 22:59:21,637] A new study created in memory with name: no-name-e5ae8b6a-a185-434b-9148-94150b33906d
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:59:22,372] Trial 0 finished with value: 0.49812207636301126 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.49812207636301126.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:59:25,188] Trial 1 finished with value: 0.631473533237984 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.631473533237984.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:59:27,292] Trial 2 finished with value: 0.6156890268641979 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.631473533237984.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 22:59:29,137] Trial 3 finished with value: 0.5865731831626635 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.631473533237984.
  C = trial.suggest_loguniform("C

🌱 LogReg (max) - Train/Validation emissions: 0.000065 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/LogReg/train_val.json
🌱 LogReg (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/DecisionTree' created successfully!


[I 2025-08-26 23:00:08,975] A new study created in memory with name: no-name-3bb33732-a48c-43f5-a33e-d09aa1a4f3dc
[I 2025-08-26 23:00:33,614] Trial 0 finished with value: 0.6880875832123506 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6880875832123506.
[I 2025-08-26 23:00:48,268] Trial 1 finished with value: 0.6068286998714493 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6880875832123506.
[I 2025-08-26 23:00:52,983] Trial 2 finished with value: 0.5347408099752468 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6880875832123506.
[I 2025-08-26 23:01:08,987] Trial 3 finished with value: 0.6336781480530369 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6880875832123506.
[I 2025-08-26 23:01:33,107] Trial 4 finished with value: 0.710652

🌱 DecisionTree (max) - Train/Validation emissions: 0.000339 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/DecisionTree/train_val.json
🌱 DecisionTree (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/RandomForest' created successfully!


[I 2025-08-26 23:08:24,774] A new study created in memory with name: no-name-cbe3b8a7-43b0-45dd-8952-4ce3b5d28ac7
[I 2025-08-26 23:08:43,816] Trial 0 finished with value: 0.4888965032813361 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4888965032813361.
[I 2025-08-26 23:09:05,974] Trial 1 finished with value: 0.7053430175105122 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7053430175105122.
[I 2025-08-26 23:10:41,760] Trial 2 finished with value: 0.59334006205824 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7053430175105122.
[I 2025-08-26 23:11:25,502] Trial 3 finished with value: 0.5557682710430357 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.705343

🌱 RandomForest (max) - Train/Validation emissions: 0.000764 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/RandomForest/train_val.json
🌱 RandomForest (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (max)...
Directory 'results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/SVM' created successfully!


[I 2025-08-26 23:26:03,658] A new study created in memory with name: no-name-3c17fc80-68c9-486d-88b5-721ce03a6a03
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:26:10,457] Trial 0 finished with value: 0.5288361577554057 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5288361577554057.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:26:42,937] Trial 1 finished with value: 0.6074336442030409 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.6074336442030409.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:26:43,443] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.6074336442030409.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:26:49,777] Trial 3 finished with value: 0.6167466169436414 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (max) - Train/Validation emissions: 0.000389 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/SVM/train_val.json




🌱 SVM (max) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/max/SVM/test_classification_report.json

🟢 Running pipeline for embedding: ibm-granite/granite-embedding-278m-multilingual
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual' created successfully!

🔀 Testing combination method: concatenate
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate' created successfully!

🔎 Running Optuna study for KNN (concatenate)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/KNN' created successfully!


[I 2025-08-26 23:35:53,235] A new study created in memory with name: no-name-02b6225b-416c-43fc-939c-5398f64bf1f7
[I 2025-08-26 23:35:55,513] Trial 0 finished with value: 0.6313753763116103 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6313753763116103.
[I 2025-08-26 23:35:57,345] Trial 1 finished with value: 0.7152273630072548 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.7152273630072548.
[I 2025-08-26 23:35:59,155] Trial 2 finished with value: 0.6032595892829582 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.7152273630072548.
[I 2025-08-26 23:36:00,992] Trial 3 finished with value: 0.7416651166647039 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7416651166647039.
[I 2025-08-26 23:36:19,751] Trial 4 finished with value: 0.6129833620

🌱 KNN (concatenate) - Train/Validation emissions: 0.000118 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/KNN/train_val.json
🌱 KNN (concatenate) - Test emissions: 0.000007 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (concatenate)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/LogReg' created successfully!


[I 2025-08-26 23:37:57,253] A new study created in memory with name: no-name-a8926b7d-71d6-4ea8-9af1-562e686a2edc
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:37:58,248] Trial 0 finished with value: 0.4996923083783537 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.4996923083783537.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:38:00,587] Trial 1 finished with value: 0.6294807795357757 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6294807795357757.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:38:02,875] Trial 2 finished with value: 0.6287755556681128 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6294807795357757.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-26 23:38:04,414] Trial 3 finished with value: 0.6015022992819026 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6294807795357757.
  C = trial.suggest_loguniform(

🌱 LogReg (concatenate) - Train/Validation emissions: 0.000071 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/LogReg/train_val.json
🌱 LogReg (concatenate) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (concatenate)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/DecisionTree' created successfully!


[I 2025-08-26 23:38:50,087] A new study created in memory with name: no-name-593d9f4c-ce89-49cd-9599-e3d5ad88ea52
[I 2025-08-26 23:39:32,068] Trial 0 finished with value: 0.7010240580127381 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.7010240580127381.
[I 2025-08-26 23:39:57,728] Trial 1 finished with value: 0.6358098711957141 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.7010240580127381.
[I 2025-08-26 23:40:06,224] Trial 2 finished with value: 0.5069909784488533 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.7010240580127381.
[I 2025-08-26 23:40:34,023] Trial 3 finished with value: 0.6531585953843229 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.7010240580127381.
[I 2025-08-26 23:41:09,862] Trial 4 finished with value: 0.696528

🌱 DecisionTree (concatenate) - Train/Validation emissions: 0.000536 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/DecisionTree/train_val.json
🌱 DecisionTree (concatenate) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (concatenate)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/RandomForest' created successfully!


[I 2025-08-26 23:51:40,946] A new study created in memory with name: no-name-a4143a20-d0f6-43a9-8c56-878fbb05c5a1
[I 2025-08-26 23:52:05,657] Trial 0 finished with value: 0.5132675133230217 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.5132675133230217.
[I 2025-08-26 23:52:32,532] Trial 1 finished with value: 0.7195344937397712 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7195344937397712.
[I 2025-08-26 23:54:35,727] Trial 2 finished with value: 0.6020057349865491 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7195344937397712.
[I 2025-08-26 23:55:31,711] Trial 3 finished with value: 0.5429719240926327 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.7195

🌱 RandomForest (concatenate) - Train/Validation emissions: 0.000036 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/RandomForest/train_val.json
🌱 RandomForest (concatenate) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (concatenate)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/SVM' created successfully!


[I 2025-08-27 00:29:10,808] A new study created in memory with name: no-name-24e21963-e16b-4ac2-ba5e-bb9cbd99a5b3
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:29:20,193] Trial 0 finished with value: 0.5145589080342383 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5145589080342383.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:29:54,093] Trial 1 finished with value: 0.5985658655282777 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5985658655282777.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:29:55,056] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5985658655282777.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:30:17,831] Trial 3 finished with value: 0.6126827200294818 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best i

🌱 SVM (concatenate) - Train/Validation emissions: 0.000740 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/SVM/train_val.json




🌱 SVM (concatenate) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/concatenate/SVM/test_classification_report.json

🔀 Testing combination method: mean
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean' created successfully!

🔎 Running Optuna study for KNN (mean)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/KNN' created successfully!


[I 2025-08-27 00:47:47,936] A new study created in memory with name: no-name-8aa5b9b8-c41d-4f0f-a8db-502226810e0b
[I 2025-08-27 00:47:49,238] Trial 0 finished with value: 0.6363547188554913 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6363547188554913.
[I 2025-08-27 00:47:50,495] Trial 1 finished with value: 0.71692514986923 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.71692514986923.
[I 2025-08-27 00:47:51,735] Trial 2 finished with value: 0.6065774416114487 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.71692514986923.
[I 2025-08-27 00:47:53,002] Trial 3 finished with value: 0.7413035244375723 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7413035244375723.
[I 2025-08-27 00:48:02,358] Trial 4 finished with value: 0.6154352789649318

🌱 KNN (mean) - Train/Validation emissions: 0.000064 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/KNN/train_val.json
🌱 KNN (mean) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (mean)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/LogReg' created successfully!


[I 2025-08-27 00:49:01,839] A new study created in memory with name: no-name-e20b894e-febc-4ef6-aa49-fbfcd929a8e5
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:49:02,323] Trial 0 finished with value: 0.4441878397741866 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.4441878397741866.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:49:04,874] Trial 1 finished with value: 0.5771095837213454 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.5771095837213454.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:49:07,046] Trial 2 finished with value: 0.560632345389329 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.5771095837213454.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 00:49:08,534] Trial 3 finished with value: 0.5393955253997882 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.5771095837213454.
  C = trial.suggest_loguniform("

🌱 LogReg (mean) - Train/Validation emissions: 0.000058 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/LogReg/train_val.json
🌱 LogReg (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (mean)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/DecisionTree' created successfully!


[I 2025-08-27 00:49:43,927] A new study created in memory with name: no-name-4cefc561-6b3a-47c7-9a2e-3f452f5de5ed
[I 2025-08-27 00:50:21,460] Trial 0 finished with value: 0.6713542857093405 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6713542857093405.
[I 2025-08-27 00:50:42,724] Trial 1 finished with value: 0.6021977000177763 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6713542857093405.
[I 2025-08-27 00:50:49,906] Trial 2 finished with value: 0.49368679212230243 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6713542857093405.
[I 2025-08-27 00:51:12,916] Trial 3 finished with value: 0.6271992054483542 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6713542857093405.
[I 2025-08-27 00:51:47,137] Trial 4 finished with value: 0.70067

🌱 DecisionTree (mean) - Train/Validation emissions: 0.000493 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/DecisionTree/train_val.json
🌱 DecisionTree (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (mean)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/RandomForest' created successfully!


[I 2025-08-27 01:01:20,075] A new study created in memory with name: no-name-57147556-beef-4c26-97d8-b5041ddfb447
[I 2025-08-27 01:01:48,079] Trial 0 finished with value: 0.45807916533583837 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.45807916533583837.
[I 2025-08-27 01:02:19,223] Trial 1 finished with value: 0.6982169924106468 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.6982169924106468.
[I 2025-08-27 01:04:39,575] Trial 2 finished with value: 0.5733030797639067 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.6982169924106468.
[I 2025-08-27 01:05:44,790] Trial 3 finished with value: 0.5341766418073806 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.69

🌱 RandomForest (mean) - Train/Validation emissions: 0.001921 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/RandomForest/train_val.json
🌱 RandomForest (mean) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (mean)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/SVM' created successfully!


[I 2025-08-27 01:48:24,066] A new study created in memory with name: no-name-ca67643d-5baa-4d97-8f5a-ad255a0e602e
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:48:28,109] Trial 0 finished with value: 0.46643655285962576 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.46643655285962576.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:48:46,086] Trial 1 finished with value: 0.5520126128531958 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.5520126128531958.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:48:46,584] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.5520126128531958.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:48:55,611] Trial 3 finished with value: 0.5634244274003999 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best

🌱 SVM (mean) - Train/Validation emissions: 0.000008 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/SVM/train_val.json




🌱 SVM (mean) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/mean/SVM/test_classification_report.json

🔀 Testing combination method: min
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min' created successfully!

🔎 Running Optuna study for KNN (min)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/KNN' created successfully!


[I 2025-08-27 01:57:19,088] A new study created in memory with name: no-name-361d133a-9ede-497b-9fcd-e117f2c7ea97
[I 2025-08-27 01:57:20,397] Trial 0 finished with value: 0.6387161243904389 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6387161243904389.
[I 2025-08-27 01:57:21,625] Trial 1 finished with value: 0.7196541948444666 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.7196541948444666.
[I 2025-08-27 01:57:22,905] Trial 2 finished with value: 0.6097402427344287 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.7196541948444666.
[I 2025-08-27 01:57:24,167] Trial 3 finished with value: 0.7433752871753606 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7433752871753606.
[I 2025-08-27 01:57:33,411] Trial 4 finished with value: 0.6169659167

🌱 KNN (min) - Train/Validation emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/KNN/train_val.json
🌱 KNN (min) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (min)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/LogReg' created successfully!


[I 2025-08-27 01:58:26,528] A new study created in memory with name: no-name-b714e9e9-54d1-43ee-9e24-2008cd867b56
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:58:27,039] Trial 0 finished with value: 0.44852475610574866 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.44852475610574866.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:58:29,299] Trial 1 finished with value: 0.6735031432679491 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6735031432679491.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:58:30,729] Trial 2 finished with value: 0.6463759988784523 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6735031432679491.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 01:58:32,091] Trial 3 finished with value: 0.5972430217970832 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6735031432679491.
  C = trial.suggest_logunifor

🌱 LogReg (min) - Train/Validation emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/LogReg/train_val.json
🌱 LogReg (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (min)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/DecisionTree' created successfully!


[I 2025-08-27 01:59:10,698] A new study created in memory with name: no-name-483c664e-bde4-4746-a4be-020396178a48
[I 2025-08-27 01:59:34,504] Trial 0 finished with value: 0.6890452963985118 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.6890452963985118.
[I 2025-08-27 01:59:48,156] Trial 1 finished with value: 0.6175523380531664 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.6890452963985118.
[I 2025-08-27 01:59:52,803] Trial 2 finished with value: 0.48914718138867025 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.6890452963985118.
[I 2025-08-27 02:00:07,597] Trial 3 finished with value: 0.6392953056519806 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.6890452963985118.
[I 2025-08-27 02:00:28,335] Trial 4 finished with value: 0.70099

🌱 DecisionTree (min) - Train/Validation emissions: 0.000290 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/DecisionTree/train_val.json
🌱 DecisionTree (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (min)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/RandomForest' created successfully!


[I 2025-08-27 02:06:18,233] A new study created in memory with name: no-name-3f2e270e-17e0-4002-8b32-c9ef67e4e523
[I 2025-08-27 02:06:37,051] Trial 0 finished with value: 0.4646964489261015 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.4646964489261015.
[I 2025-08-27 02:06:57,999] Trial 1 finished with value: 0.694999504676139 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.694999504676139.
[I 2025-08-27 02:08:32,115] Trial 2 finished with value: 0.5863213238444291 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.694999504676139.
[I 2025-08-27 02:09:15,043] Trial 3 finished with value: 0.5382273892194259 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.6949995

🌱 RandomForest (min) - Train/Validation emissions: 0.000747 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/RandomForest/train_val.json
🌱 RandomForest (min) - Test emissions: 0.000002 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (min)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/SVM' created successfully!


[I 2025-08-27 02:24:02,788] A new study created in memory with name: no-name-412ea47e-2ef6-400d-a030-db1d97ccadde
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:24:11,235] Trial 0 finished with value: 0.5179936327415432 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5179936327415432.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:24:52,120] Trial 1 finished with value: 0.6483013562971656 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.6483013562971656.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:24:52,618] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.6483013562971656.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:24:58,650] Trial 3 finished with value: 0.657054176956129 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best is

🌱 SVM (min) - Train/Validation emissions: 0.000478 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/SVM/train_val.json




🌱 SVM (min) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/min/SVM/test_classification_report.json

🔀 Testing combination method: max
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max' created successfully!

🔎 Running Optuna study for KNN (max)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/KNN' created successfully!


[I 2025-08-27 02:36:00,199] A new study created in memory with name: no-name-cd39d591-391c-4539-ab02-17760c670049
[I 2025-08-27 02:36:01,501] Trial 0 finished with value: 0.6323789143718708 and parameters: {'n_neighbors': 76, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 0 with value: 0.6323789143718708.
[I 2025-08-27 02:36:02,751] Trial 1 finished with value: 0.720696247668295 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.720696247668295.
[I 2025-08-27 02:36:04,091] Trial 2 finished with value: 0.6024972997830714 and parameters: {'n_neighbors': 167, 'weights': 'uniform', 'metric': 'cosine'}. Best is trial 1 with value: 0.720696247668295.
[I 2025-08-27 02:36:05,346] Trial 3 finished with value: 0.7411098258713287 and parameters: {'n_neighbors': 87, 'weights': 'distance', 'metric': 'cosine'}. Best is trial 3 with value: 0.7411098258713287.
[I 2025-08-27 02:36:16,811] Trial 4 finished with value: 0.6087053963468

🌱 KNN (max) - Train/Validation emissions: 0.000054 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/KNN/train_val.json
🌱 KNN (max) - Test emissions: 0.000003 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/KNN/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/KNN/test_classification_report.json

🔎 Running Optuna study for LogReg (max)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/LogReg' created successfully!


[I 2025-08-27 02:37:06,198] A new study created in memory with name: no-name-08b0a2de-def0-495d-8020-52af6c009088
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:37:06,782] Trial 0 finished with value: 0.4454382608320624 and parameters: {'C': 0.0745934328572655}. Best is trial 0 with value: 0.4454382608320624.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:37:09,372] Trial 1 finished with value: 0.6787214247740564 and parameters: {'C': 56.69849511478853}. Best is trial 1 with value: 0.6787214247740564.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:37:10,906] Trial 2 finished with value: 0.6684214355061465 and parameters: {'C': 4.5705630998014515}. Best is trial 1 with value: 0.6787214247740564.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 02:37:12,337] Trial 3 finished with value: 0.5986964769049179 and parameters: {'C': 0.9846738873614566}. Best is trial 1 with value: 0.6787214247740564.
  C = trial.suggest_loguniform(

🌱 LogReg (max) - Train/Validation emissions: 0.000062 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/LogReg/train_val.json
🌱 LogReg (max) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/LogReg/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/LogReg/test_classification_report.json

🔎 Running Optuna study for DecisionTree (max)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/DecisionTree' created successfully!


[I 2025-08-27 02:37:52,690] A new study created in memory with name: no-name-11383421-fa5b-4d12-8304-91c36c9032be
[I 2025-08-27 02:38:15,316] Trial 0 finished with value: 0.7063280925402872 and parameters: {'max_depth': 20, 'criterion': 'gini', 'min_samples_split': 4}. Best is trial 0 with value: 0.7063280925402872.
[I 2025-08-27 02:38:29,222] Trial 1 finished with value: 0.6058263484180041 and parameters: {'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 15}. Best is trial 0 with value: 0.7063280925402872.
[I 2025-08-27 02:38:33,864] Trial 2 finished with value: 0.48306486480054267 and parameters: {'max_depth': 3, 'criterion': 'gini', 'min_samples_split': 5}. Best is trial 0 with value: 0.7063280925402872.
[I 2025-08-27 02:38:49,016] Trial 3 finished with value: 0.6337885541333599 and parameters: {'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 7}. Best is trial 0 with value: 0.7063280925402872.
[I 2025-08-27 02:39:10,305] Trial 4 finished with value: 0.71015

🌱 DecisionTree (max) - Train/Validation emissions: 0.000308 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/DecisionTree/train_val.json
🌱 DecisionTree (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/DecisionTree/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/DecisionTree/test_classification_report.json

🔎 Running Optuna study for RandomForest (max)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/RandomForest' created successfully!


[I 2025-08-27 02:45:15,820] A new study created in memory with name: no-name-06129c4b-29a9-447a-94c5-a4ed2f5e34f9
[I 2025-08-27 02:45:34,451] Trial 0 finished with value: 0.47540691978443844 and parameters: {'n_estimators': 125, 'criterion': 'gini', 'min_samples_split': 4, 'max_depth': 4}. Best is trial 0 with value: 0.47540691978443844.
[I 2025-08-27 02:45:55,173] Trial 1 finished with value: 0.7174070811069685 and parameters: {'n_estimators': 36, 'criterion': 'gini', 'min_samples_split': 2, 'max_depth': 20}. Best is trial 1 with value: 0.7174070811069685.
[I 2025-08-27 02:47:28,132] Trial 2 finished with value: 0.5843549790397848 and parameters: {'n_estimators': 253, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 11}. Best is trial 1 with value: 0.7174070811069685.
[I 2025-08-27 02:48:10,500] Trial 3 finished with value: 0.5310834524275406 and parameters: {'n_estimators': 141, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': 8}. Best is trial 1 with value: 0.71

🌱 RandomForest (max) - Train/Validation emissions: 0.001161 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/RandomForest/train_val.json
🌱 RandomForest (max) - Test emissions: 0.000000 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/RandomForest/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/RandomForest/test_classification_report.json

🔎 Running Optuna study for SVM (max)...
Directory 'results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/SVM' created successfully!


[I 2025-08-27 03:14:28,167] A new study created in memory with name: no-name-0c1ff43b-f56c-4f2c-b0cd-1775e6002f81
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 03:14:34,720] Trial 0 finished with value: 0.5169983445069235 and parameters: {'C': 0.0745934328572655, 'penalty': 'l1'}. Best is trial 0 with value: 0.5169983445069235.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 03:15:14,151] Trial 1 finished with value: 0.6399837290457941 and parameters: {'C': 0.9846738873614566, 'penalty': 'l1'}. Best is trial 1 with value: 0.6399837290457941.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 03:15:14,647] Trial 2 finished with value: 0.44073433550618596 and parameters: {'C': 0.0019517224641449498, 'penalty': 'l1'}. Best is trial 1 with value: 0.6399837290457941.
  C = trial.suggest_loguniform("C", 1e-3, 1e2)
[I 2025-08-27 03:15:20,620] Trial 3 finished with value: 0.664313135817252 and parameters: {'C': 3.4702669886504163, 'penalty': 'l2'}. Best is

🌱 SVM (max) - Train/Validation emissions: 0.000011 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/SVM/train_val.json
🌱 SVM (max) - Test emissions: 0.000001 kg CO₂
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/SVM/test_results.json
✅ Dictionary saved at: results/BERT_classifiers/ibm-granite_granite-embedding-278m-multilingual/max/SVM/test_classification_report.json
