<a href="https://colab.research.google.com/github/MoamenTlili/nlp-AI/blob/main/P3_ENSEMBLE_MODEL_Task_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/897.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.1/897.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m890.9/897.5 kB[0m [31m14.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sacremoses
Successfully installed sacremoses-0.1.1


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from sklearn.metrics import f1_score, classification_report

In [4]:
df = pd.read_csv('/content/sample_data/MLMA_hate_speech.csv')
# Split the data
train_df, test_val_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(test_val_df, test_size=0.5, random_state=42)

# Prepare test data
test_texts = test_df['tweet'].values
test_labels = test_df['directness'].apply(lambda x: 1 if x == 'direct' else 0).values

# Function to preprocess the text data
def preprocess_data(texts, tokenizer, max_length=128):
    input_ids = []
    attention_masks = []
    for text in texts:
        encoding = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='tf'
        )
        input_ids.append(encoding['input_ids'])
        attention_masks.append(encoding['attention_mask'])
    return tf.concat(input_ids, axis=0), tf.concat(attention_masks, axis=0)

# Define paths to saved models
labse_model_path = '/content/drive/MyDrive/fine_tuned_mult_models_tf/p3_TASK1_labert_Mult_full'
xlm_roberta_model_path = '/content/drive/MyDrive/fine_tuned_mult_models_tf/p3_TASK1_xlmro_Mult_full'
mbert_model_path = '/content/drive/MyDrive/fine_tuned_mult_models_tf/p3_TASK1_mbert_Mult_full'
xlm_model_path = '/content/drive/MyDrive/fine_tuned_mult_models_tf/p3_TASK1_xlm_Mult_full'

# Load models using TFSMLayer
labse_model = tf.saved_model.load(labse_model_path)
xlm_roberta_model = tf.saved_model.load(xlm_roberta_model_path)
mbert_model = tf.saved_model.load(mbert_model_path)
xlm_model = tf.saved_model.load(xlm_model_path)

# Use the 'serving_default' signature
serving_fn_labse = labse_model.signatures['serving_default']
serving_fn_xlm_roberta = xlm_roberta_model.signatures['serving_default']
serving_fn_mbert = mbert_model.signatures['serving_default']
serving_fn_xlm = xlm_model.signatures['serving_default']

# Load tokenizers
labse_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/LaBSE")
xlm_roberta_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
mbert_tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')
xlm_tokenizer = AutoTokenizer.from_pretrained('xlm-mlm-enfr-1024')

# Preprocess test data
labse_input_ids, labse_attention_masks = preprocess_data(test_texts, labse_tokenizer)
xlm_roberta_input_ids, xlm_roberta_attention_masks = preprocess_data(test_texts, xlm_roberta_tokenizer)
mbert_input_ids, mbert_attention_masks = preprocess_data(test_texts, mbert_tokenizer)
xlm_input_ids, xlm_attention_masks = preprocess_data(test_texts, xlm_tokenizer)

labse_inputs = {
    'input_word_ids': labse_input_ids,
    'input_mask': labse_attention_masks
}

xlm_roberta_inputs = {
    'input_word_ids': xlm_roberta_input_ids,
    'input_mask': xlm_roberta_attention_masks
}

mbert_inputs = {
    'input_word_ids': mbert_input_ids,
    'input_mask': mbert_attention_masks
}

xlm_inputs = {
    'input_word_ids': xlm_input_ids,
    'input_mask': xlm_attention_masks
}

# Call the models with the inputs
predictions_labse = serving_fn_labse(input_word_ids=labse_input_ids, input_mask=labse_attention_masks)['dense_3']
predictions_xlm_roberta = serving_fn_xlm_roberta(
    input_word_ids=xlm_roberta_inputs['input_word_ids'],
    input_mask=xlm_roberta_inputs['input_mask']
)['dense_3']

predictions_mbert = serving_fn_mbert(
    input_word_ids=mbert_inputs['input_word_ids'],
    input_mask=mbert_inputs['input_mask']
)['dense_3']

predictions_xlm = serving_fn_xlm(
    input_word_ids=xlm_inputs['input_word_ids'],
    input_mask=xlm_inputs['input_mask']
)['dense_3']
def find_best_threshold(predictions, true_labels):
    best_threshold = 0.5
    best_f1 = 0.0
    # Search for the best threshold (0 to 1 with a step size of 0.01)
    for threshold in np.arange(0.0, 1.0, 0.01):
        binary_predictions = tf.cast(predictions > threshold, tf.int32)

        f1 = f1_score(true_labels, binary_predictions)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
    return best_threshold, best_f1

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/804 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/5.22M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.62M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.45M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.01M [00:00<?, ?B/s]

In [5]:
best_threshold_labse, best_f1_labse = find_best_threshold(predictions_labse, test_labels)
best_threshold_xlm_roberta, best_f1_xlm_roberta = find_best_threshold(predictions_xlm_roberta, test_labels)
best_threshold_mbert, best_f1_mbert = find_best_threshold(predictions_mbert, test_labels)
best_threshold_xlm, best_f1_xlm = find_best_threshold(predictions_xlm, test_labels)

# Print the best thresholds and corresponding F1 scores
print(f"Best Threshold for LaBSE: {best_threshold_labse}, F1: {best_f1_labse}")
print(f"Best Threshold for XLM-Roberta: {best_threshold_xlm_roberta}, F1: {best_f1_xlm_roberta}")
print(f"Best Threshold for mBERT: {best_threshold_mbert}, F1: {best_f1_mbert}")
print(f"Best Threshold for XLM: {best_threshold_xlm}, F1: {best_f1_xlm}")

Best Threshold for LaBSE: 0.25, F1: 0.8288288288288288
Best Threshold for XLM-Roberta: 0.42, F1: 0.8295114656031904
Best Threshold for mBERT: 0.6, F1: 0.6485436893203883
Best Threshold for XLM: 0.6, F1: 0.7564766839378239


In [6]:
binary_predictions_labse = tf.cast(predictions_labse > best_threshold_labse, tf.int32)
binary_predictions_xlm_roberta = tf.cast(predictions_xlm_roberta > best_threshold_xlm_roberta, tf.int32)
binary_predictions_mbert = tf.cast(predictions_mbert > best_threshold_mbert, tf.int32)
binary_predictions_xlm = tf.cast(predictions_xlm > best_threshold_xlm, tf.int32)

binary_predictions = np.stack([
    binary_predictions_labse.numpy(),
    binary_predictions_xlm_roberta.numpy(),
    binary_predictions_mbert.numpy(),
    binary_predictions_xlm.numpy()
], axis=1)

# Apply majority voting: Choose the class with the most votes for each instance
majVot_predictions = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=binary_predictions)
majVot_predictions = tf.convert_to_tensor(majVot_predictions, dtype=tf.int32)

# Averaging ensemble with best threshold
avg_predictions = (predictions_labse + predictions_xlm_roberta + predictions_mbert + predictions_xlm) / 4
best_threshold_avg, best_f1_avg = find_best_threshold(avg_predictions, test_labels)
binary_avg_predictions = tf.cast(avg_predictions > best_threshold_avg, tf.int32)

# Weighted averaging ensemble with best threshold
weights = [0.3, 0.3, 0.2, 0.2]  # Adjust weights if necessary
weighted_avg_predictions = (weights[0] * predictions_labse +
                            weights[1] * predictions_xlm_roberta +
                            weights[2] * predictions_mbert +
                            weights[3] * predictions_xlm)
best_threshold_weighted_avg, best_f1_weighted_avg = find_best_threshold(weighted_avg_predictions, test_labels)
binary_weighted_avg_predictions = tf.cast(weighted_avg_predictions > best_threshold_weighted_avg, tf.int32)

# Evaluate results for majority voting (no threshold optimization needed)
f1_majority = f1_score(test_labels, majVot_predictions)
f1_micro_majority = f1_score(test_labels, majVot_predictions, average='micro')
f1_macro_majority = f1_score(test_labels, majVot_predictions, average='macro')
report_majority = classification_report(test_labels, majVot_predictions, target_names=['Indirect', 'Direct'])

# Evaluate results for averaging ensemble
f1_avg = f1_score(test_labels, binary_avg_predictions)
f1_micro_avg = f1_score(test_labels, binary_avg_predictions, average='micro')
f1_macro_avg = f1_score(test_labels, binary_avg_predictions, average='macro')
report_avg = classification_report(test_labels, binary_avg_predictions, target_names=['Indirect', 'Direct'])

# Evaluate results for weighted averaging ensemble
f1_wavg = f1_score(test_labels, binary_weighted_avg_predictions)
f1_micro_wavg = f1_score(test_labels, binary_weighted_avg_predictions, average='micro')
f1_macro_wavg = f1_score(test_labels, binary_weighted_avg_predictions, average='macro')
report_wavg = classification_report(test_labels, binary_weighted_avg_predictions, target_names=['Indirect', 'Direct'])

# Print results
print("Majority Voting Results:")
print(f"  F1 Score: {f1_majority}")
print(f"  Micro F1 Score: {f1_micro_majority}")
print(f"  Macro F1 Score: {f1_macro_majority}")
print("Classification Report:")
print(report_majority)

print("\nAveraging Results (Best Threshold):")
print(f"  Best F1 Score: {best_f1_avg} (Threshold: {best_threshold_avg})")
print(f"  Micro F1 Score: {f1_micro_avg}")
print(f"  Macro F1 Score: {f1_macro_avg}")
print("Classification Report:")
print(report_avg)

print("\nWeighted Averaging Results (Best Threshold):")
print(f"  Best F1 Score: {best_f1_weighted_avg} (Threshold: {best_threshold_weighted_avg})")
print(f"  Micro F1 Score: {f1_micro_wavg}")
print(f"  Macro F1 Score: {f1_macro_wavg}")
print("Classification Report:")
print(report_wavg)

Majority Voting Results:
  F1 Score: 0.8072805139186295
  Micro F1 Score: 0.903588644884842
  Macro F1 Score: 0.8714973998164577
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.91      0.96      0.94      1361
      Direct       0.88      0.75      0.81       506

    accuracy                           0.90      1867
   macro avg       0.90      0.85      0.87      1867
weighted avg       0.90      0.90      0.90      1867


Averaging Results (Best Threshold):
  Best F1 Score: 0.8343434343434344 (Threshold: 0.49)
  Micro F1 Score: 0.9121585431173005
  Macro F1 Score: 0.8872883352475189
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.93      0.95      0.94      1361
      Direct       0.85      0.82      0.83       506

    accuracy                           0.91      1867
   macro avg       0.89      0.88      0.89      1867
weighted avg       0.91      0.91      0.91      1867


Weigh

Meta Learner – linear regression

In [7]:
from sklearn.linear_model import LogisticRegression
meta_features = np.hstack((
    predictions_labse.numpy(),
    predictions_xlm_roberta.numpy(),
    predictions_mbert.numpy(),
    predictions_xlm.numpy()
))
# Train meta-learner using validation data
val_texts = val_df['tweet'].values
val_labels = val_df['directness'].apply(lambda x: 1 if x == 'direct' else 0).values

val_labse_input_ids, val_labse_attention_masks = preprocess_data(val_texts, labse_tokenizer)
val_xlm_roberta_input_ids, val_xlm_roberta_attention_masks = preprocess_data(val_texts, xlm_roberta_tokenizer)
val_mbert_input_ids, val_mbert_attention_masks = preprocess_data(val_texts, mbert_tokenizer)
val_xlm_input_ids, val_xlm_attention_masks = preprocess_data(val_texts, xlm_tokenizer)

val_predictions_labse = serving_fn_labse(input_word_ids=val_labse_input_ids, input_mask=val_labse_attention_masks)['dense_3']
val_predictions_xlm_roberta = serving_fn_xlm_roberta(input_word_ids=val_xlm_roberta_input_ids, input_mask=val_xlm_roberta_attention_masks)['dense_3']
val_predictions_mbert = serving_fn_mbert(input_word_ids=val_mbert_input_ids, input_mask=val_mbert_attention_masks)['dense_3']
val_predictions_xlm = serving_fn_xlm(input_word_ids=val_xlm_input_ids, input_mask=val_xlm_attention_masks)['dense_3']
val_meta_features = np.hstack((
    val_predictions_labse,
    val_predictions_xlm_roberta,
    val_predictions_mbert,
    val_predictions_xlm
))
# Train meta-learner
meta_learner = LogisticRegression()
meta_learner.fit(val_meta_features, val_labels)

# Meta-learner predictions
meta_predictions_prob = meta_learner.predict_proba(meta_features)[:, 1]
# Find the best threshold for meta-learning
best_threshold_meta, best_f1_meta = find_best_threshold(meta_predictions_prob, test_labels)
meta_binary_predictions = (meta_predictions_prob > best_threshold_meta).astype(int)

# Evaluation for meta-learner
f1_meta = f1_score(test_labels, meta_binary_predictions)
f1_micro_meta = f1_score(test_labels, meta_binary_predictions, average='micro')
f1_macro_meta = f1_score(test_labels, meta_binary_predictions, average='macro')
report_meta = classification_report(test_labels, meta_binary_predictions, target_names=['Indirect', 'Direct'])

# Print meta-learning results
print("Meta-Learning Results:")
print(f"  Best F1 Score: {best_f1_meta} (Threshold: {best_threshold_meta})")
print(f"  Micro F1 Score: {f1_micro_meta}")
print(f"  Macro F1 Score: {f1_macro_meta}")
print("Classification Report:")
print(report_meta)

Meta-Learning Results:
  Best F1 Score: 0.8648648648648649 (Threshold: 0.56)
  Micro F1 Score: 0.9303695768612747
  Macro F1 Score: 0.9089836589836591
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.94      0.97      0.95      1361
      Direct       0.91      0.82      0.86       506

    accuracy                           0.93      1867
   macro avg       0.92      0.90      0.91      1867
weighted avg       0.93      0.93      0.93      1867



In [8]:
from sklearn.svm import SVC

# Train SVM meta-learner using validation data
svm_meta_learner = SVC(probability=True, kernel='linear')  # You can experiment with other kernels
svm_meta_learner.fit(val_meta_features, val_labels)

# SVM Meta-learner predictions
svm_meta_predictions_prob = svm_meta_learner.predict_proba(meta_features)[:, 1]

# Find the best threshold for SVM meta-learning
best_threshold_svm_meta, best_f1_svm_meta = find_best_threshold(svm_meta_predictions_prob, test_labels)
svm_meta_binary_predictions = (svm_meta_predictions_prob > best_threshold_svm_meta).astype(int)

# Evaluation for SVM meta-learner
f1_svm_meta = f1_score(test_labels, svm_meta_binary_predictions)
f1_micro_svm_meta = f1_score(test_labels, svm_meta_binary_predictions, average='micro')
f1_macro_svm_meta = f1_score(test_labels, svm_meta_binary_predictions, average='macro')
report_svm_meta = classification_report(test_labels, svm_meta_binary_predictions, target_names=['Indirect', 'Direct'])

# Print SVM meta-learning results
print("SVM Meta-Learning Results:")
print(f"  Best F1 Score: {best_f1_svm_meta} (Threshold: {best_threshold_svm_meta})")
print(f"  Micro F1 Score: {f1_micro_svm_meta}")
print(f"  Macro F1 Score: {f1_macro_svm_meta}")
print("Classification Report:")
print(report_svm_meta)

SVM Meta-Learning Results:
  Best F1 Score: 0.863773965691221 (Threshold: 0.39)
  Micro F1 Score: 0.9276914836636315
  Macro F1 Score: 0.907278889517138
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.94      0.96      0.95      1361
      Direct       0.88      0.85      0.86       506

    accuracy                           0.93      1867
   macro avg       0.91      0.90      0.91      1867
weighted avg       0.93      0.93      0.93      1867



SVM-linear kernel

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Define parameter grid
param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [0.1, 1, 10],  # Regularization parameter
    'degree': [2, 3],   # Only used for polynomial kernel
    'gamma': ['scale', 'auto']  # Used for RBF, poly, sigmoid
}

# Initialize SVC
svc = SVC(probability=True)

# Perform grid search
grid_search = GridSearchCV(svc, param_grid, cv=5, scoring='f1_macro')
grid_search.fit(val_meta_features, val_labels)

# Best parameters and model
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'C': 10, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}


SVM-Best param

In [10]:
from sklearn.svm import SVC

# Initialize SVM with the best parameters
svm_meta_learner = SVC(probability=True, kernel='rbf', C=10, gamma='scale')

# Train the SVM meta-learner
svm_meta_learner.fit(val_meta_features, val_labels)

# Generate predictions
svm_meta_predictions_prob = svm_meta_learner.predict_proba(meta_features)[:, 1]

# Find the best threshold
best_threshold_svm_meta, best_f1_svm_meta = find_best_threshold(svm_meta_predictions_prob, test_labels)
svm_meta_binary_predictions = (svm_meta_predictions_prob > best_threshold_svm_meta).astype(int)

# Evaluate performance
f1_svm_meta = f1_score(test_labels, svm_meta_binary_predictions)
f1_micro_svm_meta = f1_score(test_labels, svm_meta_binary_predictions, average='micro')
f1_macro_svm_meta = f1_score(test_labels, svm_meta_binary_predictions, average='macro')
report_svm_meta = classification_report(test_labels, svm_meta_binary_predictions, target_names=['Indirect', 'Direct'])

# Print results
print("SVM Meta-Learning Results:")
print(f"  Best F1 Score: {best_f1_svm_meta} (Threshold: {best_threshold_svm_meta})")
print(f"  Micro F1 Score: {f1_micro_svm_meta}")
print(f"  Macro F1 Score: {f1_macro_svm_meta}")
print("Classification Report:")
print(report_svm_meta)

SVM Meta-Learning Results:
  Best F1 Score: 0.86652977412731 (Threshold: 0.47000000000000003)
  Micro F1 Score: 0.9303695768612747
  Macro F1 Score: 0.9097141624259739
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.94      0.97      0.95      1361
      Direct       0.90      0.83      0.87       506

    accuracy                           0.93      1867
   macro avg       0.92      0.90      0.91      1867
weighted avg       0.93      0.93      0.93      1867



Random Forest

In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report

# Initialize Random Forest Classifier
rf_meta_learner = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
    class_weight='balanced'
)

# Train Random Forest meta-learner
rf_meta_learner.fit(val_meta_features, val_labels)

# Random Forest Meta-learner predictions
rf_meta_predictions_prob = rf_meta_learner.predict_proba(meta_features)[:, 1]

# Find the best threshold for Random Forest meta-learning
best_threshold_rf_meta, best_f1_rf_meta = find_best_threshold(rf_meta_predictions_prob, test_labels)
rf_meta_binary_predictions = (rf_meta_predictions_prob > best_threshold_rf_meta).astype(int)

# Evaluation for Random Forest meta-learner
f1_rf_meta = f1_score(test_labels, rf_meta_binary_predictions)
f1_micro_rf_meta = f1_score(test_labels, rf_meta_binary_predictions, average='micro')
f1_macro_rf_meta = f1_score(test_labels, rf_meta_binary_predictions, average='macro')
report_rf_meta = classification_report(test_labels, rf_meta_binary_predictions, target_names=['Indirect', 'Direct'])

# Print Random Forest meta-learning results
print("Random Forest Meta-Learning Results:")
print(f"  Best F1 Score: {best_f1_rf_meta} (Threshold: {best_threshold_rf_meta})")
print(f"  Micro F1 Score: {f1_micro_rf_meta}")
print(f"  Macro F1 Score: {f1_macro_rf_meta}")
print("Classification Report:")
print(report_rf_meta)

Random Forest Meta-Learning Results:
  Best F1 Score: 0.8526970954356846 (Threshold: 0.53)
  Micro F1 Score: 0.923942153186931
  Macro F1 Score: 0.9007167787647737
Classification Report:
              precision    recall  f1-score   support

    Indirect       0.93      0.97      0.95      1361
      Direct       0.90      0.81      0.85       506

    accuracy                           0.92      1867
   macro avg       0.91      0.89      0.90      1867
weighted avg       0.92      0.92      0.92      1867



Random forest best param

In [12]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42, class_weight='balanced'),
                           param_grid, scoring='f1_macro', cv=5, verbose=2)
grid_search.fit(val_meta_features, val_labels)

print("Best Parameters:", grid_search.best_params_)
best_params = {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}

# Create and train the Random Forest model with the best parameters
best_rf_model = RandomForestClassifier(
    max_depth=best_params['max_depth'],
    min_samples_leaf=best_params['min_samples_leaf'],
    min_samples_split=best_params['min_samples_split'],
    n_estimators=best_params['n_estimators'],
    random_state=42  # Optional: for reproducibility
)

# Train the model on validation data
best_rf_model.fit(val_meta_features, val_labels)

# Predict probabilities on meta-features
rf_meta_predictions_prob = best_rf_model.predict_proba(meta_features)[:, 1]

# Find the best threshold for classification
best_threshold_rf_meta, best_f1_rf_meta = find_best_threshold(rf_meta_predictions_prob, test_labels)
rf_meta_binary_predictions = (rf_meta_predictions_prob > best_threshold_rf_meta).astype(int)

# Evaluation metrics
f1_rf_meta = f1_score(test_labels, rf_meta_binary_predictions)
f1_micro_rf_meta = f1_score(test_labels, rf_meta_binary_predictions, average='micro')
f1_macro_rf_meta = f1_score(test_labels, rf_meta_binary_predictions, average='macro')
report_rf_meta = classification_report(test_labels, rf_meta_binary_predictions, target_names=['Indirect', 'Direct'])

# Print results
print("Random Forest Meta-Learning Results:")
print(f"  Best F1 Score: {best_f1_rf_meta} (Threshold: {best_threshold_rf_meta})")
print(f"  Micro F1 Score: {f1_micro_rf_meta}")
print(f"  Macro F1 Score: {f1_macro_rf_meta}")
print("Classification Report:")
print(report_rf_meta)

Fitting 5 folds for each of 81 candidates, totalling 405 fits
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END max_depth=10, min_samples