# Simple Ensembling

In [1]:
pip install transformers

Collecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m85.3 MB/s[0m eta [36m0:00:00[0m
Col

In [2]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [3]:
pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [4]:
from sklearn.ensemble import StackingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score
from transformers import DistilBertForSequenceClassification, XLNetForSequenceClassification, DistilBertTokenizer, XLNetTokenizer
import torch
import numpy as np
import pandas as pd

In [5]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [6]:
# Load pretrained models
distilbert_model = DistilBertForSequenceClassification.from_pretrained("/content/gdrive/MyDrive/fake_news/FakeNewsNet_DS/Models/DistilBERT/politifact_epoch_3")
xlnet_model = XLNetForSequenceClassification.from_pretrained("/content/gdrive/MyDrive/fake_news/FakeNewsNet_DS/Models/XLNet/politifact_epoch7")


In [7]:
# Tokenizers
distilbert_tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
xlnet_tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")


Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

In [8]:
# Load dataset
df = pd.read_csv("/content/gdrive/MyDrive/fake_news/FakeNewsNet_DS/politifact_cleaned.csv")
df['SECTION_CLEANED'] = df['SECTION_CLEANED'].astype(str)

# Randomly select  % of the data
df_data = df.sample(frac=1.0, random_state=42)


# Split the dataset into training, validation, and test sets
# First, split into training and temp sets (80% training, 20% temp)
train_df, temp_df = train_test_split(df_data, test_size=0.20, random_state=42)

# Then, split the temp set into validation and test sets (50% validation, 50% test)
validation_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Optionally,reset the index of the DataFrames
train_df = train_df.reset_index(drop=True)
#validation_df = validation_df1.sample(frac=0.50, random_state=42)

validation_df = validation_df.reset_index(drop=True)

#test_df = test_df1.sample(frac=0.50, random_state=42)
test_df = test_df.reset_index(drop=True)

In [9]:
y_test = list(test_df['label'])
len(y_test)

106

In [10]:
X_test = list(test_df['SECTION_CLEANED'])

In [11]:
# Prepare input data
def preprocess_text(text, tokenizer):
    input_data = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    return input_data


In [12]:
X_test_distilbert = [preprocess_text(text, distilbert_tokenizer) for text in X_test]
X_test_xlnet = [preprocess_text(text, xlnet_tokenizer) for text in X_test]


In [13]:
X_test_distilbert

[{'input_ids': tensor([[ 101, 7661, 1116, 7368, 6707, 5709, 4487, 2007, 2616,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[ 101, 1426, 1880, 1697,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[ 101, 2978, 1305, 5818,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[  101,  1369,  9643, 11905,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[  101, 24570,   139, 24683,  1158, 14748, 20515,  5082,  5822,  6660,
           1318,  1955,  1371,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[  101,   139, 13329, 12880,  2137,  1938, 18239,  1116,  7661, 22087,
           2200,  8640, 25030,  1204,  1706,  4108,  9190, 27412,  1348, 22889,
           1116,  1706, 15327,  1262, 17168,  1161,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [14]:
X_test_xlnet

[{'input_ids': tensor([[ 1413,    23,  1709,  3587,   443, 14247,   720,     4,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[18203,   328,     4,     3]]), 'token_type_ids': tensor([[0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1]])},
 {'input_ids': tensor([[1457,  360, 5493,    4,    3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[ 1130, 13319, 11386,     4,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[27623, 22227,    56,  4892,   741,  5976,  7474,   428,  1995,  1307,
              4,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[  322,  2023, 19425,   965, 26097,    23,  1413,  2100,  1227, 15855,
      

In [15]:
# Inference
def get_predictions(model, input_data):
    with torch.no_grad():
        output = model(**input_data)
    logits = output.logits
    probabilities = torch.softmax(logits, dim=1)
    return probabilities


In [16]:
distilbert_predictions = [get_predictions(distilbert_model, data) for data in X_test_distilbert]
xlnet_predictions = [get_predictions(xlnet_model, data) for data in X_test_xlnet]


In [17]:
distilbert_predictions

[tensor([[0.0732, 0.9268]]),
 tensor([[0.0362, 0.9638]]),
 tensor([[0.0478, 0.9522]]),
 tensor([[0.0567, 0.9433]]),
 tensor([[0.0573, 0.9427]]),
 tensor([[0.9777, 0.0223]]),
 tensor([[0.5066, 0.4934]]),
 tensor([[0.9781, 0.0219]]),
 tensor([[0.0283, 0.9717]]),
 tensor([[0.0418, 0.9582]]),
 tensor([[0.9779, 0.0221]]),
 tensor([[0.9704, 0.0296]]),
 tensor([[0.0334, 0.9666]]),
 tensor([[0.9589, 0.0411]]),
 tensor([[0.0431, 0.9569]]),
 tensor([[0.0791, 0.9209]]),
 tensor([[0.2369, 0.7631]]),
 tensor([[0.6645, 0.3355]]),
 tensor([[0.5300, 0.4700]]),
 tensor([[0.6445, 0.3555]]),
 tensor([[0.0317, 0.9683]]),
 tensor([[0.0527, 0.9473]]),
 tensor([[0.1407, 0.8593]]),
 tensor([[0.1181, 0.8819]]),
 tensor([[0.0402, 0.9598]]),
 tensor([[0.0430, 0.9570]]),
 tensor([[0.9529, 0.0471]]),
 tensor([[0.9706, 0.0294]]),
 tensor([[0.1177, 0.8823]]),
 tensor([[0.1156, 0.8844]]),
 tensor([[0.0370, 0.9630]]),
 tensor([[0.0233, 0.9767]]),
 tensor([[0.9472, 0.0528]]),
 tensor([[0.1448, 0.8552]]),
 tensor([[0.03

In [18]:
xlnet_predictions

[tensor([[0.1236, 0.8764]]),
 tensor([[0.0195, 0.9805]]),
 tensor([[0.0314, 0.9686]]),
 tensor([[0.0182, 0.9818]]),
 tensor([[0.0701, 0.9299]]),
 tensor([[0.9883, 0.0117]]),
 tensor([[0.2987, 0.7013]]),
 tensor([[0.9855, 0.0145]]),
 tensor([[0.0120, 0.9880]]),
 tensor([[0.1176, 0.8824]]),
 tensor([[0.9819, 0.0181]]),
 tensor([[0.9525, 0.0475]]),
 tensor([[0.0450, 0.9550]]),
 tensor([[0.8837, 0.1163]]),
 tensor([[0.1401, 0.8599]]),
 tensor([[0.0970, 0.9030]]),
 tensor([[0.3314, 0.6686]]),
 tensor([[0.9350, 0.0650]]),
 tensor([[0.0490, 0.9510]]),
 tensor([[0.9489, 0.0511]]),
 tensor([[0.0859, 0.9141]]),
 tensor([[0.1128, 0.8872]]),
 tensor([[0.4333, 0.5667]]),
 tensor([[0.1249, 0.8751]]),
 tensor([[0.4983, 0.5017]]),
 tensor([[0.0135, 0.9865]]),
 tensor([[0.8135, 0.1865]]),
 tensor([[0.9865, 0.0135]]),
 tensor([[0.4907, 0.5093]]),
 tensor([[0.0300, 0.9700]]),
 tensor([[0.0371, 0.9629]]),
 tensor([[0.0111, 0.9889]]),
 tensor([[0.8413, 0.1587]]),
 tensor([[0.1416, 0.8584]]),
 tensor([[0.03

In [19]:
def ensemble_predictions(predictions1, predictions2):
    ensemble_probs = (predictions1 + predictions2) / 2.0  # Simple average ensemble
    return ensemble_probs

ensembled_probabilities = [ensemble_predictions(distilbert_probs, xlnet_probs) for distilbert_probs, xlnet_probs in zip(distilbert_predictions, xlnet_predictions)]


In [20]:
final_predictions = [torch.argmax(ensemble_probs, dim=1).item() for ensemble_probs in ensembled_probabilities]

In [21]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy = accuracy_score(y_test, final_predictions)
precision = precision_score(y_test, final_predictions)
recall = recall_score(y_test, final_predictions)
f1 = f1_score(y_test, final_predictions)

In [22]:
print(f"Accuracy: {accuracy*100:.4f}")
print(f"Precision: {precision*100:.4f}")
print(f"Recall: {recall*100:.4f}")
print(f"F1 Score: {f1*100:.4f}")

Accuracy: 87.7358
Precision: 88.8889
Recall: 90.3226
F1 Score: 89.6000


In [23]:
import numpy as np

def majority_vote(predictions1, predictions2):
    predictions1 = [np.argmax(probs, axis=1) for probs in predictions1]
    predictions2 = [np.argmax(probs, axis=1) for probs in predictions2]
    ensemble_predictions = [int(np.median([p1, p2])) for p1, p2 in zip(predictions1, predictions2)]
    return ensemble_predictions


In [24]:
# Convert PyTorch tensors to NumPy arrays
distilbert_predictions_np = [probs.numpy() for probs in distilbert_predictions]
xlnet_predictions_np = [probs.numpy() for probs in xlnet_predictions]


In [25]:
# Perform majority vote ensembling
ensemble_predictions = majority_vote(distilbert_predictions_np, xlnet_predictions_np)

In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy = accuracy_score(y_test, ensemble_predictions)
precision = precision_score(y_test, ensemble_predictions)
recall = recall_score(y_test, ensemble_predictions)
f1 = f1_score(y_test, ensemble_predictions)

In [27]:
print(f"Accuracy: {accuracy*100:.4f}")
print(f"Precision: {precision*100:.4f}")
print(f"Recall: {recall*100:.4f}")
print(f"F1 Score: {f1*100:.4f}")

Accuracy: 87.7358
Precision: 91.5254
Recall: 87.0968
F1 Score: 89.2562


In [28]:
def weighted_ensemble(predictions1, predictions2, weight_model1, weight_model2):
    predictions1 = [np.argmax(probs, axis=1) for probs in predictions1]
    predictions2 = [np.argmax(probs, axis=1) for probs in predictions2]
    ensemble_probs = [int((weight_model1 * p1 + weight_model2 * p2) / (weight_model1 + weight_model2)) for p1, p2 in zip(predictions1, predictions2)]
    return ensemble_probs


In [33]:
# Define the weights for each model
weight_model1 = 0.6
weight_model2 = 0.4


In [34]:
# Perform  weight ensembling
ensemble_predictions_weight = weighted_ensemble(distilbert_predictions_np, xlnet_predictions_np,weight_model1,weight_model2)

In [35]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy = accuracy_score(y_test, ensemble_predictions_weight)
precision = precision_score(y_test, ensemble_predictions_weight)
recall = recall_score(y_test, ensemble_predictions_weight)
f1 = f1_score(y_test, ensemble_predictions_weight)

In [36]:
print(f"Accuracy: {accuracy*100:.4f}")
print(f"Precision: {precision*100:.4f}")
print(f"Recall: {recall*100:.4f}")
print(f"F1 Score: {f1*100:.4f}")

Accuracy: 87.7358
Precision: 91.5254
Recall: 87.0968
F1 Score: 89.2562


# Stacking

In [None]:
pip install transformers

Collecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m73.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m62.2 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [None]:
pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/258.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m174.1/258.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score
from transformers import DistilBertForSequenceClassification, XLNetForSequenceClassification, DistilBertTokenizer, XLNetTokenizer
from sklearn.linear_model import LogisticRegression
import torch
import numpy as np
import pandas as pd

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


class TransformersWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, model_name_or_path):
        self.model_name_or_path = model_name_or_path

    def fit(self, X, y):
        # Tokenize and preprocess the input data
        tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
        model = DistilBertForSequenceClassification.from_pretrained(self.model_name_or_path)

        X_encoded = tokenizer(X, padding=True, truncation=True, return_tensors='pt')
        input_ids = X_encoded['input_ids']

        # Convert labels to tensor
        y_tensor = torch.tensor(y)

        # Train the model (this may vary depending on your specific model and training process)
        outputs = model(input_ids, labels=y_tensor)
        loss, logits = outputs.loss, outputs.logits

        self.model = model

        return self

    def predict(self, X):
        # Check if the model is fitted
        check_is_fitted(self, 'model')

        # Tokenize and preprocess the input data
        tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
        X_encoded = tokenizer(X, padding=True, truncation=True, return_tensors='pt')
        input_ids = X_encoded['input_ids']

        # Make predictions
        with torch.no_grad():
            logits = self.model(input_ids).logits

        # Convert logits to class labels (assuming binary classification)
        predicted_labels = torch.argmax(logits, dim=1)

        return predicted_labels


In [None]:
distilbert_model = TransformersWrapper("/content/gdrive/MyDrive/fake_news/Models/DistilBERT/ds_35_h3")

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


class TransformersWrapperXL(BaseEstimator, ClassifierMixin):
    def __init__(self, model_name_or_path):
        self.model_name_or_path = model_name_or_path

    def fit(self, X, y):
        # Tokenize and preprocess the input data
        tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
        model = XLNetForSequenceClassification.from_pretrained(self.model_name_or_path)

        X_encoded = tokenizer(X, padding=True, truncation=True, return_tensors='pt')
        input_ids = X_encoded['input_ids']

        # Convert labels to tensor
        y_tensor = torch.tensor(y)

        # Train the model (this may vary depending on your specific model and training process)
        outputs = model(input_ids, labels=y_tensor)
        loss, logits = outputs.loss, outputs.logits

        self.model = model

        return self

    def predict(self, X):
        # Check if the model is fitted
        check_is_fitted(self, 'model')

        # Tokenize and preprocess the input data
        tokenizer = XLNetTokenizer.from_pretrained('distilbert-base-cased')
        X_encoded = tokenizer(X, padding=True, truncation=True, return_tensors='pt')
        input_ids = X_encoded['input_ids']

        # Make predictions
        with torch.no_grad():
            logits = self.model(input_ids).logits

        # Convert logits to class labels (assuming binary classification)
        predicted_labels = torch.argmax(logits, dim=1)

        return predicted_labels


In [None]:
xlnet_model = TransformersWrapperXL("/content/gdrive/MyDrive/fake_news/Models/XLNet/ds_35")

In [None]:
df = pd.read_csv("/content/gdrive/MyDrive/fake_news/dfcleaned.csv")
df['SECTION_CLEANED'] = df['SECTION_CLEANED'].astype(str)

# Randomly select  % of the data
df_data = df.sample(frac=0.35, random_state=42)

In [None]:
labels = list(df_data['label'])
len(labels)

13526

In [None]:
labels = [0 if label == "FAKE" else 1 for label in labels]
len(labels)

13526

In [None]:
# Assuming you have labeled data for training and testing
X_train, X_test, y_train, y_test = train_test_split(list(df_data['SECTION_CLEANED']), labels, test_size=0.2,random_state=42)

In [None]:
base_models = [
    ('well_trained_model', distilbert_model),
    ('overfitting_model', xlnet_model)
]

In [None]:
meta_learner = LogisticRegression()

In [None]:
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner)


In [None]:
stacking_model.fit(X_train, y_train)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


In [None]:
y_pred = stacking_model.predict(X_test)

In [None]:
# Calculate accuracy

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1:", f1)



In [None]:
# Generate a classification report
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)