In [3]:
!pip install pandas numpy scikit-learn imbalanced-learn
!pip install transformers datasets torch accelerate
!pip install fasttext langdetect openpyxl joblib

Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-3.0.1-py3-none-any.whl.metadata (10.0 kB)
Using cached pybind11-3.0.1-py3-none-any.whl (293 kB)
Building wheels for collected packages: fasttext, langdetect
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.3-cp312-cp312-linux_x86_64.whl size=4498215 sha2

# final ensemble perfect model

In [4]:
"""
FIGNEWS-2024: ENSEMBLE INFERENCE (TEAM VERSION - SHARED DRIVE) - FIXED
======================================================================
- Fixes 'AttributeError: str object has no attribute re'
- Fixes DeBERTa loading path (updated to 'deberta_finetuned1')
"""

# ============================================================================
# GOOGLE DRIVE MOUNT
# ============================================================================
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
print("✓ Google Drive mounted")

# ============================================================================
# IMPORTS
# ============================================================================
import os
import warnings
import re
import string
import joblib
import pandas as pd
import numpy as np
from collections import Counter
from typing import Dict, List, Tuple

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import fasttext
import fasttext.util
from langdetect import detect

warnings.filterwarnings('ignore')

print("="*80)
print("ENSEMBLE INFERENCE PIPELINE (Shared Drive)")
print("="*80)


# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """Pipeline configuration"""

    # ========== SHARED DRIVE PATHS ==========
    BASE_PATH = "/content/drive/MyDrive/fignews_shared_project/"

    # Data paths
    MAIN_FILE = BASE_PATH + "data/Main.xlsx"
    IAA_FILES = [
        BASE_PATH + "data/IAA-1.xlsx",
        BASE_PATH + "data/IAA-2.xlsx",
        BASE_PATH + "data/IAA-3.xlsx",
        BASE_PATH + "data/IAA-4.xlsx"
    ]

    # Model directories
    # UPDATED paths based on your training logs
    CLASSICAL_DIR = BASE_PATH + "models/classical/"
    MARBERT_DIR = BASE_PATH + "models/marbert_finetuned/"
    # Note: Updated to 'deberta_finetuned1' to match your training output
    DEBERTA_DIR = BASE_PATH + "models/deberta_finetuned/"
    XLMR_DIR = BASE_PATH + "models/xlm_roberta_best_model/" # Updated based on your log error

    # Fallback if XLM path is different:
    if not os.path.exists(XLMR_DIR):
        XLMR_DIR = BASE_PATH + "models/xlmr_finetuned/"

    # Labels
    LABEL_MAP = {
        'Unbiased': 'Unbiased',
        'Biased against Palestine': 'Biased Against Palestine',
        'Biased Against Palestine': 'Biased Against Palestine',
        'Biased against Israel': 'Biased Against Israel',
        'Biased Against Israel': 'Biased Against Israel',
        'Unclear': 'Others',
        'Biased against others': 'Others',
        'Biased against both': 'Others',
        'Biased against both Palestine and Israel': 'Others',
        'Not Applicable': 'Others',
        'Others': 'Others'
    }

    TARGET_LABELS = ['Unbiased', 'Biased Against Palestine',
                     'Biased Against Israel', 'Others']
    LABEL2ID = {label: idx for idx, label in enumerate(TARGET_LABELS)}
    ID2LABEL = {idx: label for label, idx in LABEL2ID.items()}

    FASTTEXT_AR_MODEL = "cc.ar.300.bin"
    FASTTEXT_DIM = 300

    IAA_TRAIN_SPLIT = 0.8
    RANDOM_STATE = 42


# ============================================================================
# PREPROCESSING (FIXED)
# ============================================================================

def preprocess_classical_arabic(text: str) -> str:
    """Preprocessing for Arabic Classical Model."""
    if not isinstance(text, str): return ""
    text = re.sub(r'http\S+|www\.\S+', '', text).replace(':=:', ' ')
    text = re.sub(r'[a-zA-Z]', '', text)
    text = re.sub(r'\d+', '', text)
    arabic_punctuation = '،؛؟!()[]{}"""\'\'`'
    text = text.translate(str.maketrans('', '', string.punctuation + arabic_punctuation))
    text = re.sub(r'[إأآا]', 'ا', text).replace('ى', 'ي')
    text = text.replace('ة', 'ه').replace('ئ', 'ي')
    return re.sub(r'\s+', ' ', text).strip()


def preprocess_classical_english(text: str) -> str:
    """Preprocessing for English Classical Model."""
    if not isinstance(text, str): return ""
    # Fixed the regex chaining error here
    text = re.sub(r'http\S+|www\.\S+', '', text).replace(':=:', ' ')
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'\d+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation)).lower()
    return re.sub(r'\s+', ' ', text).strip()


def clean_urls_and_format(text: str) -> str:
    if not isinstance(text, str): return ""
    return re.sub(r'\s+', ' ', re.sub(r'http\S+|www\.\S+', '', text).replace(':=:', ' ')).strip()


def filter_valid_data(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    check_cols = [c for c in ['Text', 'Arabic MT', 'English MT'] if c in df.columns]
    if not check_cols: return df
    mask = df[check_cols].apply(lambda x: x.astype(str).str.strip().str.len() > 0).any(axis=1)
    return df[mask].copy()


# ============================================================================
# DATA LOADING
# ============================================================================

def load_and_clean_data() -> Tuple[pd.DataFrame, pd.DataFrame]:
    print("\n[STEP 1] Loading test data...")
    if not os.path.exists(Config.MAIN_FILE):
        raise FileNotFoundError(f"{Config.MAIN_FILE} not found!")

    main_df = pd.read_excel(Config.MAIN_FILE)
    main_df = main_df[main_df['Bias'].notna() & (main_df['Bias'] != '')]
    main_df['Bias'] = main_df['Bias'].astype(str).str.strip()
    for col in ['Text', 'Arabic MT', 'English MT']:
        if col in main_df.columns: main_df[col] = main_df[col].apply(clean_urls_and_format)
    main_df = filter_valid_data(main_df)

    iaa_dfs = []
    for f in Config.IAA_FILES:
        if os.path.exists(f):
            t = pd.read_excel(f)
            if 'Bais' in t.columns: t['Bias'] = t['Bais']
            t = t[t['Bias'].notna() & (t['Bias'] != '')]
            t['Bias'] = t['Bias'].astype(str).str.strip()
            for c in ['Text', 'Arabic MT', 'English MT']:
                if c in t.columns: t[c] = t[c].apply(clean_urls_and_format)
            iaa_dfs.append(filter_valid_data(t))
    iaa_df = pd.concat(iaa_dfs, ignore_index=True) if iaa_dfs else pd.DataFrame()
    return main_df, iaa_df


def map_labels(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df['Bias_Mapped'] = df['Bias'].map(Config.LABEL_MAP).fillna('Others')
    return df


def apply_majority_vote(df: pd.DataFrame) -> pd.DataFrame:
    df['Text_ID'] = df['ID'].astype(str) + "_" + df['Text'].str[:20]
    gold_rows = []
    for _, g in df.groupby('Text_ID'):
        maj = Counter(g['Bias_Mapped']).most_common(1)[0][0]
        r = g.iloc[0].copy()
        r['Bias_Mapped'] = maj
        gold_rows.append(r)
    return pd.DataFrame(gold_rows)


def get_test_set(main_df, iaa_df):
    u_ids = (iaa_df['Text_ID'].unique() if 'Text_ID' in iaa_df.columns else iaa_df['ID'].unique())
    _, test_ids = train_test_split(u_ids, test_size=(1 - Config.IAA_TRAIN_SPLIT), random_state=Config.RANDOM_STATE)

    if 'Text_ID' in iaa_df.columns:
        iaa_test = iaa_df[iaa_df['Text_ID'].isin(test_ids)].copy()
    else:
        iaa_test = iaa_df[iaa_df['ID'].isin(test_ids)].copy()
    return apply_majority_vote(iaa_test)


# ============================================================================
# MODEL LOADERS
# ============================================================================

class ArabicRFLoader:
    def __init__(self, model_dir):
        print("\n[Loading] Arabic Random Forest...")
        path = os.path.join(model_dir, 'rf_arabic.pkl')
        if not os.path.exists(path): raise FileNotFoundError(f"Missing {path}")
        self.rf = joblib.load(path)

        if not os.path.exists(Config.FASTTEXT_AR_MODEL):
             print("  Downloading FastText...")
             fasttext.util.download_model('ar', if_exists='ignore')
        self.ft = fasttext.load_model(Config.FASTTEXT_AR_MODEL)

    def predict(self, texts):
        X = np.array([self._vec(t) for t in texts])
        return [Config.ID2LABEL[p] for p in self.rf.predict(X)]

    def _vec(self, text):
        if not isinstance(text, str) or not text.strip(): return np.zeros(300)
        w = text.split()
        v = [self.ft.get_word_vector(x) for x in w if x.strip()]
        return np.mean(v, axis=0) if v else np.zeros(300)

class EnglishRFLoader:
    def __init__(self, model_dir):
        print("\n[Loading] English Random Forest...")
        m_path = os.path.join(model_dir, 'rf_english.pkl')
        v_path = os.path.join(model_dir, 'tfidf_english.pkl')
        if not os.path.exists(m_path): raise FileNotFoundError(f"Missing {m_path}")
        self.rf = joblib.load(m_path)
        self.vec = joblib.load(v_path)

    def predict(self, texts):
        X = self.vec.transform(texts)
        return [Config.ID2LABEL[p] for p in self.rf.predict(X)]

class TransformerLoader:
    def __init__(self, model_dir, name):
        print(f"\n[Loading] {name}...")

        # Robust path checking
        if model_dir is None or not os.path.exists(model_dir):
            raise FileNotFoundError(f"Missing model directory: {model_dir}")

        try:
            self.tok = AutoTokenizer.from_pretrained(model_dir)
            self.mod = AutoModelForSequenceClassification.from_pretrained(model_dir)
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            self.mod.to(self.device)
            self.mod.eval()
            print(f"  ✓ Loaded successfully from {model_dir}")
        except Exception as e:
            print(f"  ❌ Failed to load {name}: {e}")
            raise e

    def predict(self, texts):
        if not texts: return []
        inp = self.tok(texts, padding=True, truncation=True, max_length=128, return_tensors="pt").to(self.device)
        with torch.no_grad():
            out = self.mod(**inp)
            preds = torch.argmax(out.logits, dim=-1)
        return [Config.ID2LABEL[p.item()] for p in preds]


# ============================================================================
# ENSEMBLE PIPELINE
# ============================================================================

class EnsemblePipeline:
    def __init__(self):
        print("\n" + "="*80)
        print("LOADING ALL MODELS")
        print("="*80)

        self.arabic_rf = None
        self.english_rf = None
        self.marbert = None
        self.deberta = None
        self.xlmr = None

        # Load Safely
        try: self.arabic_rf = ArabicRFLoader(Config.CLASSICAL_DIR)
        except Exception as e: print(f"  Warning: Arabic RF failed: {e}")

        try: self.english_rf = EnglishRFLoader(Config.CLASSICAL_DIR)
        except Exception as e: print(f"  Warning: English RF failed: {e}")

        try: self.marbert = TransformerLoader(Config.MARBERT_DIR, "MARBERTv2")
        except Exception as e: print(f"  Warning: MARBERT failed: {e}")

        try: self.deberta = TransformerLoader(Config.DEBERTA_DIR, "DeBERTa-v3")
        except Exception as e: print(f"  Warning: DeBERTa failed: {e}")

        try: self.xlmr = TransformerLoader(Config.XLMR_DIR, "XLM-RoBERTa")
        except Exception as e: print(f"  Warning: XLM-R failed: {e}")

    def detect_language(self, text: str) -> str:
        try:
            lang = detect(text)
            return 'ar' if lang == 'ar' else 'en'
        except:
            return 'en'

    def predict_system1(self, df: pd.DataFrame) -> List[str]:
        """System 1: Route to Language Specialist + Vote (Updated with RF Tie-Breaker)."""
        print("\n[System 1] Ensemble Voting Inference...")
        predictions = []
        texts = df['Text'].tolist()

        texts_ar_clean = [preprocess_classical_arabic(t) for t in texts]
        texts_en_clean = [preprocess_classical_english(t) for t in texts]

        for i, text in enumerate(texts):
            lang = self.detect_language(text)
            votes = []
            rf_vote = None  # NEW: Track the RF vote specifically

            # --- ARABIC ROUTE ---
            if lang == 'ar':
                if self.marbert:
                    votes.append(self.marbert.predict([text])[0])
                if self.arabic_rf:
                    p = self.arabic_rf.predict([texts_ar_clean[i]])[0]
                    votes.append(p)
                    rf_vote = p  # Capture RF decision
                if self.xlmr:
                    votes.append(self.xlmr.predict([text])[0])

            # --- ENGLISH ROUTE ---
            else:
                if self.deberta:
                    votes.append(self.deberta.predict([text])[0])
                if self.english_rf:
                    p = self.english_rf.predict([texts_en_clean[i]])[0]
                    votes.append(p)
                    rf_vote = p  # Capture RF decision
                if self.xlmr:
                    votes.append(self.xlmr.predict([text])[0])

            # --- FALLBACK ---
            if not votes:
                votes = ["Others"]

            # Vote Counting
            vote_counts = Counter(votes)
            most_common = vote_counts.most_common()

            if len(most_common) == 1:
                predictions.append(most_common[0][0])
            elif most_common[0][1] > most_common[1][1]:
                predictions.append(most_common[0][0])
            else:
                # --- TIE BREAKER CHANGED HERE ---
                # Old logic: predictions.append(llm_vote if llm_vote else most_common[0][0])
                # New logic: Priority to Random Forest (Highest Accuracy)
                if rf_vote:
                    predictions.append(rf_vote)
                else:
                    # Fallback if RF somehow failed to run
                    predictions.append(most_common[0][0])

        return predictions

    def predict_system2(self, df: pd.DataFrame) -> List[str]:
        """System 2: Baseline XLM-R only."""
        print("\n[System 2] XLM-R Baseline Inference...")
        if not self.xlmr:
            return ["Others"] * len(df)
        return self.xlmr.predict(df['Text'].tolist())

    def evaluate(self, test_df: pd.DataFrame, system='system1'):
        true_labels = test_df['Bias_Mapped'].tolist()

        if system == 'system1':
            predictions = self.predict_system1(test_df)
        else:
            predictions = self.predict_system2(test_df)

        print("\nClassification Report:")
        print(classification_report(true_labels, predictions, digits=4, zero_division=0))

        acc = accuracy_score(true_labels, predictions)
        f1 = f1_score(true_labels, predictions, average='macro', zero_division=0)
        print(f"Accuracy: {acc:.4f}, Macro F1: {f1:.4f}")


# ============================================================================
# MAIN
# ============================================================================

def main():
    np.random.seed(Config.RANDOM_STATE)

    try: main_df, iaa_df = load_and_clean_data()
    except FileNotFoundError: return

    main_df = map_labels(main_df)
    iaa_df = map_labels(iaa_df) if len(iaa_df) > 0 else iaa_df

    # Get Test Set
    test_df = get_test_set(main_df, iaa_df)
    print(f"  Test set: {len(test_df)} samples")

    # Run Inference
    pipeline = EnsemblePipeline()

    print("\n" + "="*80)
    print("SYSTEM 1: ENSEMBLE EVALUATION")
    print("="*80)
    pipeline.evaluate(test_df, system='system1')

    print("\n" + "="*80)
    print("SYSTEM 2: BASELINE EVALUATION")
    print("="*80)
    pipeline.evaluate(test_df, system='system2')

if __name__ == "__main__":
    main()

Mounted at /content/drive
✓ Google Drive mounted
ENSEMBLE INFERENCE PIPELINE (Shared Drive)

[STEP 1] Loading test data...
  Test set: 240 samples

LOADING ALL MODELS

[Loading] Arabic Random Forest...
  Downloading FastText...
Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ar.300.bin.gz


[Loading] English Random Forest...

[Loading] MARBERTv2...
  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/marbert_finetuned/

[Loading] DeBERTa-v3...


The tokenizer you are loading from '/content/drive/MyDrive/fignews_shared_project/models/deberta_finetuned/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/deberta_finetuned/

[Loading] XLM-RoBERTa...


The tokenizer you are loading from '/content/drive/MyDrive/fignews_shared_project/models/xlm_roberta_best_model/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/xlm_roberta_best_model/

SYSTEM 1: ENSEMBLE EVALUATION

[System 1] Ensemble Voting Inference...

Classification Report:
                          precision    recall  f1-score   support

   Biased Against Israel     0.0000    0.0000    0.0000         6
Biased Against Palestine     0.5417    0.2097    0.3023        62
                  Others     0.1500    0.8000    0.2526        15
                Unbiased     0.7407    0.6369    0.6849       157

                accuracy                         0.5208       240
               macro avg     0.3581    0.4117    0.3100       240
            weighted avg     0.6339    0.5208    0.5419       240

Accuracy: 0.5208, Macro F1: 0.3100

SYSTEM 2: BASELINE EVALUATION

[System 2] XLM-R Baseline Inference...

Classification Report:
                          precision    recall  f1-score   support

   Biased Against Israel     0.0000    0.0000    0.0000         6
Bias

# Testing arabic and english ensemble model predictions

In [5]:
# ============================================================================
# CUSTOM INFERENCE WITH RF TIE-BREAKER
# ============================================================================

# 1. Ensure Pipeline is Loaded
if 'pipeline' not in locals():
    print("Initializing Pipeline...")
    pipeline = EnsemblePipeline()
else:
    print("Using existing Pipeline instance.")

def predict_with_rf_priority(text: str, pipeline_obj):
    """
    Predicts bias with a specific tie-breaker:
    If votes are tied, default to the Random Forest model (Highest Accuracy).
    """
    # Detect Language
    lang = pipeline_obj.detect_language(text)
    votes = {}
    rf_prediction = None

    print(f"\nScanning Text ({lang}): '{text}'")

    # --- ARABIC LOGIC ---
    if lang == 'ar':
        clean_text = preprocess_classical_arabic(text)

        # 1. MARBERT
        if pipeline_obj.marbert:
            pred = pipeline_obj.marbert.predict([text])[0]
            votes['MARBERT'] = pred

        # 2. XLM-R
        if pipeline_obj.xlmr:
            pred = pipeline_obj.xlmr.predict([text])[0]
            votes['XLM-R'] = pred

        # 3. Random Forest (The Tie-Breaker)
        if pipeline_obj.arabic_rf:
            pred = pipeline_obj.arabic_rf.predict([clean_text])[0]
            votes['RandomForest_AR'] = pred
            rf_prediction = pred

    # --- ENGLISH LOGIC ---
    else:
        clean_text = preprocess_classical_english(text)

        # 1. DeBERTa
        if pipeline_obj.deberta:
            pred = pipeline_obj.deberta.predict([text])[0]
            votes['DeBERTa'] = pred

        # 2. XLM-R
        if pipeline_obj.xlmr:
            pred = pipeline_obj.xlmr.predict([text])[0]
            votes['XLM-R'] = pred

        # 3. Random Forest (The Tie-Breaker)
        if pipeline_obj.english_rf:
            pred = pipeline_obj.english_rf.predict([clean_text])[0]
            votes['RandomForest_EN'] = pred
            rf_prediction = pred

    # --- VOTING LOGIC ---
    vote_list = list(votes.values())
    if not vote_list:
        return "Others"

    counts = Counter(vote_list)
    most_common = counts.most_common()

    print(f"   Individual Votes: {votes}")

    # DECISION TREE
    final_pred = ""

    # Case A: Unanimous
    if len(most_common) == 1:
        final_pred = most_common[0][0]
        print(f"   Result: Unanimous ({final_pred})")

    # Case B: Clear Majority
    elif most_common[0][1] > most_common[1][1]:
        final_pred = most_common[0][0]
        print(f"   Result: Majority Vote ({final_pred})")

    # Case C: Tie -> Use RF
    else:
        if rf_prediction:
            final_pred = rf_prediction
            print(f"   Result: TIE DETECTED -> Using Highest Acc Model (RF): {final_pred}")
        else:
            # Fallback if RF is missing for some reason
            final_pred = most_common[0][0]
            print(f"   Result: Tie (RF missing, using first option): {final_pred}")

    return final_pred

# ============================================================================
# TEST INPUTS
# ============================================================================

# Input 1: Arabic Sentence (Biased example)
# Transl: "The occupation forces committed a massacre against civilians."
arabic_text = "قوات الاحتلال ارتكبت مجزرة مروعة بحق المدنيين العزل في القطاع"

# Input 2: English Sentence (Biased example)
english_text = "The barbaric actions of the regime show they have no regard for human life."

# ============================================================================
# EXECUTION
# ============================================================================

print("-" * 60)
ar_result = predict_with_rf_priority(arabic_text, pipeline)
print(f"FINAL ARABIC PREDICTION: {ar_result}")

print("-" * 60)
en_result = predict_with_rf_priority(english_text, pipeline)
print(f"FINAL ENGLISH PREDICTION: {en_result}")
print("-" * 60)

Initializing Pipeline...

LOADING ALL MODELS

[Loading] Arabic Random Forest...

[Loading] English Random Forest...

[Loading] MARBERTv2...
  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/marbert_finetuned/

[Loading] DeBERTa-v3...


The tokenizer you are loading from '/content/drive/MyDrive/fignews_shared_project/models/deberta_finetuned/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/deberta_finetuned/

[Loading] XLM-RoBERTa...


The tokenizer you are loading from '/content/drive/MyDrive/fignews_shared_project/models/xlm_roberta_best_model/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


  ✓ Loaded successfully from /content/drive/MyDrive/fignews_shared_project/models/xlm_roberta_best_model/
------------------------------------------------------------

Scanning Text (ar): 'قوات الاحتلال ارتكبت مجزرة مروعة بحق المدنيين العزل في القطاع'
   Individual Votes: {'MARBERT': 'Biased Against Israel', 'XLM-R': 'Unbiased', 'RandomForest_AR': 'Unbiased'}
   Result: Majority Vote (Unbiased)
FINAL ARABIC PREDICTION: Unbiased
------------------------------------------------------------

Scanning Text (en): 'The barbaric actions of the regime show they have no regard for human life.'
   Individual Votes: {'DeBERTa': 'Biased Against Palestine', 'XLM-R': 'Unbiased', 'RandomForest_EN': 'Unbiased'}
   Result: Majority Vote (Unbiased)
FINAL ENGLISH PREDICTION: Unbiased
------------------------------------------------------------
