In [2]:
!pip install pandas numpy scikit-learn imbalanced-learn
!pip install transformers datasets torch accelerate
!pip install fasttext langdetect openpyxl joblib

Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-3.0.1-py3-none-any.whl.metadata (10.0 kB)
Using cached pybind11-3.0.1-py3-none-any.whl (293 kB)
Building wheels for collected packages: fasttext, langdetect
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.3-cp312-cp312-linux_x86_64.whl size=4498210 sha2

# last version

In [4]:
"""
FIGNEWS-2024: CLASSICAL MODELS (TEAM VERSION - SHARED DRIVE)
=============================================================
- Uses shared Google Drive for storage
- Saves only final models
- Data Logic: MAXIMIZE DATA (Filters by Text Availability, NOT Source Language label)
  * Arabic Model: Uses Text if Source=Arabic, otherwise uses Arabic MT.
  * English Model: Uses Text if Source=English, otherwise uses English MT.
"""

# ============================================================================
# GOOGLE DRIVE MOUNT (With Force Remount)
# ============================================================================
# Colab-only step: attach Google Drive at /content/drive, which is the root that
# Config.BASE_PATH points into. force_remount=True is passed on every run of the cell.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
print("✓ Google Drive mounted")

# ============================================================================
# IMPORTS
# ============================================================================
import os
import warnings
import re
import string
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from collections import Counter
from typing import Dict, List, Tuple

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
import joblib

import fasttext
import fasttext.util

# Pipeline banner: title framed by two 80-character rules, one item per line.
print(
    "=" * 80,
    "CLASSICAL MODELS PIPELINE (Shared Drive + Data Maximization)",
    "=" * 80,
    sep="\n",
)


# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """Central settings for the classical-model training pipeline.

    Groups the shared-Drive file locations, the raw-to-target label mapping,
    the FastText model file name and dimension, and the split ratio and seed.
    """

    # ---- Shared Drive layout ----
    BASE_PATH = "/content/drive/MyDrive/fignews_shared_project/"

    # Annotated spreadsheets: the main file plus the four IAA files.
    MAIN_FILE = f"{BASE_PATH}data/Main.xlsx"
    IAA_FILES = [
        f"{BASE_PATH}data/IAA-1.xlsx",
        f"{BASE_PATH}data/IAA-2.xlsx",
        f"{BASE_PATH}data/IAA-3.xlsx",
        f"{BASE_PATH}data/IAA-4.xlsx",
    ]

    # Where the trained artefacts are written.
    OUTPUT_DIR = f"{BASE_PATH}models/classical/"

    # Raw annotation label -> one of the four target classes, grouped by target.
    LABEL_MAP = {
        **dict.fromkeys(('Unbiased',), 'Unbiased'),
        **dict.fromkeys(
            ('Biased against Palestine', 'Biased Against Palestine'),
            'Biased Against Palestine',
        ),
        **dict.fromkeys(
            ('Biased against Israel', 'Biased Against Israel'),
            'Biased Against Israel',
        ),
        **dict.fromkeys(
            (
                'Unclear',
                'Biased against others',
                'Biased against both',
                'Biased against both Palestine and Israel',
                'Not Applicable',
                'Others',
            ),
            'Others',
        ),
    }

    # Target classes; a label's position in this list is its integer id.
    TARGET_LABELS = ['Unbiased', 'Biased Against Palestine',
                     'Biased Against Israel', 'Others']
    LABEL2ID = dict(zip(TARGET_LABELS, range(len(TARGET_LABELS))))
    ID2LABEL = dict(enumerate(TARGET_LABELS))

    # ---- FastText ----
    FASTTEXT_AR_MODEL = "cc.ar.300.bin"
    FASTTEXT_DIM = 300

    # ---- Split / reproducibility ----
    IAA_TRAIN_SPLIT = 0.8
    RANDOM_STATE = 42


# ============================================================================
# PREPROCESSING FUNCTIONS
# ============================================================================

def preprocess_classical_arabic(text: str) -> str:
    """Normalise a post for the Arabic classical model.

    Steps, in order: drop URLs, turn the ':=:' separator into a space, delete
    Latin letters and digit runs, delete ASCII/Arabic punctuation, unify alef
    variants, alef maqsura, ta marbuta and hamza-on-ya, then collapse
    whitespace. Any non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    # Links go first so their letters/punctuation never reach the later steps.
    cleaned = re.sub(r'http\S+|www\.\S+', '', text).replace(':=:', ' ')

    # Latin letters, then digit runs.
    for junk_pattern in (r'[a-zA-Z]', r'\d+'):
        cleaned = re.sub(junk_pattern, '', cleaned)

    arabic_punctuation = '،؛؟!()[]{}"""\'\'`'
    deletion_table = str.maketrans('', '', string.punctuation + arabic_punctuation)
    cleaned = cleaned.translate(deletion_table)

    # Orthographic unification, applied in the original order.
    letter_rules = (
        (r'[إأآا]', 'ا'),
        (r'ى', 'ي'),
        (r'ة', 'ه'),
        (r'ئ', 'ي'),
    )
    for pattern, canonical in letter_rules:
        cleaned = re.sub(pattern, canonical, cleaned)

    return re.sub(r'\s+', ' ', cleaned).strip()

def preprocess_classical_english(text: str) -> str:
    """Normalise a post for the English classical model.

    Removes URLs, replaces the ':=:' separator with a space, strips hashtags,
    @mentions and digit runs, deletes ASCII punctuation, lower-cases, and
    collapses whitespace. Any non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    cleaned = re.sub(r'http\S+|www\.\S+', '', text).replace(':=:', ' ')

    # Hashtags, mentions, digits — removed one pattern at a time, in this order.
    for junk_pattern in (r'#\w+', r'@\w+', r'\d+'):
        cleaned = re.sub(junk_pattern, '', cleaned)

    punctuation_free = cleaned.translate(str.maketrans('', '', string.punctuation))
    lowered = punctuation_free.lower()
    return re.sub(r'\s+', ' ', lowered).strip()

def clean_urls_and_format(text: str) -> str:
    """Light cleaning used at load time: drop URLs, replace ':=:' with a space,
    collapse whitespace. Any non-string input yields an empty string."""
    if isinstance(text, str):
        without_links = re.sub(r'http\S+|www\.\S+', '', text)
        return re.sub(r'\s+', ' ', without_links.replace(':=:', ' ')).strip()
    return ""


# ============================================================================
# DATA LOADING & PREPARATION
# ============================================================================

def filter_valid_data(df: pd.DataFrame) -> pd.DataFrame:
    """Keep rows that have real text in at least one text column.

    The 'Source Language' column is deliberately NOT consulted; a row survives
    if any of 'Text', 'Arabic MT' or 'English MT' (whichever are present)
    holds a non-missing value that is non-empty after stripping.

    Args:
        df: Annotated rows; not modified.

    Returns:
        A filtered copy of ``df``. When none of the three text columns exist,
        an unfiltered copy is returned.
    """
    df = df.copy()

    # Columns to check for content
    check_cols = [c for c in ['Text', 'Arabic MT', 'English MT'] if c in df.columns]

    if not check_cols:
        return df

    # A cell counts as text only if it is not missing AND non-blank. The
    # notna() guard matters because astype(str) renders NaN/None as the
    # non-empty strings "nan"/"None".
    mask = df[check_cols].apply(
        lambda col: col.notna() & (col.astype(str).str.strip().str.len() > 0)
    ).any(axis=1)

    df_filtered = df[mask].copy()

    dropped = len(df) - len(df_filtered)
    if dropped > 0:
        print(f"    Dropped {dropped} rows with no text content in any column.")

    return df_filtered


def load_and_clean_data() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the main spreadsheet and every IAA spreadsheet that exists.

    For each file: rows without a 'Bias' value are discarded, labels are
    whitespace-stripped, the text columns go through clean_urls_and_format,
    and filter_valid_data removes rows with no text at all. IAA files may
    carry the label under the misspelled header 'Bais'.

    Returns:
        (main_df, iaa_df); iaa_df is an empty DataFrame when no IAA file
        contributed any labelled row.

    Raises:
        FileNotFoundError: if Config.MAIN_FILE does not exist.
    """
    text_columns = ['Text', 'Arabic MT', 'English MT']

    def _labelled_rows(frame):
        # Keep annotated rows only, then tidy the label text.
        has_label = frame['Bias'].notna() & (frame['Bias'] != '')
        frame = frame[has_label]
        frame['Bias'] = frame['Bias'].astype(str).str.strip()
        return frame

    def _scrub_text(frame):
        # Clean whichever text columns are present, then drop text-less rows.
        for name in text_columns:
            if name in frame.columns:
                frame[name] = frame[name].apply(clean_urls_and_format)
        return filter_valid_data(frame)

    print("\n[STEP 1] Loading and cleaning data...")

    main_path = Config.MAIN_FILE
    if not os.path.exists(main_path):
        print(f"ERROR: File not found at {main_path}")
        raise FileNotFoundError(f"{main_path} not found!")

    print(f"  Loading {main_path}...")
    main_df = _scrub_text(_labelled_rows(pd.read_excel(main_path)))
    print(f"    MAIN: {len(main_df)} rows")

    # IAA files are optional: missing files and files without labels are skipped.
    collected = []
    for iaa_path in filter(os.path.exists, Config.IAA_FILES):
        print(f"  Loading {iaa_path}...")
        sheet = pd.read_excel(iaa_path)

        if 'Bais' in sheet.columns:
            sheet['Bias'] = sheet['Bais']

        sheet = _labelled_rows(sheet)
        if len(sheet) == 0:
            continue

        sheet = _scrub_text(sheet)
        collected.append(sheet)
        print(f"    {iaa_path}: {len(sheet)} rows")

    iaa_df = pd.concat(collected, ignore_index=True) if collected else pd.DataFrame()
    print(f"\n  Total IAA: {len(iaa_df)} rows")

    return main_df, iaa_df


def map_labels(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a 'Bias_Mapped' column.

    Each 'Bias' value is looked up in Config.LABEL_MAP; values with no entry
    there fall back to 'Others'.
    """
    mapped = df['Bias'].map(Config.LABEL_MAP).fillna('Others')
    return df.assign(Bias_Mapped=mapped)


def apply_majority_vote(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse repeated annotations of the same item into one gold row.

    Items are identified by 'Text_ID' = str(ID) + "_" + first 20 characters of
    Text. For each item the most frequent 'Bias_Mapped' value wins (ties go to
    the label seen first within the group) and is written onto the group's
    first row.

    Args:
        df: Annotation rows with 'ID', 'Text' and 'Bias_Mapped' columns.
            The frame is not modified.

    Returns:
        One row per item, including the 'Text_ID' column. An empty input gives
        an empty frame that still carries the input columns plus 'Text_ID'.
    """
    print("\n  Applying majority vote...")

    # Work on a copy so the caller's frame does not silently gain a column.
    work = df.copy()

    # A missing Text would make the key NaN, and groupby drops NaN keys —
    # i.e. the row would vanish. Treat missing text as an empty prefix instead.
    text_prefix = work['Text'].fillna('').astype(str).str[:20]
    work['Text_ID'] = work['ID'].astype(str) + "_" + text_prefix

    gold_rows = []
    for _, group in work.groupby('Text_ID'):
        majority_label = Counter(group['Bias_Mapped'].tolist()).most_common(1)[0][0]
        gold_row = group.iloc[0].copy()
        gold_row['Bias_Mapped'] = majority_label
        gold_rows.append(gold_row)

    if not gold_rows:
        # Keep the schema so downstream column lookups still work.
        return work.iloc[0:0]
    return pd.DataFrame(gold_rows)


def create_train_test_split(main_df: pd.DataFrame, iaa_df: pd.DataFrame):
    """Build the training and test frames.

    The unique IAA item ids ('Text_ID' if that column exists, otherwise 'ID')
    are split with Config.IAA_TRAIN_SPLIT / Config.RANDOM_STATE. Annotations
    of the training ids are majority-voted and appended to ``main_df``;
    annotations of the test ids are majority-voted to form the test set.

    Args:
        main_df: Label-mapped rows from the main file (all go to training).
        iaa_df: Label-mapped IAA annotation rows.

    Returns:
        (train_df, test_df).

    Raises:
        ValueError: if ``iaa_df`` has no rows, since no held-out set can be
            built (previously this surfaced as an opaque ``KeyError: 'ID'``).
    """
    print("\n[STEP 2] Creating train/test splits...")

    if iaa_df is None or len(iaa_df) == 0:
        raise ValueError(
            "No IAA rows available: cannot build the held-out test split. "
            "Check that the files listed in Config.IAA_FILES exist and contain labels."
        )

    # Items are split by id so every annotation of an item lands on one side.
    id_column = 'Text_ID' if 'Text_ID' in iaa_df.columns else 'ID'
    unique_ids = iaa_df[id_column].unique()
    train_ids, test_ids = train_test_split(
        unique_ids,
        test_size=(1 - Config.IAA_TRAIN_SPLIT),
        random_state=Config.RANDOM_STATE,
    )

    iaa_train = iaa_df[iaa_df[id_column].isin(train_ids)].copy()
    iaa_test = iaa_df[iaa_df[id_column].isin(test_ids)].copy()

    train_df = pd.concat([main_df, apply_majority_vote(iaa_train)], ignore_index=True)
    test_df = apply_majority_vote(iaa_test)

    print(f"\n  Training: {len(train_df)} samples")
    print(f"  Test: {len(test_df)} samples")
    return train_df, test_df


def prepare_text_columns(df: pd.DataFrame, target_lang: str) -> pd.DataFrame:
    """Pick, per row, the text the ``target_lang`` model should read.

    Rows are never filtered by 'Source Language'; that column only decides
    which text column is used:

    * target 'arabic':  'Text' when the source language contains 'Arabic',
      otherwise 'Arabic MT'.
    * any other target (english): 'Text' when the source language contains
      'English', otherwise 'English MT'.

    The selection is vectorised, so an empty frame is handled without going
    through a row-wise ``apply``.

    Args:
        df: Rows with 'Text' and the relevant MT column; 'Source Language' is
            optional (treated as unknown when absent). Not modified.
        target_lang: 'arabic' or anything else for English.

    Returns:
        A copy of ``df`` with a 'ModelText' column, minus rows whose selected
        text is missing or blank.
    """
    df = df.copy()

    if target_lang == 'arabic':
        native_tag, mt_column = 'Arabic', 'Arabic MT'
    else:  # english
        native_tag, mt_column = 'English', 'English MT'

    # str() of a missing source language is "nan", which matches neither tag,
    # so unknown sources fall through to the MT column.
    if 'Source Language' in df.columns:
        source = df['Source Language'].astype(str)
    else:
        source = pd.Series('', index=df.index, dtype=object)
    is_native = source.str.contains(native_tag, regex=False).to_numpy(dtype=bool)

    df['ModelText'] = np.where(
        is_native,
        df['Text'].to_numpy(dtype=object),
        df[mt_column].to_numpy(dtype=object),
    )

    # Drop rows where the *selected* text is empty
    initial_len = len(df)
    df = df[df['ModelText'].notna() & (df['ModelText'].astype(str).str.strip() != '')]
    if len(df) < initial_len:
        print(f"  Warning: Dropped {initial_len - len(df)} rows due to missing '{target_lang}' content")

    return df


# ============================================================================
# MODELS (RF)
# ============================================================================

class ArabicRFModel:
    """Random Forest over mean-pooled FastText word vectors for Arabic text.

    Attributes set in ``__init__``:
        ft_model: loaded FastText model, or None until load_fasttext succeeds.
        rf_model: fitted RandomForestClassifier, or None until train runs.
        label_encoder: Config.LABEL2ID (label name -> integer id).
    """

    def __init__(self):
        self.ft_model = None
        self.rf_model = None
        self.label_encoder = Config.LABEL2ID

    def load_fasttext(self):
        """Load the FastText model named by Config.FASTTEXT_AR_MODEL.

        If the file is not present a download is attempted first. Failures
        are reported (with the underlying error) instead of being silently
        swallowed; ``ft_model`` stays None in that case and train/predict
        will refuse to run.
        """
        print("\n[Arabic RF] Loading FastText...")
        if not os.path.exists(Config.FASTTEXT_AR_MODEL):
            try:
                fasttext.util.download_model('ar', if_exists='ignore')
            except Exception as exc:
                # Exception (not a bare except) so Ctrl-C still interrupts the download.
                print(f"  Warning: FastText download failed: {exc}")
        try:
            self.ft_model = fasttext.load_model(Config.FASTTEXT_AR_MODEL)
            print("  ✓ FastText loaded.")
        except Exception as exc:
            print(f"  Error loading FastText. ({exc})")

    def _require_fasttext(self):
        """Fail fast instead of silently embedding every text as a zero vector."""
        if self.ft_model is None:
            raise RuntimeError(
                "FastText model is not loaded; call load_fasttext() successfully first."
            )

    def vectorize_text(self, text: str):
        """Return the mean FastText vector of the whitespace-separated words.

        A zero vector of length Config.FASTTEXT_DIM is returned for blank or
        non-string input, or when no FastText model is loaded.
        """
        if not isinstance(text, str) or len(text.strip()) == 0:
            return np.zeros(Config.FASTTEXT_DIM)
        if self.ft_model is None:
            return np.zeros(Config.FASTTEXT_DIM)
        vecs = [self.ft_model.get_word_vector(w) for w in text.split() if w.strip()]
        return np.mean(vecs, axis=0) if vecs else np.zeros(Config.FASTTEXT_DIM)

    def _featurize(self, df):
        """Preprocess 'ModelText' and embed it; the caller's frame is not modified."""
        processed = df['ModelText'].apply(preprocess_classical_arabic)
        return np.array([self.vectorize_text(t) for t in processed])

    def train(self, df):
        """Fit the Random Forest on ``df`` ('ModelText' and 'Bias_Mapped' columns).

        Minority classes are oversampled with SMOTE when the smallest class
        has more than one sample.

        Raises:
            RuntimeError: if FastText has not been loaded.
            ValueError: if ``df`` has no rows.
        """
        print("\n[Arabic RF] Training...")
        self._require_fasttext()
        if len(df) == 0:
            raise ValueError("Cannot train the Arabic model on an empty frame.")

        X = self._featurize(df)
        y = df['Bias_Mapped'].map(self.label_encoder).values

        # SMOTE needs k_neighbors < size of the smallest class.
        smallest_class = min(Counter(y).values())
        if smallest_class > 1:
            k = min(5, smallest_class - 1)
            X, y = SMOTE(random_state=Config.RANDOM_STATE, k_neighbors=k).fit_resample(X, y)

        self.rf_model = RandomForestClassifier(
            n_estimators=100, random_state=Config.RANDOM_STATE, class_weight='balanced'
        )
        self.rf_model.fit(X, y)
        print("  ✓ Arabic RF trained.")

    def predict(self, df):
        """Return the predicted label name for every row of ``df``.

        Raises:
            RuntimeError: if FastText is not loaded or the model is untrained.
        """
        self._require_fasttext()
        if self.rf_model is None:
            raise RuntimeError("Arabic RF model is not trained; call train() first.")
        X = self._featurize(df)
        return [Config.ID2LABEL[p] for p in self.rf_model.predict(X)]

    def save(self, path):
        """Write the forest and its label mapping into ``path`` (created if needed).

        Raises:
            RuntimeError: if there is no trained model, so an existing artefact
                is never overwritten with None.
        """
        if self.rf_model is None:
            raise RuntimeError("Arabic RF model is not trained; nothing to save.")
        os.makedirs(path, exist_ok=True)
        joblib.dump(self.rf_model, os.path.join(path, 'rf_arabic.pkl'))
        joblib.dump({'label_encoder': self.label_encoder}, os.path.join(path, 'rf_arabic_metadata.pkl'))

class EnglishRFModel:
    """Random Forest over TF-IDF (1-2 gram) features for English text.

    Attributes set in ``__init__``:
        vectorizer: fitted TfidfVectorizer, or None until train runs.
        rf_model: fitted RandomForestClassifier, or None until train runs.
        label_encoder: Config.LABEL2ID (label name -> integer id).
    """

    def __init__(self):
        self.vectorizer = None
        self.rf_model = None
        self.label_encoder = Config.LABEL2ID

    def _require_trained(self, action):
        """Raise a clear error when the vectorizer or forest is missing."""
        if self.vectorizer is None or self.rf_model is None:
            raise RuntimeError(f"English RF model is not trained; cannot {action}.")

    def train(self, df):
        """Fit the TF-IDF vectorizer and the Random Forest on ``df``.

        ``df`` needs 'ModelText' and 'Bias_Mapped'; it is not modified.
        Minority classes are oversampled with SMOTE when the smallest class
        has more than one sample.

        Raises:
            ValueError: if ``df`` has no rows.
        """
        print("\n[English RF] Training...")
        if len(df) == 0:
            raise ValueError("Cannot train the English model on an empty frame.")

        # Local series instead of a new column: the caller's frame stays untouched.
        processed = df['ModelText'].apply(preprocess_classical_english)
        self.vectorizer = TfidfVectorizer(max_features=5000, stop_words='english', ngram_range=(1, 2))
        X = self.vectorizer.fit_transform(processed)
        y = df['Bias_Mapped'].map(self.label_encoder).values

        # SMOTE needs k_neighbors < size of the smallest class.
        smallest_class = min(Counter(y).values())
        if smallest_class > 1:
            k = min(5, smallest_class - 1)
            X, y = SMOTE(random_state=Config.RANDOM_STATE, k_neighbors=k).fit_resample(X, y)

        self.rf_model = RandomForestClassifier(
            n_estimators=100, random_state=Config.RANDOM_STATE, class_weight='balanced'
        )
        self.rf_model.fit(X, y)
        print("  ✓ English RF trained.")

    def predict(self, df):
        """Return the predicted label name for every row of ``df``.

        Raises:
            RuntimeError: if the model has not been trained.
        """
        self._require_trained("predict")
        processed = df['ModelText'].apply(preprocess_classical_english)
        X = self.vectorizer.transform(processed)
        return [Config.ID2LABEL[p] for p in self.rf_model.predict(X)]

    def save(self, path):
        """Write the forest, vectorizer and label mapping into ``path``.

        Raises:
            RuntimeError: if the model has not been trained, so existing
                artefacts are never overwritten with None.
        """
        self._require_trained("save")
        os.makedirs(path, exist_ok=True)
        joblib.dump(self.rf_model, os.path.join(path, 'rf_english.pkl'))
        joblib.dump(self.vectorizer, os.path.join(path, 'tfidf_english.pkl'))
        joblib.dump({'label_encoder': self.label_encoder}, os.path.join(path, 'rf_english_metadata.pkl'))


# ============================================================================
# MAIN
# ============================================================================

def main():
    """Run the training pipeline end to end.

    Loads and label-maps the data, builds the train/test split, then trains
    and saves the Arabic and the English Random Forest. After each model is
    saved, the held-out frame that was already being prepared is scored
    (accuracy and macro-F1), so a training run reports model quality.

    Returns None; returns early (after the loader has printed the missing
    path) when the main data file cannot be found.
    """
    np.random.seed(Config.RANDOM_STATE)

    try:
        main_df, iaa_df = load_and_clean_data()
    except FileNotFoundError:
        # load_and_clean_data has already printed which path is missing.
        return

    main_df = map_labels(main_df)
    iaa_df = map_labels(iaa_df) if len(iaa_df) > 0 else iaa_df
    train_df, test_df = create_train_test_split(main_df, iaa_df)

    def _report_holdout(tag, model, holdout_df):
        # One summary line per model, computed on the held-out rows.
        if len(holdout_df) == 0:
            print(f"  [{tag}] No held-out rows to score.")
            return
        gold = holdout_df['Bias_Mapped'].tolist()
        predicted = model.predict(holdout_df.copy())
        accuracy = accuracy_score(gold, predicted)
        macro_f1 = f1_score(gold, predicted, average='macro')
        print(f"  [{tag}] Held-out accuracy: {accuracy:.4f} | macro-F1: {macro_f1:.4f} "
              f"({len(holdout_df)} rows)")

    # Train Arabic RF
    print("\n" + "="*80 + "\nTRAINING ARABIC RANDOM FOREST\n" + "="*80)
    train_ar = prepare_text_columns(train_df, 'arabic')
    test_ar = prepare_text_columns(test_df, 'arabic')
    ar_model = ArabicRFModel()
    ar_model.load_fasttext()
    ar_model.train(train_ar)
    ar_model.save(Config.OUTPUT_DIR)
    _report_holdout("Arabic RF", ar_model, test_ar)

    # Train English RF
    print("\n" + "="*80 + "\nTRAINING ENGLISH RANDOM FOREST\n" + "="*80)
    train_en = prepare_text_columns(train_df, 'english')
    test_en = prepare_text_columns(test_df, 'english')
    en_model = EnglishRFModel()
    en_model.train(train_en)
    en_model.save(Config.OUTPUT_DIR)
    _report_holdout("English RF", en_model, test_en)

    print(f"\nCompleted! Models saved to {Config.OUTPUT_DIR}")

# Run the full training pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Mounted at /content/drive
✓ Google Drive mounted
CLASSICAL MODELS PIPELINE (Shared Drive + Data Maximization)

[STEP 1] Loading and cleaning data...
  Loading /content/drive/MyDrive/fignews_shared_project/data/Main.xlsx...
    MAIN: 10800 rows
  Loading /content/drive/MyDrive/fignews_shared_project/data/IAA-1.xlsx...
    /content/drive/MyDrive/fignews_shared_project/data/IAA-1.xlsx: 1200 rows
  Loading /content/drive/MyDrive/fignews_shared_project/data/IAA-2.xlsx...
    /content/drive/MyDrive/fignews_shared_project/data/IAA-2.xlsx: 1200 rows
  Loading /content/drive/MyDrive/fignews_shared_project/data/IAA-3.xlsx...
  Loading /content/drive/MyDrive/fignews_shared_project/data/IAA-4.xlsx...

  Total IAA: 2400 rows

[STEP 2] Creating train/test splits...

  Applying majority vote...

  Applying majority vote...

  Training: 11760 samples
  Test: 240 samples

TRAINING ARABIC RANDOM FOREST

[Arabic RF] Loading FastText...
Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.

In [7]:
"""
FIGNEWS-2024: STANDALONE EVALUATION (CLASSICAL MODELS) - FIXED v2
=================================================================
"""

# ============================================================================
# 1. SETUP & CONFIGURATION
# ============================================================================
import os
import joblib
import re
import string
import pandas as pd
import numpy as np
import fasttext
import fasttext.util
from sklearn.metrics import classification_report, accuracy_score, f1_score
from collections import Counter
from sklearn.model_selection import train_test_split
from google.colab import drive

# Attach Google Drive at /content/drive (the root Config.BASE_PATH points into);
# force_remount=True is passed on every run of this cell.
drive.mount('/content/drive', force_remount=True)

class Config:
    """Settings the evaluation cell uses to find data and models and to rebuild the split."""

    # Everything lives under the shared Drive project folder.
    BASE_PATH = "/content/drive/MyDrive/fignews_shared_project/"
    MAIN_FILE = "{}data/Main.xlsx".format(BASE_PATH)
    IAA_FILES = [
        "{}data/IAA-1.xlsx".format(BASE_PATH),
        "{}data/IAA-2.xlsx".format(BASE_PATH),
        "{}data/IAA-3.xlsx".format(BASE_PATH),
        "{}data/IAA-4.xlsx".format(BASE_PATH),
    ]
    OUTPUT_DIR = "{}models/classical/".format(BASE_PATH)

    # Raw annotation label -> target class.
    LABEL_MAP = {
        'Unbiased':                                 'Unbiased',
        'Biased against Palestine':                 'Biased Against Palestine',
        'Biased Against Palestine':                 'Biased Against Palestine',
        'Biased against Israel':                    'Biased Against Israel',
        'Biased Against Israel':                    'Biased Against Israel',
        'Unclear':                                  'Others',
        'Biased against others':                    'Others',
        'Biased against both':                      'Others',
        'Biased against both Palestine and Israel': 'Others',
        'Not Applicable':                           'Others',
        'Others':                                   'Others',
    }

    # A label's position in TARGET_LABELS is its integer id.
    TARGET_LABELS = [
        'Unbiased',
        'Biased Against Palestine',
        'Biased Against Israel',
        'Others',
    ]
    LABEL2ID = dict(zip(TARGET_LABELS, range(len(TARGET_LABELS))))
    ID2LABEL = dict(enumerate(TARGET_LABELS))

    # FastText model file and dimension; split ratio and seed.
    FASTTEXT_AR_MODEL = "cc.ar.300.bin"
    FASTTEXT_DIM = 300
    IAA_TRAIN_SPLIT = 0.8
    RANDOM_STATE = 42

# ============================================================================
# 2. PREPROCESSING & DATA FUNCTIONS (Fixed)
# ============================================================================
def preprocess_classical_arabic(text):
    """Normalise Arabic text for the classical model.

    Order of operations: URL removal, ':=:' -> space, Latin letters and digit
    runs removed, punctuation removed, letter variants unified, whitespace
    collapsed. Any non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    # URLs and the ':=:' separator
    stripped = re.sub(r'http\S+|www\.\S+', '', text)
    stripped = stripped.replace(':=:', ' ')

    # Latin letters, then digits
    stripped = re.sub(r'\d+', '', re.sub(r'[a-zA-Z]', '', stripped))

    # Punctuation (ASCII + Arabic)
    arabic_punc = '،؛؟!()[]{}"""\'\'`'
    stripped = stripped.translate(str.maketrans('', '', string.punctuation + arabic_punc))

    # Letter unification as one single-character translation table. The
    # replacements never feed into one another, so one pass equals the
    # sequence of substitutions.
    letter_table = str.maketrans({
        '\u0625': '\u0627',  # alef with hamza below -> bare alef
        '\u0623': '\u0627',  # alef with hamza above -> bare alef
        '\u0622': '\u0627',  # alef with madda       -> bare alef
        '\u0649': '\u064a',  # alef maqsura          -> ya
        '\u0629': '\u0647',  # ta marbuta            -> ha
        '\u0626': '\u064a',  # ya with hamza         -> ya
    })
    stripped = stripped.translate(letter_table)

    return re.sub(r'\s+', ' ', stripped).strip()

def preprocess_classical_english(text):
    """Normalise English text for the classical model.

    Removes URLs, replaces ':=:' with a space, strips hashtags, @mentions and
    digit runs, deletes ASCII punctuation, lower-cases and collapses
    whitespace. Any non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    no_links = re.sub(r'http\S+|www\.\S+', '', text)
    working = no_links.replace(':=:', ' ')

    # Hashtags first, then mentions, then digits.
    working = re.sub(r'#\w+', '', working)
    working = re.sub(r'@\w+', '', working)
    working = re.sub(r'\d+', '', working)

    no_punct_table = str.maketrans('', '', string.punctuation)
    working = working.translate(no_punct_table)
    working = working.lower()

    return re.sub(r'\s+', ' ', working).strip()

def clean_urls_and_format(text):
    """Drop URLs, replace ':=:' with a space and collapse whitespace.

    Any non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""
    without_links = re.sub(r'http\S+|www\.\S+', '', text)
    spaced = without_links.replace(':=:', ' ')
    collapsed = re.sub(r'\s+', ' ', spaced)
    return collapsed.strip()

def filter_valid_data(df):
    """Return a copy of ``df`` keeping rows with text in at least one text column.

    Checks whichever of 'Text', 'Arabic MT', 'English MT' exist. A cell counts
    as text only when it is not missing and is non-empty after stripping;
    without the notna() guard, astype(str) would turn NaN/None into the
    non-empty strings "nan"/"None". If none of the columns exist the copy is
    returned unfiltered.
    """
    df = df.copy()
    check_cols = [c for c in ['Text', 'Arabic MT', 'English MT'] if c in df.columns]
    if not check_cols:
        return df
    mask = df[check_cols].apply(
        lambda col: col.notna() & (col.astype(str).str.strip().str.len() > 0)
    ).any(axis=1)
    return df[mask].copy()

def load_data():
    """Reload the annotated spreadsheets for evaluation.

    The row filtering matches the loader in the training cell step for step —
    drop rows whose 'Bias' is missing or an empty string, strip the label,
    clean the text columns, drop text-less rows, and skip IAA files that have
    no labelled rows — so the set of IAA rows (and therefore the split
    derived from it) is the same one the models were trained against.

    Returns:
        (main_df, iaa_df). Both are empty DataFrames when Config.MAIN_FILE is
        missing; iaa_df is empty when no IAA file contributed rows.
    """
    print("Loading data to recreate test set...")
    if not os.path.exists(Config.MAIN_FILE):
        print(f"❌ Error: Cannot find {Config.MAIN_FILE}")
        return pd.DataFrame(), pd.DataFrame()

    text_columns = ['Text', 'Arabic MT', 'English MT']

    def _labelled_rows(frame):
        # Same label filter as training: present and not the empty string.
        frame = frame[frame['Bias'].notna() & (frame['Bias'] != '')].copy()
        frame['Bias'] = frame['Bias'].astype(str).str.strip()
        return frame

    def _scrub_text(frame):
        for c in text_columns:
            if c in frame.columns:
                frame[c] = frame[c].apply(clean_urls_and_format)
        return filter_valid_data(frame)

    main_df = _scrub_text(_labelled_rows(pd.read_excel(Config.MAIN_FILE)))

    iaa_dfs = []
    for f in Config.IAA_FILES:
        if not os.path.exists(f):
            continue
        t = pd.read_excel(f)
        if 'Bais' in t.columns:
            # Some IAA sheets carry the label under a misspelled header.
            t['Bias'] = t['Bais']
        t = _labelled_rows(t)
        if len(t) == 0:
            # Unannotated sheet: contributes nothing (training skips it too).
            continue
        iaa_dfs.append(_scrub_text(t))

    iaa_df = pd.concat(iaa_dfs, ignore_index=True) if iaa_dfs else pd.DataFrame()
    return main_df, iaa_df

def get_test_set():
    """Rebuild the held-out IAA test set used to score the saved models.

    The procedure follows the training cell's split exactly: the unique raw
    'ID' values of the IAA rows are split with Config.IAA_TRAIN_SPLIT and
    Config.RANDOM_STATE, the rows of the test ids are selected, and only then
    are repeated annotations collapsed by majority vote per
    'Text_ID' (= str(ID) + "_" + first 20 characters of Text).

    Splitting on 'ID' rather than on 'Text_ID' matters: the two only produce
    the same partition when every ID maps to exactly one Text_ID. Otherwise
    the arrays being shuffled differ and items the models were trained on
    could end up in the "test" set.

    Returns:
        One gold row per held-out item with a 'Bias_Mapped' column, or an
        empty DataFrame when the main file could not be loaded.
    """
    main_df, iaa_df = load_data()
    if main_df.empty:
        return pd.DataFrame()

    split_kwargs = dict(
        test_size=(1 - Config.IAA_TRAIN_SPLIT),
        random_state=Config.RANDOM_STATE,
    )

    if iaa_df.empty:
        # NOTE(review): the training cell puts every Main row into the training
        # set, so this fallback does not look like a true held-out set — confirm
        # before reporting numbers from it.
        print("Warning: No IAA files found. Using split from Main file only.")
        main_df = main_df.copy()
        main_df['Bias_Mapped'] = main_df['Bias'].map(Config.LABEL_MAP).fillna('Others')
        main_df['Text_ID'] = main_df['ID'].astype(str)
        _, test_ids = train_test_split(main_df['Text_ID'].unique(), **split_kwargs)
        return main_df[main_df['Text_ID'].isin(test_ids)].copy()

    iaa_df = iaa_df.copy()
    iaa_df['Bias_Mapped'] = iaa_df['Bias'].map(Config.LABEL_MAP).fillna('Others')

    # 1) Split on raw ID — the key the training cell split on.
    _, test_ids = train_test_split(iaa_df['ID'].unique(), **split_kwargs)
    iaa_test = iaa_df[iaa_df['ID'].isin(test_ids)].copy()

    # 2) Majority vote within the held-out rows.
    iaa_test['Text_ID'] = iaa_test['ID'].astype(str) + "_" + iaa_test['Text'].str[:20]
    gold_rows = []
    for _, g in iaa_test.groupby('Text_ID'):
        maj_label = Counter(g['Bias_Mapped']).most_common(1)[0][0]
        r = g.iloc[0].copy()
        r['Bias_Mapped'] = maj_label
        gold_rows.append(r)

    return pd.DataFrame(gold_rows)

def prepare_cols(df, lang):
    """Add a 'ModelText' column holding the text the ``lang`` model should read.

    For 'arabic': 'Text' when 'Source Language' contains 'Arabic', otherwise
    'Arabic MT'. For any other ``lang``: 'Text' when it contains 'English',
    otherwise 'English MT'. Rows whose selected text is blank are dropped.
    The input frame is left untouched.
    """
    if lang == 'arabic':
        native_tag, mt_column = 'Arabic', 'Arabic MT'
    else:
        native_tag, mt_column = 'English', 'English MT'

    def choose_text(record):
        # Original-language text wins when the source matches the target.
        source = str(record.get('Source Language', ''))
        if native_tag in source:
            return record['Text']
        return record[mt_column]

    prepared = df.copy()
    prepared['ModelText'] = prepared.apply(choose_text, axis=1)
    non_blank = prepared['ModelText'].str.strip().str.len() > 0
    return prepared[non_blank]

# ============================================================================
# 3. EVALUATION LOGIC
# ============================================================================

def evaluate_arabic():
    """Score the saved Arabic Random Forest on the rebuilt held-out set.

    Steps: load the pickled forest, rebuild and prepare the test rows, load
    (downloading if needed) the FastText model, embed each text as the mean
    of its word vectors, predict, and print a classification report.
    The cheap data checks run before the FastText load so that a missing or
    empty test set is reported without loading the large model first.

    Returns None; prints a message and returns early when the model file or
    the test data is missing.
    """
    print("\n" + "="*40 + "\nEVALUATING ARABIC MODEL\n" + "="*40)

    # 1. Load Model
    model_path = os.path.join(Config.OUTPUT_DIR, 'rf_arabic.pkl')
    if not os.path.exists(model_path): print(f"❌ Model not found: {model_path}"); return
    # NOTE(review): joblib.load unpickles arbitrary objects — only load files
    # from the trusted shared project folder.
    rf_model = joblib.load(model_path)

    # 2. Prepare Data
    test_df = get_test_set()
    if test_df.empty: print("❌ No test data found"); return

    test_ar = prepare_cols(test_df, 'arabic')
    if test_ar.empty: print("❌ No test data found"); return

    # 3. Load FastText
    if not os.path.exists(Config.FASTTEXT_AR_MODEL):
         print("Downloading FastText...")
         fasttext.util.download_model('ar', if_exists='ignore')

    print("Loading FastText (this may take a moment)...")
    ft_model = fasttext.load_model(Config.FASTTEXT_AR_MODEL)
    print("✓ FastText loaded")

    # 4. Predict
    print(f"Predicting on {len(test_ar)} samples...")
    # Local series: no column is written onto the filtered slice.
    processed = test_ar['ModelText'].apply(preprocess_classical_arabic)

    def vec(t):
        # Mean of the word vectors; zero vector (configured dimension) for blank text.
        v = [ft_model.get_word_vector(x) for x in t.split() if x.strip()]
        return np.mean(v, axis=0) if v else np.zeros(Config.FASTTEXT_DIM)

    X = np.array([vec(t) for t in processed])
    preds_idx = rf_model.predict(X)
    preds_label = [Config.ID2LABEL[p] for p in preds_idx]

    # 5. Report
    print(classification_report(test_ar['Bias_Mapped'], preds_label, digits=4))

def evaluate_english():
    """Score the saved English Random Forest on the rebuilt held-out set.

    Steps: load the pickled forest and TF-IDF vectorizer, rebuild and prepare
    the test rows, transform, predict, and print a classification report.

    Returns None; prints a message and returns early when either artefact or
    the test data is missing.
    """
    print("\n" + "="*40 + "\nEVALUATING ENGLISH MODEL\n" + "="*40)

    # 1. Load Model & Vectorizer
    model_path = os.path.join(Config.OUTPUT_DIR, 'rf_english.pkl')
    vect_path = os.path.join(Config.OUTPUT_DIR, 'tfidf_english.pkl')

    if not os.path.exists(model_path): print(f"❌ Model not found: {model_path}"); return
    # The vectorizer is a separate file; check it too rather than letting
    # joblib.load raise a raw FileNotFoundError.
    if not os.path.exists(vect_path): print(f"❌ Vectorizer not found: {vect_path}"); return
    # NOTE(review): joblib.load unpickles arbitrary objects — only load files
    # from the trusted shared project folder.
    rf_model = joblib.load(model_path)
    vectorizer = joblib.load(vect_path)
    print("✓ Models loaded")

    # 2. Prepare Data
    test_df = get_test_set()
    if test_df.empty: print("❌ No test data found"); return

    test_en = prepare_cols(test_df, 'english')
    if test_en.empty: print("❌ No test data found"); return

    # 3. Predict
    print(f"Predicting on {len(test_en)} samples...")
    # Local series: no column is written onto the filtered slice.
    processed = test_en['ModelText'].apply(preprocess_classical_english)
    X = vectorizer.transform(processed)

    preds_idx = rf_model.predict(X)
    preds_label = [Config.ID2LABEL[p] for p in preds_idx]

    # 4. Report
    print(classification_report(test_en['Bias_Mapped'], preds_label, digits=4))

# ============================================================================
# MAIN
# ============================================================================
# Score both saved models, Arabic first, when this cell/script is executed directly.
if __name__ == "__main__":
    evaluate_arabic()
    evaluate_english()

Mounted at /content/drive

EVALUATING ARABIC MODEL
Loading FastText (this may take a moment)...
✓ FastText loaded
Loading data to recreate test set...
Predicting on 240 samples...
                          precision    recall  f1-score   support

   Biased Against Israel     1.0000    1.0000    1.0000         1
Biased Against Palestine     0.8889    0.8533    0.8707        75
                  Others     0.7500    0.7059    0.7273        17
                Unbiased     0.9205    0.9456    0.9329       147

                accuracy                         0.9000       240
               macro avg     0.8899    0.8762    0.8827       240
            weighted avg     0.8989    0.9000    0.8992       240


EVALUATING ENGLISH MODEL
✓ Models loaded
Loading data to recreate test set...
Predicting on 240 samples...
                          precision    recall  f1-score   support

   Biased Against Israel     0.5000    1.0000    0.6667         1
Biased Against Palestine     0.8889    0.8533   