In [None]:
import pandas as pd
import joblib
import os
from sklearn.metrics import accuracy_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# --- Artifact and Dataset Locations ---
# Every saved model artifact lives under this one folder (relative to the
# working directory the script/notebook is launched from).
_MODELS_DIR = '../models'

# Meta-model and vectorizer produced by the ensemble tuning step
META_MODEL_PATH = f'{_MODELS_DIR}/ensemble_meta_model.pkl'
VECTORIZER_PATH = f'{_MODELS_DIR}/vectorizer_advanced_tuned.pkl'

# Base models whose probability outputs feed the meta-model
GB_MODEL_PATH = f'{_MODELS_DIR}/gb_model_advanced_tuned.pkl'
# Local folder holding the TinyBERT model and tokenizer files
TINYBERT_MODEL_PATH = f'{_MODELS_DIR}/tinybert_finetuned'

# Labeled dataset to evaluate on (the sampled file keeps test runs fast)
TEST_DATA_PATH = '../data/test_labeled_dataset_sampled.csv'

def _first_missing_artifact():
    """Return the error lines for the first missing input, or None if all exist.

    Checks are evaluated in a fixed order (meta-model, vectorizer, gradient
    boosting model, test data, TinyBERT directory) and only the first failure
    is reported.
    """
    required = (
        (os.path.exists(META_MODEL_PATH),
         [f"Error: The meta-model file '{META_MODEL_PATH}' was not found."]),
        (os.path.exists(VECTORIZER_PATH),
         [f"Error: The vectorizer file '{VECTORIZER_PATH}' was not found."]),
        (os.path.exists(GB_MODEL_PATH),
         [f"Error: The Gradient Boosting model '{GB_MODEL_PATH}' was not found."]),
        (os.path.exists(TEST_DATA_PATH),
         [f"Error: The test data file '{TEST_DATA_PATH}' was not found."]),
        # The TinyBERT artifact is a folder, so it must be a directory.
        (os.path.isdir(TINYBERT_MODEL_PATH),
         [f"Error: The TinyBERT model directory '{TINYBERT_MODEL_PATH}' was not found.",
          "Please ensure your TinyBERT model is saved in this folder path."]),
    )
    for present, messages in required:
        if not present:
            return messages
    return None


def _tinybert_probabilities(texts, tokenizer, model, batch_size=32):
    """Return a DataFrame with one row of softmax class probabilities per text.

    Texts are tokenized and run through the model in batches instead of one
    at a time; padding to the longest sequence in each batch is masked out by
    the attention mask the tokenizer returns.

    Args:
        texts: list of strings to classify.
        tokenizer: tokenizer loaded for the model.
        model: sequence-classification model, already in eval mode.
        batch_size: number of texts per forward pass.
    """
    rows = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors='pt', truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        # One probability vector per text in the batch.
        rows.extend(torch.nn.functional.softmax(logits, dim=-1).numpy())
    return pd.DataFrame(rows)


def test_ensemble_model():
    """
    Loads the full ensemble pipeline, makes predictions on the labeled test
    data, and prints the performance report.

    The pipeline is: vectorizer + gradient boosting probabilities and TinyBERT
    softmax probabilities, concatenated column-wise and fed to the meta-model.
    Reads the 'text' and 'violation_type' columns of the CSV at TEST_DATA_PATH.

    Returns:
        None. All results and all error conditions (missing files, missing
        columns, no usable rows) are reported via print.
    """
    print("\n--- Starting Ensemble Model Evaluation ---")

    # --- 1. Load the Necessary Components ---
    missing_messages = _first_missing_artifact()
    if missing_messages is not None:
        for line in missing_messages:
            print(line)
        return

    print("Loading all models and data...")
    # NOTE(review): joblib.load unpickles arbitrary objects; only load
    # artifacts from a trusted location.
    meta_model = joblib.load(META_MODEL_PATH)
    vectorizer = joblib.load(VECTORIZER_PATH)
    gb_model = joblib.load(GB_MODEL_PATH)

    # TinyBERT model and tokenizer from the local directory
    tokenizer = AutoTokenizer.from_pretrained(TINYBERT_MODEL_PATH)
    tinybert_model = AutoModelForSequenceClassification.from_pretrained(TINYBERT_MODEL_PATH)
    tinybert_model.eval()

    test_df = pd.read_csv(TEST_DATA_PATH)
    print(f"Loaded {len(test_df)} reviews for testing.")

    # --- 2. Prepare the Data for the Ensemble Pipeline ---
    missing_columns = [
        column for column in ('text', 'violation_type')
        if column not in test_df.columns
    ]
    if missing_columns:
        print(f"Error: The test data is missing required column(s): {missing_columns}")
        return

    # Rows without text cannot be vectorized/tokenized and rows without a
    # label cannot be scored, so they are excluded rather than crashing later.
    usable_df = test_df.dropna(subset=['text', 'violation_type'])
    skipped = len(test_df) - len(usable_df)
    if skipped:
        print(f"Warning: Skipped {skipped} row(s) with missing text or label.")
    if usable_df.empty:
        print("Error: The test data contains no usable rows to evaluate.")
        return

    # Force string dtype so purely numeric reviews do not break the tokenizer.
    X_test_text = usable_df['text'].astype(str)
    y_true = usable_df['violation_type']

    # --- 3. Get Probability Predictions from the Base Models ---
    print("Generating predictions from base models...")

    # Gradient Boosting Model
    X_test_tfidf = vectorizer.transform(X_test_text)
    gb_probs = gb_model.predict_proba(X_test_tfidf)

    # TinyBERT Model
    tinybert_probs = _tinybert_probabilities(
        X_test_text.tolist(), tokenizer, tinybert_model
    )

    # --- 4. Combine Probabilities and Make Final Prediction ---
    # The meta-model was trained on the concatenated probabilities, so the
    # column order (gradient boosting first, TinyBERT second) must be kept.
    # ignore_index relabels the columns 0..n-1 so the two blocks do not end
    # up with duplicate column labels.
    X_meta_test = pd.concat(
        [pd.DataFrame(gb_probs), tinybert_probs], axis=1, ignore_index=True
    )

    print("Making final predictions with the RandomForest meta-model...")
    y_pred = meta_model.predict(X_meta_test)

    # --- 5. Evaluate and Report Results ---
    print("\n--- Evaluation Report ---")
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Overall Ensemble Accuracy on New Data: {accuracy:.4f}\n")

    report = classification_report(y_true, y_pred)
    print("Full Classification Report:\n")
    print(report)

# Run the evaluation only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    test_ensemble_model()



--- Starting Ensemble Model Evaluation ---
Error: The TinyBERT model directory '../models/tinybert_model' was not found.
Please ensure your TinyBERT model is saved in this folder path.
