In [1]:
# CELL TO TEST THE MODEL WITH EXAMPLES

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re # Ensure re is imported

# --- Settings used when restoring the ENGLISH model from disk ---
# Every value below has to agree with what was used when the model was trained and saved.
# ***** USER: adjust the model name (and therefore the derived paths) to your setup *****
MODEL_NAME_FOR_ENGLISH_TEST = "bert-base-uncased"  # English checkpoint name (e.g. "roberta-base" if that is what was trained)
MAX_LEN_FOR_ENGLISH_TEST = 160  # Tokenizer max length; keep equal to the MAX_LEN used for training

# Model names may contain '/', which would be read as a directory separator
# inside a path, so it is swapped for '_' before building the file names.
_safe_model_name_for_path_en_test = "_".join(MODEL_NAME_FOR_ENGLISH_TEST.split('/'))
MODEL_PATH_FOR_ENGLISH_LOAD = "save_model_en/" + _safe_model_name_for_path_en_test + "_coherence_final_model.bin"
TOKENIZER_PATH_FOR_ENGLISH_LOAD = "save_model_en/" + _safe_model_name_for_path_en_test + "_coherence_tokenizer/"

# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE_FOR_ENGLISH_TEST = torch.device('cuda')
else:
    DEVICE_FOR_ENGLISH_TEST = torch.device('cpu')

# --- Text cleaning function for English prediction ---
def clean_text_for_english_prediction(text):
    """
    Normalize a piece of English text before it is tokenized for prediction.

    The input is converted with ``str()`` and then, in this order:
      1. ``<...>`` spans (HTML-style tags) are removed,
      2. every character that is not an ASCII letter, a digit, whitespace or
         one of ``. ? , ! '`` is dropped,
      3. the text is lower-cased,
      4. runs of whitespace are collapsed to a single space and the ends
         are trimmed.

    Args:
        text: Any object; it is stringified first, so non-string inputs are accepted.

    Returns:
        str: The cleaned text.
    """
    without_tags = re.sub(r'<.*?>', '', str(text))
    # Whitelist filter: anything outside the allowed character set is deleted outright.
    allowed_only = re.sub(r'[^a-zA-Z0-9\s\.\?,!\']', '', without_tags)
    single_spaced = re.sub(r'\s+', ' ', allowed_only.lower())
    return single_spaced.strip()

# --- Restore the English model and tokenizer from disk ---
def _load_english_model_and_tokenizer():
    """
    Load the tokenizer and the fine-tuned classifier described by the
    module-level configuration constants.

    Returns:
        tuple: (model, tokenizer) with the model moved to
        DEVICE_FOR_ENGLISH_TEST and switched to evaluation mode.

    Any exception raised by the underlying loaders propagates to the caller.
    """
    print(f"Loading English tokenizer from: {TOKENIZER_PATH_FOR_ENGLISH_LOAD}")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH_FOR_ENGLISH_LOAD)

    print(f"Loading English model architecture: {MODEL_NAME_FOR_ENGLISH_TEST}")
    # Build the architecture first; num_labels should match the label count
    # used during training (e.g. 2 for coherent / not coherent).
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_FOR_ENGLISH_TEST,
        num_labels=2
    )

    print(f"Loading saved model weights from: {MODEL_PATH_FOR_ENGLISH_LOAD}")
    # Overwrite the freshly built weights with the saved state dict.
    model.load_state_dict(
        torch.load(MODEL_PATH_FOR_ENGLISH_LOAD, map_location=DEVICE_FOR_ENGLISH_TEST)
    )
    model.to(DEVICE_FOR_ENGLISH_TEST)
    model.eval()  # VERY IMPORTANT: evaluation mode for inference
    return model, tokenizer


# Both names stay None unless loading succeeds; later cells check for that.
loaded_english_model = None
loaded_english_tokenizer = None

print("Attempting to load the English model and tokenizer from disk.")
print(f"Using device for loading: {DEVICE_FOR_ENGLISH_TEST}")

try:
    loaded_english_model, loaded_english_tokenizer = _load_english_model_and_tokenizer()
    print("English model and tokenizer loaded successfully from disk and ready for inference.")
except Exception as e:
    # Best-effort: report the problem and leave both names as None so the
    # notebook keeps running and later cells can skip prediction.
    print(f"Error loading model or tokenizer: {e}")
    print("Please ensure MODEL_NAME_FOR_ENGLISH_TEST, MODEL_PATH_FOR_ENGLISH_LOAD, and TOKENIZER_PATH_FOR_ENGLISH_LOAD are correct.")
    print("And that the model and tokenizer were saved correctly in the specified paths.")
    loaded_english_model = None
    loaded_english_tokenizer = None



  from .autonotebook import tqdm as notebook_tqdm


Attempting to load the English model and tokenizer from disk.
Using device for loading: cpu
Loading English tokenizer from: save_model_en/bert-base-uncased_coherence_tokenizer/
Loading English model architecture: bert-base-uncased


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading saved model weights from: save_model_en/bert-base-uncased_coherence_final_model.bin
English model and tokenizer loaded successfully from disk and ready for inference.


In [2]:
# --- Adapted Prediction Function (for English BERT-like models) ---
def predict_coherence(question, answer, model, tokenizer, device, max_len, clean_fn):
    """
    Predicts the coherence between a question and an answer using the provided model.

    Both texts are passed through ``clean_fn``, encoded together as one
    sentence pair and classified by ``model``. Exactly one pair is handled
    per call.

    Args:
        question (str): The input question.
        answer (str): The input answer.
        model: The loaded Hugging Face model for sequence classification.
            It is called with keyword tensors and must return an object
            exposing ``.logits``.
        tokenizer: The loaded Hugging Face tokenizer (invoked as a callable).
        device: The torch device (e.g., 'cuda' or 'cpu') the input tensors are moved to.
        max_len (int): The maximum sequence length for tokenization; inputs
            are truncated and padded to this length.
        clean_fn (function): The text cleaning function to apply to both texts.

    Returns:
        tuple: (predicted_class_index, probability_score)
               predicted_class_index (int): index of the highest-probability
                   class (in this notebook: 0 for not coherent, 1 for coherent).
               probability_score (float): softmax probability of that class.
               ``(None, None)`` is returned when ``model`` or ``tokenizer`` is None.
    """
    if model is None or tokenizer is None:
        print("Model or tokenizer not available for prediction.")
        return None, None

    cleaned_question = clean_fn(question)
    cleaned_answer = clean_fn(answer)

    # Call the tokenizer directly: `encode_plus` is deprecated in favour of
    # `__call__`. `return_token_type_ids` is intentionally left at its default
    # so the tokenizer itself decides whether segment ids belong to the model's
    # inputs, instead of forcing them onto models that do not take them.
    encoding = tokenizer(
        cleaned_question,
        cleaned_answer,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    model_inputs = {
        'input_ids': encoding['input_ids'].to(device),
        'attention_mask': encoding['attention_mask'].to(device),
    }
    # Segment ids are forwarded only when the tokenizer produced them.
    token_type_ids = encoding.get('token_type_ids')
    if token_type_ids is not None:
        model_inputs['token_type_ids'] = token_type_ids.to(device)

    with torch.no_grad():  # Inference only: no gradient tracking needed
        logits = model(**model_inputs).logits
        probs = torch.softmax(logits, dim=1)
        # Highest class probability and the index of that class.
        top_probability, top_index = torch.max(probs, dim=1)

    # `.item()` requires a single-element tensor, i.e. a batch of one pair.
    return top_index.cpu().item(), top_probability.cpu().item()


In [3]:

# Run the hand-written question/answer pairs through the loaded English model.
# The guard compares against None explicitly (the loading cell sets both names
# to None on failure). Plain truthiness is avoided because an object's
# `__len__`/`__bool__` may decide it, which is not the question being asked here.
if loaded_english_model is not None and loaded_english_tokenizer is not None:
    english_test_samples = [
        {"question": "Where can I find the milk?", "answer": "It's next to the cheese in the dairy section."},
        {"question": "Do you have gluten-free pasta?", "answer": "Yes, of course, it's in the special diet aisle, next to the organic products."},
        {"question": "I'm looking for ground coffee.", "answer": "The sky is blue and the birds are singing."}, # Incoherent
        {"question": "Where are the canned tomatoes?", "answer": "Have you thought about checking the weather forecast?"}, # Incoherent
        {"question": "What is the price of apples?", "answer": "They are $2.50 per pound this week."},
        {"question": "Do you have whole wheat bread?", "answer": "No, sorry, we are out of whole wheat bread today."},
        {"question": "I'd like some AA batteries.", "answer": "Batteries are usually found near the checkouts or in the electronics aisle."},
        {"question": "What time does the store close?", "answer": "We close at 9 PM tonight."},
        {"question": "Can you recommend a good red wine?", "answer": "I had toast for breakfast."}, # Incoherent
    ]

    print("\n--- Testing with specific English examples ---")
    for i, sample in enumerate(english_test_samples):
        q = sample["question"]
        a = sample["answer"]

        print(f"\nExample {i+1}:")
        print(f"  Question: \"{q}\"")
        print(f"  Answer:   \"{a}\"")

        # Every argument refers to the English setup defined in the earlier cells.
        pred_label, pred_prob = predict_coherence(
            question=q,
            answer=a,
            model=loaded_english_model,              # Loaded English model
            tokenizer=loaded_english_tokenizer,      # Loaded English tokenizer
            device=DEVICE_FOR_ENGLISH_TEST,          # Device the model lives on
            max_len=MAX_LEN_FOR_ENGLISH_TEST,        # Max token length for the English model
            clean_fn=clean_text_for_english_prediction # English cleaning function
        )

        # predict_coherence returns (None, None) when it could not run.
        if pred_label is not None:
            coherence_status = "Coherent" if pred_label == 1 else "Not Coherent"
            print(f"  Prediction: {coherence_status} (Label: {pred_label}, Confidence: {pred_prob:.4f})")
        else:
            print("  Prediction failed (model/tokenizer might not be loaded correctly).")
else:
    print("\nEnglish model or tokenizer not loaded. Cannot run test examples.")


--- Testing with specific English examples ---

Example 1:
  Question: "Where can I find the milk?"
  Answer:   "It's next to the cheese in the dairy section."
  Prediction: Coherent (Label: 1, Confidence: 0.9662)

Example 2:
  Question: "Do you have gluten-free pasta?"
  Answer:   "Yes, of course, it's in the special diet aisle, next to the organic products."
  Prediction: Coherent (Label: 1, Confidence: 0.9650)

Example 3:
  Question: "I'm looking for ground coffee."
  Answer:   "The sky is blue and the birds are singing."
  Prediction: Not Coherent (Label: 0, Confidence: 0.9527)

Example 4:
  Question: "Where are the canned tomatoes?"
  Answer:   "Have you thought about checking the weather forecast?"
  Prediction: Not Coherent (Label: 0, Confidence: 0.8576)

Example 5:
  Question: "What is the price of apples?"
  Answer:   "They are $2.50 per pound this week."
  Prediction: Coherent (Label: 1, Confidence: 0.9400)

Example 6:
  Question: "Do you have whole wheat bread?"
  Answer:  