# Model Testing and Evaluation

This notebook demonstrates testing and evaluating sentiment analysis models for both Vietnamese and English.

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Add project root to path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root)

from src.config import Config
from src.models.model_predictor import SentimentPredictor
from src.data.preprocessor import DataPreprocessor
from src.features.feature_engineering import FeatureExtractor

In [None]:
def check_model_exists(language, config):
    """Report whether a saved sentiment model file is present for ``language``.

    The file is looked up at
    ``<config.DATA_DIR>/models/<language>_sentiment_model.pkl``.
    """
    model_filename = f"{language}_sentiment_model.pkl"
    models_dir = os.path.join(config.DATA_DIR, "models")
    return os.path.exists(os.path.join(models_dir, model_filename))

def load_model_components(language):
    """Build the predictor, preprocessor and feature extractor for ``language``.

    Returns a ``(predictor, preprocessor, feature_extractor)`` tuple, or
    ``(None, None, None)`` when no model file is found or when constructing
    any of the components raises.
    """
    cfg = Config()
    nothing_loaded = (None, None, None)

    # Guard clause: skip construction entirely when no model file is on disk.
    if not check_model_exists(language, cfg):
        print(f"No trained model found for {language}")
        return nothing_loaded

    try:
        # Tuple elements are evaluated left to right, so the construction
        # order is predictor, then preprocessor, then feature extractor.
        loaded = (
            SentimentPredictor(language, cfg),
            DataPreprocessor(language, cfg),
            FeatureExtractor(language, cfg),
        )
        print(f"Successfully loaded model components for {language}")
        return loaded
    except Exception as e:
        print(f"Error loading model for {language}: {str(e)}")
        return nothing_loaded

# Load available models
# Maps a language code to its loaded components; languages whose components
# could not be loaded are left out.
available_models = {}
for lang in ['vi', 'en']:
    components = load_model_components(lang)
    # load_model_components signals failure with None placeholders. Test for
    # None explicitly: a successfully loaded object that happens to be falsy
    # (e.g. one defining __len__ or __bool__) must not be treated as missing.
    if all(component is not None for component in components):
        available_models[lang] = {
            'predictor': components[0],
            'preprocessor': components[1],
            'feature_extractor': components[2]
        }

if not available_models:
    print("No models available for testing. Please train models first.")

## Test Individual Samples

In [None]:
def predict_sentiment(text, language='vi'):
    """Run one text through the ``language`` pipeline and print the outcome.

    Returns ``(prediction, probabilities, detailed_emotions)`` on success, or
    ``None`` when no model is registered for ``language`` or any processing
    step raises.
    """
    if language not in available_models:
        print(f"No model available for {language}")
        return None

    pipeline = available_models[language]

    # The preprocessor is given a DataFrame, so wrap the text in a one-row frame.
    single_row = pd.DataFrame({'text': [text]})

    try:
        cleaned = pipeline['preprocessor'].preprocess(single_row)
        feats = pipeline['feature_extractor'].extract_features(cleaned['cleaned_text'])

        # Only one sample was submitted, so take element 0 of each output.
        model = pipeline['predictor']
        prediction = model.predict(feats)[0]
        probabilities = model.predict_proba(feats)[0]
        detailed_emotions = model.predict_detailed_emotion(feats)[0]

        sentiment_map = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

        print(f"Text: {text}")
        print(f"Sentiment: {sentiment_map[prediction]} (confidence: {max(probabilities):.2f})")
        print(f"Detailed emotion: {detailed_emotions['detailed_emotion']}")
        return prediction, probabilities, detailed_emotions
    except Exception as e:
        print(f"Error processing text: {str(e)}")
        return None

# Sample sentences to run through each available language model
test_samples = dict(
    vi=[
        "Sản phẩm tuyệt vời, rất đáng tiền",
        "Dịch vụ quá tệ, không bao giờ quay lại",
        "Tạm được, không tốt không xấu",
    ],
    en=[
        "This product is amazing, totally worth it!",
        "Terrible service, never coming back",
        "It's okay, nothing special",
    ],
)

for lang in available_models:
    # Header line followed by a 50-dash rule, one per line.
    print(f"\nTesting {lang.upper()} model:", "-" * 50, sep="\n")
    for sample in test_samples[lang]:
        predict_sentiment(sample, lang)
        print("-" * 30)

## Batch Testing

In [None]:
def evaluate_model(test_data, language='vi'):
    """Score the ``language`` model on ``test_data`` and display the results.

    Preprocesses ``test_data``, predicts from the ``cleaned_text`` column of
    the preprocessed frame, compares against its ``label`` column, draws the
    confusion matrix as a heatmap and prints the classification report.

    Returns ``(predictions, probabilities, conf_matrix, class_report)``, or
    ``None`` when no model is registered for ``language`` or any step raises.
    """
    if language not in available_models:
        print(f"No model available for {language}")
        return None

    pipeline = available_models[language]

    try:
        # Text -> cleaned frame -> feature representation
        prepared = pipeline['preprocessor'].preprocess(test_data)
        feats = pipeline['feature_extractor'].extract_features(prepared['cleaned_text'])

        # Hard labels plus per-class scores
        model = pipeline['predictor']
        predictions = model.predict(feats)
        probabilities = model.predict_proba(feats)

        # Metrics are computed against the labels of the preprocessed frame
        true_labels = prepared['label']
        conf_matrix = confusion_matrix(true_labels, predictions)
        class_report = classification_report(true_labels, predictions)

        # Heatmap of the confusion matrix
        plt.figure(figsize=(8, 6))
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
        plt.title(f'Confusion Matrix - {language.upper()}')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.show()

        print("\nClassification Report:", class_report, sep="\n")

        return predictions, probabilities, conf_matrix, class_report
    except Exception as e:
        print(f"Error evaluating model: {str(e)}")
        return None

# Run the batch evaluation for every language that has a loaded model
for lang in available_models:
    # Header line followed by a 50-dash rule, one per line.
    print(f"\nEvaluating {lang.upper()} model:", "-" * 50, sep="\n")

    try:
        csv_path = os.path.join(project_root, 'data', 'processed', f'{lang}_processed_data.csv')
        test_data = pd.read_csv(csv_path)
        evaluate_model(test_data, lang)
    except Exception as e:
        print(f"Error loading test data for {lang}: {str(e)}")

## Error Analysis

In [None]:
def analyze_errors(test_data, predictions, probabilities, language='vi'):
    """Summarise and plot the misclassified rows of ``test_data``.

    Prints the number of misclassified rows and the five with the highest
    confidence, then shows a histogram of the confidence of all errors.

    Args:
        test_data: DataFrame with ``text`` and ``label`` columns, one row per
            prediction. It is copied, not modified.
        predictions: Predicted labels, one per row of ``test_data``.
        probabilities: 2-D array-like of per-class scores, one row per sample;
            the row maximum is used as the confidence.
        language: Language code shown in the plot title.

    Returns:
        DataFrame holding the rows whose ``label`` differs from the prediction,
        with added ``predicted`` and ``confidence`` (rounded to 3 decimals)
        columns.
    """
    df = test_data.copy()
    # Assign by position. If predictions/probabilities arrive as pandas
    # objects with an index different from test_data's, label-based alignment
    # would silently pair rows with the wrong values (or NaN).
    df['predicted'] = np.asarray(predictions)
    df['confidence'] = np.max(np.asarray(probabilities), axis=1)

    # Find misclassified samples
    errors = df[df['label'] != df['predicted']].copy()
    errors['confidence'] = errors['confidence'].round(3)

    print(f"Total errors: {len(errors)}")
    print("\nSample errors with highest confidence:")
    print(errors.sort_values('confidence', ascending=False)[['text', 'label', 'predicted', 'confidence']].head())

    # Plot confidence distribution; the title carries the language so the
    # plots of different models can be told apart (as evaluate_model does).
    plt.figure(figsize=(10, 5))
    plt.hist(errors['confidence'], bins=20)
    plt.title(f'Confidence Distribution of Errors - {language.upper()}')
    plt.xlabel('Confidence')
    plt.ylabel('Count')
    plt.show()

    return errors

# Analyze errors for every language that has a loaded model.
# The evaluation loop earlier in the notebook does not keep its test data or
# results, so each language's data is loaded and evaluated again here before
# being handed to analyze_errors (this redraws the confusion matrix).
language_names = {'vi': 'Vietnamese', 'en': 'English'}
error_frames = {}
for lang in available_models:
    print(f"\n{language_names.get(lang, lang.upper())} Error Analysis:")

    try:
        test_data = pd.read_csv(os.path.join(project_root, 'data', 'processed', f'{lang}_processed_data.csv'))
    except Exception as e:
        print(f"Error loading test data for {lang}: {str(e)}")
        continue

    results = evaluate_model(test_data, lang)
    if results is None:
        # evaluate_model already reported why it could not produce results.
        continue

    try:
        error_frames[lang] = analyze_errors(test_data, results[0], results[1], lang)
    except (KeyError, ValueError) as e:
        # e.g. a missing 'text'/'label' column, or a prediction count that
        # does not match the number of rows in test_data.
        print(f"Error analyzing errors for {lang}: {str(e)}")

# Keep the per-language names; None when that language was not analyzed.
vi_errors = error_frames.get('vi')
en_errors = error_frames.get('en')