# Model Evaluation - YAKE vs YAKE+ML Comparison

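This notebook evaluates the trained model on the feedback data (classification report, confusion matrix, ROC curve) and compares pure YAKE rankings with YAKE+ML combined rankings across several `alpha` weights using precision@k.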

In [None]:
import os
import sys

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc

# The project-local import below needs '../..' on sys.path first
sys.path.append('../..')
from ml.predict import load_latest_model, combine_scores

# Apply seaborn's 'whitegrid' style to the figures drawn in this notebook
sns.set_style('whitegrid')

## Load Model and Data

In [None]:
# Location of the feedback data, relative to the project root
feedback_path = 'data/feedback.csv'

# Change to project root directory so relative paths work.
# Guarded so the cell is safe to re-run: an unconditional chdir('../..') would
# climb two more directories on every execution. Once the working directory
# already contains the feedback file, it is left alone.
if not os.path.exists(feedback_path):
    os.chdir('../..')
print(f"Working directory: {os.getcwd()}")

# Load trained model
model = load_latest_model()
print(f"Model loaded: {model is not None}")

# Explicit None check, consistent with the line above; plain truthiness would
# go through the object's __bool__/__len__ when it defines one.
if model is not None:
    print(f"Model type: {type(model).__name__}")

# Load feedback data
df = pd.read_csv(feedback_path)
print(f"Total samples: {len(df)}")

# Prepare features: feature matrix X and target vector y as NumPy arrays
feature_cols = ['length', 'yake_score', 'f1_wfreq', 'f2_wcase', 'f3_wpos', 'f4_wrel', 'f5_wspread']
X = df[feature_cols].values
y = df['label'].values

print(f"Features shape: {X.shape}")
# np.bincount needs non-negative integers — assumes 'label' is 0/1 with no
# missing values; TODO confirm against the feedback file
print(f"Labels distribution: {np.bincount(y)}")

## Model Performance Metrics

In [None]:
# The loading cell reports whether a model was found, so `model` may be None
# here. Fail with a clear message instead of an AttributeError on None.
if model is None:
    raise RuntimeError("No trained model is loaded; cannot compute predictions.")

# Hard class predictions, plus the probability from column 1 of predict_proba
y_pred = model.predict(X)
# NOTE(review): column 1 is used as the 'Approved' probability — presumably
# model.classes_ is [0, 1]; confirm against the trained model
y_proba = model.predict_proba(X)[:, 1]

# Classification report
# NOTE(review): X/y are the full feedback set; if the model was trained on the
# same file these numbers are optimistic — confirm whether a held-out split exists
print("Classification Report:")
print(classification_report(y, y_pred, target_names=['Rejected', 'Approved']))

In [None]:
# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap
class_names = ['Rejected', 'Approved']
cm = confusion_matrix(y, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts in each cell
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.tight_layout()
plt.show()

## ROC Curve

In [None]:
# ROC curve computed from the true labels and the predicted probabilities,
# summarised by the area under the curve
fpr, tpr, thresholds = roc_curve(y, y_proba)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line, labelled 'Random' in the legend
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
ax.set(
    xlim=(0.0, 1.0),
    ylim=(0.0, 1.05),  # small headroom above TPR = 1
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Receiver Operating Characteristic (ROC) Curve',
)
ax.legend(loc="lower right")
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

## Compare YAKE vs YAKE+ML Rankings

In [None]:
# For several alpha settings, rank the feedback keywords by their final score
# and count how many approved keywords (label summed as a count) land in the top-k.
yake_scores = df['yake_score'].values

# Alpha values to test; 1.0 is handled as pure YAKE (no ML contribution)
alphas = [0.5, 0.7, 0.9, 1.0]
top_k = 10  # loop-invariant, so defined once instead of on every iteration

for alpha in alphas:
    if alpha == 1.0:
        final_scores = yake_scores
        label = "Pure YAKE"
    else:
        final_scores = combine_scores(yake_scores, y_proba, alpha=alpha)
        label = f"Alpha={alpha}"

    # assign() returns a new frame with the extra column and leaves df untouched.
    # (The previously computed 'rank' column was never read, so it is dropped.)
    df_temp = df.assign(final_score=final_scores)

    # The smallest scores are taken as the best-ranked keywords.
    # NOTE(review): this assumes combine_scores() also returns lower-is-better
    # scores, like the raw YAKE score used for alpha == 1.0 — confirm in ml.predict
    top_keywords = df_temp.nsmallest(top_k, 'final_score')
    approved_in_top_k = top_keywords['label'].sum()

    print(f"\n{label}: {approved_in_top_k}/{top_k} approved keywords in top-{top_k}")

## Ranking Quality Visualization

In [None]:
# Sweep alpha from 0 to 1 and record precision@k of the resulting ranking,
# then plot one curve per k.
k_values = [5, 10, 15, 20]  # single definition shared by the sweep and the plot
results = []

for alpha in np.linspace(0, 1, 11):
    # alpha == 1 is handled as pure YAKE; isclose avoids depending on exact
    # floating-point equality of the generated grid values
    if np.isclose(alpha, 1.0):
        final_scores = yake_scores
    else:
        final_scores = combine_scores(yake_scores, y_proba, alpha=alpha)

    # New frame with the score column; df itself is not modified
    df_temp = df.assign(final_score=final_scores)

    # Precision@k: share of approved labels among the k smallest final scores.
    # NOTE(review): assumes combine_scores() returns lower-is-better scores — confirm
    for k in k_values:
        # Named top_rows (not top_k) so the integer top_k used elsewhere in the
        # notebook is not silently rebound to a DataFrame
        top_rows = df_temp.nsmallest(k, 'final_score')
        precision = top_rows['label'].sum() / k
        results.append({
            'alpha': alpha,
            'k': k,
            'precision': precision
        })

results_df = pd.DataFrame(results)

# Plot
plt.figure(figsize=(10, 6))
for k in k_values:
    data = results_df[results_df['k'] == k]
    plt.plot(data['alpha'], data['precision'], marker='o', label=f'P@{k}')

plt.xlabel('Alpha (1.0 = Pure YAKE, 0.0 = Pure ML)')
plt.ylabel('Precision@k')
plt.title('Ranking Quality vs Alpha Parameter')
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## Example Rankings Comparison

In [None]:
# Side-by-side example: the best-ranked keywords under pure YAKE versus the
# combined YAKE+ML score for one alpha.
alpha = 0.7
top_n = 10  # number of rows shown in each listing
final_scores = combine_scores(yake_scores, y_proba, alpha=alpha)

# Work on a copy so the extra columns do not leak into df
comparison_df = df[['keyword', 'yake_score', 'label']].copy()
comparison_df['ml_prob'] = y_proba
comparison_df['final_score'] = final_scores

# Best keywords by YAKE alone (smallest yake_score first)
print(f"Top {top_n} by Pure YAKE:")
print(comparison_df.nsmallest(top_n, 'yake_score')[['keyword', 'yake_score', 'label']])

# Best keywords by combined score. The header is built from `alpha` and `top_n`
# so it cannot drift out of sync when either value is changed.
# NOTE(review): nsmallest assumes combine_scores() returns lower-is-better scores — confirm
print(f"\nTop {top_n} by YAKE+ML (alpha={alpha}):")
print(comparison_df.nsmallest(top_n, 'final_score')[['keyword', 'final_score', 'ml_prob', 'label']])