In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import sys
import os

In [11]:
# Put the parent of the current working directory at the front of the import
# path (presumably so the `models` / `evaluation` imports in the next cell
# resolve -- verify against the repository layout). Skipped when the entry is
# already present, so re-running this cell does not add duplicates.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [12]:
from models.tfidf_cosine import TFIDFCosineSimilarity
from models.okapi_bm25 import BM25Similarity
from models.jaccard import JaccardSimilarity
from models.sbert import SentenceBERTSimilarity
from models.bertscore import BERTScoreSimilarity
from evaluation.metrics import evaluate_model
from evaluation.analyzer import generate_analysis_log

In [13]:
# Global plot styling: matplotlib's "seaborn-v0_8" style sheet plus seaborn's
# "husl" colour palette.
plt.style.use("seaborn-v0_8")
sns.set_palette(palette="husl")

In [15]:
def load_dataset(path='../dataset/philippine-legal-dataset.json'):
    """Load the Philippine legal dataset from a JSON file.

    Args:
        path: Location of the JSON file to read. Defaults to the path this
            notebook has always used, which is resolved relative to the
            current working directory.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [16]:
def initialize_models():
    """Build one instance of every similarity model.

    Returns:
        dict: display name -> model instance, in the order TF-IDF Cosine,
        BM25, Jaccard, Sentence BERT, BERTScore.
    """
    # Instantiated in the same order as they are listed in the result.
    tfidf_model = TFIDFCosineSimilarity(use_dimensionality_reduction=True, n_components=50)
    bm25_model = BM25Similarity()
    jaccard_model = JaccardSimilarity()
    sbert_model = SentenceBERTSimilarity(model_name='all-mpnet-base-v2')
    bertscore_model = BERTScoreSimilarity(model_type='distilbert-base-uncased')

    registry = {}
    registry['TF-IDF Cosine'] = tfidf_model
    registry['BM25'] = bm25_model
    registry['Jaccard'] = jaccard_model
    registry['Sentence BERT'] = sbert_model
    registry['BERTScore'] = bertscore_model
    return registry

In [17]:
def create_interactive_demo():
    """Display an interactive widget panel for trying the similarity models.

    The dataset and all models are loaded once. The panel shows an example
    picker (first ten dataset entries), a query text box, a model selector
    and a run button. Pressing the button:

    * looks for the dataset entry whose ``'query'`` equals the text box
      content exactly, falling back to the first entry's passages when no
      entry matches (i.e. for a custom query);
    * ranks that entry's passages with ``model.rank(query, passages)``;
    * prints the top three passages and renders a 2x2 Plotly figure
      (scores, relevance split, score vs. relevance, label counts).

    Returns:
        None. All results are rendered via ``display`` and the output widget.
    """

    # Load data and models
    dataset = load_dataset()
    models = initialize_models()

    # Create widgets
    query_widget = widgets.Textarea(
        value="What is the function of the Presidential Electoral Tribunal?",
        placeholder="Enter your query here...",
        description="Query:",
        layout=widgets.Layout(width='100%', height='80px')
    )

    model_widget = widgets.Dropdown(
        options=list(models.keys()),
        value='Sentence BERT',
        description="Model:",
    )

    example_widget = widgets.Dropdown(
        options=[(f"Example {i+1}: {item['query'][:50]}...", i)
                for i, item in enumerate(dataset[:10])],
        description="Examples:",
    )

    # Created before the callback that writes into it so the definition
    # order matches the usage order.
    output = widgets.Output()

    def load_example(change):
        # Copy the picked example's query into the text box.
        if change['new'] is not None:
            example = dataset[change['new']]
            query_widget.value = example['query']

    example_widget.observe(load_example, names='value')

    def run_similarity(b):
        with output:
            clear_output(wait=True)

            query = query_widget.value
            model_name = model_widget.value
            model = models[model_name]

            # Use the entry whose query matches exactly; for a custom query
            # fall back to the first example's passages for demo purposes.
            example = next(
                (item for item in dataset if item['query'] == query), None
            )
            if example is None:
                example = dataset[0]

            passages = [p['text'] for p in example['passages']]
            labels = [p['relevant'] for p in example['passages']]
            tags = [p['label'] for p in example['passages']]

            # Calculate similarities; highest score first.
            scores = model.rank(query, passages)
            sorted_indices = np.argsort(scores)[::-1]

            # Create results DataFrame
            results_df = pd.DataFrame({
                'Rank': range(1, len(passages) + 1),
                'Passage': [passages[i] for i in sorted_indices],
                'Score': [scores[i] for i in sorted_indices],
                'Relevant': [labels[i] for i in sorted_indices],
                'Label': [tags[i] for i in sorted_indices]
            })

            # Display results
            print(f"Query: {query}")
            print(f"Model: {model_name}")
            print("=" * 80)

            # Show top 3 results
            for i in range(min(3, len(results_df))):
                row = results_df.iloc[i]
                status = "✅ Relevant" if row['Relevant'] else "❌ Irrelevant"
                print(f"\nRank {row['Rank']}: {status} (Score: {row['Score']:.4f})")
                print(f"Label: {row['Label']}")
                print(f"Passage: {row['Passage'][:200]}...")

            # Create visualization. The pie chart in cell (1, 2) needs a
            # "domain" subplot: Plotly refuses to place a pie trace on the
            # default "xy" (cartesian) subplot type.
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=('Similarity Scores', 'Relevance Distribution',
                              'Score vs Relevance', 'Label Distribution'),
                specs=[[{"type": "xy"}, {"type": "domain"}],
                       [{"type": "xy"}, {"type": "xy"}]]
            )

            # Bar chart of scores (green = relevant, red = irrelevant)
            colors = ['green' if rel else 'red' for rel in results_df['Relevant']]
            fig.add_trace(
                go.Bar(x=results_df['Rank'], y=results_df['Score'],
                      marker_color=colors, name='Similarity Score'),
                row=1, col=1
            )

            # Relevance pie chart
            relevance_counts = results_df['Relevant'].value_counts()
            fig.add_trace(
                go.Pie(labels=['Irrelevant', 'Relevant'],
                      values=[relevance_counts.get(False, 0), relevance_counts.get(True, 0)],
                      name='Relevance'),
                row=1, col=2
            )

            # Scatter plot: Score vs Relevance
            fig.add_trace(
                go.Scatter(x=results_df['Score'], y=results_df['Relevant'].astype(int),
                          mode='markers', marker=dict(size=10, color=colors),
                          name='Score vs Relevance'),
                row=2, col=1
            )

            # Label distribution
            label_counts = results_df['Label'].value_counts()
            fig.add_trace(
                go.Bar(x=label_counts.index, y=label_counts.values,
                      name='Label Distribution'),
                row=2, col=2
            )

            fig.update_layout(height=800, showlegend=False,
                            title_text=f"Similarity Analysis: {model_name}")
            fig.show()

    button = widgets.Button(description="Run Similarity Analysis",
                           button_style='primary')
    button.on_click(run_similarity)

    # Display widgets
    display(widgets.VBox([
        widgets.HTML("<h2>Interactive Text Similarity Demo</h2>"),
        example_widget,
        query_widget,
        model_widget,
        button,
        output
    ]))

In [19]:
def performance_comparison():
    """Evaluate every model on the dataset and chart the results.

    Each model from ``initialize_models`` is passed to ``evaluate_model``
    together with the full dataset. The collected results are shown as a
    table, as a 2x2 grid of quality metrics, and as a 1x2 grid of resource
    usage.

    Returns:
        pandas.DataFrame: one row per model; the columns read here are
        'Precision@1', 'Precision@3', 'Recall@3', 'MRR',
        'Runtime (in seconds)' and 'Peak Memory (in KB)'.
    """
    dataset = load_dataset()
    models = initialize_models()

    print("Running performance comparison...")
    per_model_metrics = {}
    for name, model in models.items():
        print(f"Evaluating {name}...")
        per_model_metrics[name] = evaluate_model(model, dataset)

    # Transpose so that models are rows and metrics are columns.
    summary = pd.DataFrame(per_model_metrics).T

    # Results table
    display(HTML("<h3>Performance Comparison</h3>"))
    display(summary.round(4))

    # Quality metrics: one bar chart per metric, filled row by row.
    quality_metrics = ('Precision@1', 'Precision@3', 'Recall@3', 'MRR')
    quality_fig = make_subplots(rows=2, cols=2, subplot_titles=quality_metrics)
    for slot, metric in enumerate(quality_metrics):
        grid_row, grid_col = divmod(slot, 2)
        quality_fig.add_trace(
            go.Bar(x=summary.index, y=summary[metric], name=metric),
            row=grid_row + 1, col=grid_col + 1
        )
    quality_fig.update_layout(
        height=600,
        showlegend=False,
        title_text="Model Performance Comparison",
    )
    quality_fig.show()

    # Resource usage: runtime on the left, peak memory on the right.
    resource_columns = (
        ('Runtime (in seconds)', 'Runtime'),
        ('Peak Memory (in KB)', 'Memory'),
    )
    resource_fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Runtime (seconds)', 'Peak Memory (KB)'),
    )
    for grid_col, (column, trace_name) in enumerate(resource_columns, start=1):
        resource_fig.add_trace(
            go.Bar(x=summary.index, y=summary[column], name=trace_name),
            row=1, col=grid_col
        )
    resource_fig.update_layout(
        height=400,
        showlegend=False,
        title_text="Resource Usage Comparison",
    )
    resource_fig.show()

    return summary

In [20]:
def live_monitoring_dashboard():
    """Display a dashboard comparing selected models on a selected metric.

    Shows a multi-select of models and a metric dropdown. Whenever either
    changes, the selected models' results are plotted as a bar chart of the
    chosen metric, followed by a table of all metrics.

    Each model is evaluated with ``evaluate_model`` at most once per
    dashboard: results are cached by model name, so switching the metric or
    re-selecting a model redraws from the cache instead of repeating the
    evaluation. As a consequence, every column of the table reflects the
    model's first evaluation in this dashboard.

    Returns:
        None. Everything is rendered via ``display`` and the output widget.
    """

    dataset = load_dataset()
    models = initialize_models()

    # Create monitoring widgets
    model_select = widgets.SelectMultiple(
        options=list(models.keys()),
        value=['Sentence BERT', 'BM25'],
        description="Models:",
        rows=5
    )

    metric_select = widgets.Dropdown(
        options=['Precision@1', 'Precision@3', 'Recall@3', 'MRR'],
        value='Precision@1',
        description="Metric:"
    )

    # Created before the callback that writes into it.
    monitoring_output = widgets.Output()

    # model name -> result of evaluate_model(model, dataset). The dataset and
    # the model instances are fixed for the lifetime of this dashboard, so
    # there is no need to re-evaluate on every widget change.
    metrics_cache = {}

    def update_monitoring(change):
        # `change` is unused; the current widget values are read directly so
        # the same function serves both observers and the initial render.
        with monitoring_output:
            clear_output(wait=True)

            selected_models = model_select.value
            selected_metric = metric_select.value

            if not selected_models:
                print("Please select at least one model")
                return

            # Evaluate only the models that have not been evaluated yet.
            results = {}
            for model_name in selected_models:
                if model_name not in metrics_cache:
                    metrics_cache[model_name] = evaluate_model(
                        models[model_name], dataset
                    )
                results[model_name] = metrics_cache[model_name]

            # Models as rows, metrics as columns.
            df = pd.DataFrame(results).T

            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=df.index,
                y=df[selected_metric],
                text=df[selected_metric].round(4),
                textposition='auto',
            ))

            fig.update_layout(
                title=f"Live Performance Monitoring: {selected_metric}",
                xaxis_title="Models",
                yaxis_title=selected_metric,
                height=400
            )

            fig.show()

            # Show detailed metrics table
            display(HTML("<h4>Detailed Metrics</h4>"))
            display(df.round(4))

    model_select.observe(update_monitoring, names='value')
    metric_select.observe(update_monitoring, names='value')

    display(widgets.VBox([
        widgets.HTML("<h3>Live Performance Monitoring</h3>"),
        widgets.HBox([model_select, metric_select]),
        monitoring_output
    ]))

    # Initial update
    update_monitoring(None)

In [None]:
if __name__ == "__main__":
    print("Text Similarity Evaluation Demo")
    print("=" * 50)

    # Run the three demo stages in order: the interactive demo, the
    # full performance comparison, then the live monitoring dashboard.
    demo_stages = (
        create_interactive_demo,
        performance_comparison,
        live_monitoring_dashboard,
    )
    for run_stage in demo_stages:
        run_stage()

In [None]:
# NOTE(review): bare string expression whose only effect is to echo 'hi' as the
# cell output -- looks like a leftover scratch cell; confirm and delete.
'hi'