# RankNet Search Demo

This notebook walks through the RankNet search engine end to end: initializing the engine, loading data, running a baseline search, training RankNet, comparing and evaluating the results, and saving the model.

In [None]:
# Make the project importable from this notebook.
import sys
import os

# '.' resolves to the kernel's working directory, so ONE dirname() yields its
# parent directory. The two-levels-up entry that this cell originally appended
# is kept as a fallback so any layout that relied on it keeps working.
_cwd = os.path.abspath('.')
_parent_dir = os.path.dirname(_cwd)
for _candidate in (_parent_dir, os.path.dirname(_parent_dir)):
    # Skip entries that are already present so re-running the cell does not
    # keep growing sys.path with duplicates.
    if _candidate not in sys.path:
        sys.path.append(_candidate)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Import our search engine
from src.engine import SearchEngine
from src.model import Document, Query
from src.utils import print_search_results, plot_training_history, plot_feature_importance

## 1. Initialize Search Engine

In [None]:
# Build the engine with the RankNet option switched on; every later cell
# reuses this `engine` object.
engine = SearchEngine(use_ranknet=True)
print("Search engine initialized!")

## 2. Load or Create Data

In [None]:
# Active data source: the CSV below is loaded into the engine.
# Alternative (left disabled): create sample documents instead with
#     engine.create_sample_data(n_docs=200)
csv_path = 'data/raw/medium_data.csv'
engine.load_data(csv_path)

# Report how many documents the engine now holds.
doc_total = len(engine.documents)
print(f"Total documents: {doc_total}")

## 3. Explore Documents

In [None]:
# Preview the first five documents held by the engine.
preview_items = list(engine.documents.items())[:5]
for position, (_doc_id, doc) in enumerate(preview_items, start=1):
    print(f"Document {position}:")
    print(f"  Title: {doc.title}")
    # Only the first 100 characters of the body are shown.
    print(f"  Content: {doc.content[:100]}...")
    print(f"  Author: {doc.author}")
    print(f"  Tags: {doc.tags}")
    print("")

## 4. Basic Search (Before Training)

In [None]:
# Baseline query, issued before the training cell has run.
# `query` is reused by the later RankNet and comparison cells.
query = "machine learning tutorial"
results = engine.search(query)
print_search_results(results, query)

## 5. Train RankNet Model

In [None]:
# Training hyperparameters, gathered in one place so they are easy to tune.
train_config = {"epochs": 30, "batch_size": 32}

# Fit the model and keep the returned history for plotting.
history = engine.train(**train_config)

# Visualise the returned training history.
plot_training_history(history)

## 6. Search with RankNet

In [None]:
# Re-issue the earlier `query`, this time after training, and keep the hits
# under a separate name for the comparison cell.
results_ranknet = engine.search(query)
print_search_results(results_ranknet, query)

## 7. Compare Results

In [None]:
# Second engine with RankNet turned off, used as the comparison baseline.
engine_basic = SearchEngine(use_ranknet=False)

# Point the baseline engine at the same documents, index and corpus stats
# as the main engine so both search the same data.
for shared_attr in ("documents", "document_index", "corpus_stats"):
    setattr(engine_basic, shared_attr, getattr(engine, shared_attr))

basic_results = engine_basic.search(query)

# Print the top five hits of each engine, one section after the other.
comparison_sections = (
    ("===== Basic Search Results =====", basic_results),
    ("\n===== RankNet Search Results =====", results_ranknet),
)
for heading, hits in comparison_sections:
    print(heading)
    for rank, hit in enumerate(hits[:5], start=1):
        print(f"{rank}. {hit['title']} (Score: {hit['score']:.4f})")

## 8. Evaluate Performance

In [None]:
# Run the engine's evaluation and report the two metrics read below.
metrics = engine.evaluate()

ndcg_at_10 = metrics['ndcg@10']
print(f"NDCG@10: {ndcg_at_10:.4f}")

evaluated_queries = metrics['num_queries']
print(f"Number of queries: {evaluated_queries}")

## 9. Save Model

In [None]:
# Persist the trained engine under MODEL_DIR.
MODEL_DIR = "data/models/"

# Create the target directory up front: on a fresh checkout it may not exist,
# and whether engine.save() creates it is not visible from this notebook.
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(MODEL_DIR, exist_ok=True)

engine.save(MODEL_DIR)
print(f"Model saved to {MODEL_DIR}")

## 10. Test Different Queries

In [None]:
# Spot-check a few more queries, showing the top three hits for each.
test_queries = [
    "python programming",
    "deep learning neural networks",
    "data science tutorial"
]

# The loop uses its own names (`test_query`, `test_results`) rather than
# `query` / `results`: the earlier RankNet and comparison cells read the
# notebook-level `query`, and rebinding it here would silently change what
# those cells search for when they are re-run.
for test_query in test_queries:
    print(f"\n\nQuery: {test_query}")
    test_results = engine.search(test_query, top_k=3)
    # Pass the query text as well, matching the other print_search_results
    # calls in this notebook.
    print_search_results(test_results, test_query)