In [15]:
import sys
import os
import numpy as np
from sentence_transformers import SentenceTransformer

# Add the project root to Python path and change to parent directory
project_root = '/home/ubuntu/ilab_stuff/fine-tuned-hybrid-rag'
os.chdir(project_root)
sys.path.append(project_root)

from retrieval.hybrid_retriever import load_retrieval_components, hybrid_retrieve

print("Imports successful")
print(f"Current working directory: {os.getcwd()}")

Imports successful
Current working directory: /home/ubuntu/ilab_stuff/fine-tuned-hybrid-rag


In [16]:
# Load base components (uses base model and embeddings)
print("Loading base retrieval components...")
base_components = load_retrieval_components()

print(f"Loaded {len(base_components['documents'])} documents")
print(f"Dense embeddings shape: {base_components['dense_embeddings'].shape}")
print(f"TF-IDF vocabulary size: {len(base_components['tfidf_vectorizer'].vocabulary_)}")
print(f"Base model: {base_components['model_name']}")
print(f"Device: {base_components['device']}")

Loading base retrieval components...
Loading hybrid retrieval components...
Loading dense embeddings from embeddings/dense.npy...
Loaded dense embeddings shape: (889289, 384)
Loading TF-IDF vectorizer from embeddings/tfidf_vectorizer.pkl...
TF-IDF vocabulary size: 10000
Loading documents from data/processed_docs.jsonl...


Loading documents: 889289it [00:29, 29726.11it/s]


Loaded 889289 documents
Loading sentence transformer model: sentence-transformers/all-MiniLM-L6-v2
Using device: cuda
All components loaded successfully!
Loaded 889289 documents
Dense embeddings shape: (889289, 384)
TF-IDF vocabulary size: 10000
Base model: sentence-transformers/all-MiniLM-L6-v2
Device: cuda


In [20]:
# Example query - using metadata-rich format that matches evaluation
query = "what is machine learning"

print(f"Query: '{query}'")
print(f"Searching with base model...")

# Retrieve with base model
base_results = hybrid_retrieve(
    query=query,
    components=base_components,
    top_k=5
)

print(f"\n=== BASE MODEL RESULTS (Top 5) ===")
for i, doc in enumerate(base_results):
    print(f"\n{i+1}. Title: {doc['metadata']['title'][:80]}...")
    print(f"   Final Score: {doc['scores']['final_score']:.4f}")
    print(f"   Dense: {doc['scores']['dense_score']:.4f} | Sparse: {doc['scores']['sparse_score']:.4f} | Boost: {doc['scores']['boost_score']:.4f}")
    if 'year' in doc['metadata']:
        print(f"   Year: {doc['metadata']['year']} | Venue: {doc['metadata'].get('venue', 'N/A')}")

Query: 'what is machine learning'
Searching with base model...
Retrieving top-5 documents for query: 'what is machine learning...'
Encoding query...
Computing dense similarities (top-1000)...
Computing sparse similarities...
Applying metadata boosting...
Combining scores...
Retrieved 5 documents

=== BASE MODEL RESULTS (Top 5) ===

1. Title: Will Skynet Need a Librarian? A Literature Review of Machine Learning–Based Data...
   Final Score: 0.7191
   Dense: 0.6086 | Sparse: 0.8464 | Boost: 1.0000
   Year: 2018 | Venue: 

2. Title: The papers of this issue on machine learning: editorial...
   Final Score: 0.6976
   Dense: 0.6380 | Sparse: 0.7160 | Boost: 1.0000
   Year: 1987 | Venue: Comput. Intell.

3. Title: Role of Intelligent Machines learning for the Successful Implementation of Busin...
   Final Score: 0.6872
   Dense: 0.6116 | Sparse: 0.7342 | Boost: 1.0000
   Year: 2019 | Venue: 

4. Title: Machine Learning for IoT...
   Final Score: 0.6871
   Dense: 0.6218 | Sparse: 0.7134 | Boo

In [18]:
# Load fine-tuned components
print("Loading fine-tuned components...")

# Start with base components
ft_components = load_retrieval_components()

# Replace with fine-tuned model and embeddings
config = ft_components['config']
ft_model_path = config['finetune']['output_path']
ft_embeddings_path = config['embeddings']['dense_finetuned_path']

print(f"Loading fine-tuned model from: {ft_model_path}")
print(f"Loading fine-tuned embeddings from: {ft_embeddings_path}")

# Load fine-tuned model
ft_components['sentence_model'] = SentenceTransformer(ft_model_path)

# Load fine-tuned embeddings
ft_components['dense_embeddings'] = np.load(ft_embeddings_path)

print(f"Fine-tuned model loaded")
print(f"Fine-tuned embeddings shape: {ft_components['dense_embeddings'].shape}")
print(f"Ready for improved retrieval!")

Loading fine-tuned components...
Loading hybrid retrieval components...
Loading dense embeddings from embeddings/dense.npy...
Loaded dense embeddings shape: (889289, 384)
Loading TF-IDF vectorizer from embeddings/tfidf_vectorizer.pkl...
TF-IDF vocabulary size: 10000
Loading documents from data/processed_docs.jsonl...


Loading documents: 889289it [00:44, 20102.11it/s]


Loaded 889289 documents
Loading sentence transformer model: sentence-transformers/all-MiniLM-L6-v2
Using device: cuda
All components loaded successfully!
Loading fine-tuned model from: finetune/model/
Loading fine-tuned embeddings from: embeddings/dense_finetuned.npy
Fine-tuned model loaded
Fine-tuned embeddings shape: (889289, 384)
Ready for improved retrieval!


In [21]:
# Same query with fine-tuned model
query = "what is machine learning"
print(f"Query: '{query}'")
print(f"Searching with fine-tuned model...")

ft_results = hybrid_retrieve(
    query=query,
    components=ft_components,
    top_k=100
)

print(f"\n=== FINE-TUNED MODEL RESULTS (Top 5) ===")
for i, doc in enumerate(ft_results):
    print(f"\n{i+1}. Title: {doc['metadata']['title'][:80]}...")
    print(f"   Final Score: {doc['scores']['final_score']:.4f}")
    print(f"   Dense: {doc['scores']['dense_score']:.4f} | Sparse: {doc['scores']['sparse_score']:.4f} | Boost: {doc['scores']['boost_score']:.4f}")
    if 'year' in doc['metadata']:
        print(f"   Year: {doc['metadata']['year']} | Venue: {doc['metadata'].get('venue', 'N/A')}")

Query: 'what is machine learning'
Searching with fine-tuned model...
Retrieving top-100 documents for query: 'what is machine learning...'
Encoding query...
Computing dense similarities (top-1000)...
Computing sparse similarities...
Applying metadata boosting...
Combining scores...
Retrieved 100 documents

=== FINE-TUNED MODEL RESULTS (Top 5) ===

1. Title: Machine Learning for IoT...
   Final Score: 0.7834
   Dense: 0.7823 | Sparse: 0.7134 | Boost: 1.0000
   Year: 2020 | Venue: 

2. Title: Machine Learning Techniques for Wireless-Powered Ambient Backscatter Communicati...
   Final Score: 0.7776
   Dense: 0.7479 | Sparse: 0.7630 | Boost: 1.0000
   Year: 2020 | Venue: Convergence of Artificial Intelligence and the Internet of Things

3. Title: Machine learning in the optimization of robotics in the operative field...
   Final Score: 0.7762
   Dense: 0.7648 | Sparse: 0.7246 | Boost: 1.0000
   Year: 2020 | Venue: Current opinion in urology

4. Title: Machine Learning Applications to Resti

In [22]:
# Example 1: Hard filters (exclude unwanted documents)
hard_filters = {
    "min_year": 2018,  # Only papers from 2018 onwards
    "excluded_venues": ["workshop"]  # Exclude workshop papers
}

print("=== QUERY WITH HARD FILTERS ===")
print(f"Filters: Only papers from 2018+, exclude workshops")

filtered_results = hybrid_retrieve(
    query=query,
    components=ft_components,
    user_filters=hard_filters,
    top_k=5
)

print(f"\nResults with hard filters:")
for i, doc in enumerate(filtered_results):
    year = doc['metadata'].get('year', 'N/A')
    venue = doc['metadata'].get('venue', 'N/A')
    print(f"{i+1}. {doc['metadata']['title'][:60]}...")
    print(f"   Year: {year} | Venue: {venue} | Score: {doc['scores']['final_score']:.4f}")

=== QUERY WITH HARD FILTERS ===
Filters: Only papers from 2018+, exclude workshops
Retrieving top-5 documents for query: 'what is machine learning...'
Encoding query...
Computing dense similarities (top-1000)...
Applying hard filters...
334 candidates after filtering
Computing sparse similarities...
Applying metadata boosting...
Combining scores...
Retrieved 5 documents

Results with hard filters:
1. Machine Learning for IoT...
   Year: 2020 | Venue:  | Score: 0.7834
2. Machine Learning Techniques for Wireless-Powered Ambient Bac...
   Year: 2020 | Venue: Convergence of Artificial Intelligence and the Internet of Things | Score: 0.7776
3. Machine learning in the optimization of robotics in the oper...
   Year: 2020 | Venue: Current opinion in urology | Score: 0.7762
4. Machine Learning for Mathematical Software...
   Year: 2019 | Venue:  | Score: 0.7602
5. Machine Learning Algorithms for Stratigraphy Classification ...
   Year: 2019 | Venue:  | Score: 0.7576


In [23]:
# Example 2: Soft boosting (prefer certain documents)
boost_filters = {
    "venue": "NIPS",  # Boost NIPS papers
    "field": "machine learning",  # Boost ML papers
    "year_after": 2019  # Boost recent papers
}

print("\n=== QUERY WITH SOFT BOOSTING ===")
print(f"Boosting: NIPS papers, ML field, recent papers (2019+)")

boosted_results = hybrid_retrieve(
    query=query,
    components=ft_components,
    user_filters=boost_filters,
    top_k=5
)

print(f"\nResults with boosting:")
for i, doc in enumerate(boosted_results):
    year = doc['metadata'].get('year', 'N/A')
    venue = doc['metadata'].get('venue', 'N/A')
    fields = doc['metadata'].get('fieldsOfStudy', [])
    print(f"{i+1}. {doc['metadata']['title'][:60]}...")
    print(f"   Year: {year} | Venue: {venue}")
    print(f"   Fields: {fields[:2]}...")  # Show first 2 fields
    print(f"   Score: {doc['scores']['final_score']:.4f} (Boost: {doc['scores']['boost_score']:.4f})")


=== QUERY WITH SOFT BOOSTING ===
Boosting: NIPS papers, ML field, recent papers (2019+)
Retrieving top-5 documents for query: 'what is machine learning...'
Encoding query...
Computing dense similarities (top-1000)...
Computing sparse similarities...
Applying metadata boosting...
Combining scores...
Retrieved 5 documents

Results with boosting:
1. Machine Learning for IoT...
   Year: 2020 | Venue: 
   Fields: ['Computer Science']...
   Score: 0.7854 (Boost: 1.0200)
2. Machine Learning Techniques for Wireless-Powered Ambient Bac...
   Year: 2020 | Venue: Convergence of Artificial Intelligence and the Internet of Things
   Fields: ['Computer Science']...
   Score: 0.7796 (Boost: 1.0200)
3. Machine learning in the optimization of robotics in the oper...
   Year: 2020 | Venue: Current opinion in urology
   Fields: ['Medicine']...
   Score: 0.7782 (Boost: 1.0200)
4. Machine Learning Applications to Resting-State Functional MR...
   Year: 2017 | Venue: Neuroimaging clinics of North America
 

In [24]:
# Analyze scoring components without metadata boosting 

def analyze_scores(results, model_name):
    print(f"\n=== SCORE ANALYSIS: {model_name} ===")
    
    dense_scores = [doc['scores']['dense_score'] for doc in results]
    sparse_scores = [doc['scores']['sparse_score'] for doc in results]
    boost_scores = [doc['scores']['boost_score'] for doc in results]
    final_scores = [doc['scores']['final_score'] for doc in results]
    
    print(f"Dense scores  - Min: {min(dense_scores):.4f}, Max: {max(dense_scores):.4f}, Avg: {np.mean(dense_scores):.4f}")
    print(f"Sparse scores - Min: {min(sparse_scores):.4f}, Max: {max(sparse_scores):.4f}, Avg: {np.mean(sparse_scores):.4f}")
    print(f"Boost scores  - Min: {min(boost_scores):.4f}, Max: {max(boost_scores):.4f}, Avg: {np.mean(boost_scores):.4f}")
    print(f"Final scores  - Min: {min(final_scores):.4f}, Max: {max(final_scores):.4f}, Avg: {np.mean(final_scores):.4f}")
    
    # Show scoring weights
    config = ft_components['config']
    print(f"\nScoring weights: Dense={config['scoring']['dense_weight']}, Sparse={config['scoring']['sparse_weight']}, Boost={config['scoring']['boost_weight']}")

# Analyze both models
analyze_scores(base_results, "BASE MODEL")
analyze_scores(ft_results, "FINE-TUNED MODEL")


=== SCORE ANALYSIS: BASE MODEL ===
Dense scores  - Min: 0.5843, Max: 0.6380, Avg: 0.6129
Sparse scores - Min: 0.7134, Max: 0.8464, Avg: 0.7524
Boost scores  - Min: 1.0000, Max: 1.0000, Avg: 1.0000
Final scores  - Min: 0.6762, Max: 0.7191, Avg: 0.6935

Scoring weights: Dense=0.6, Sparse=0.3, Boost=0.1

=== SCORE ANALYSIS: FINE-TUNED MODEL ===
Dense scores  - Min: 0.5211, Max: 0.8749, Avg: 0.7594
Sparse scores - Min: 0.1081, Max: 0.7840, Avg: 0.4056
Boost scores  - Min: 1.0000, Max: 1.0000, Avg: 1.0000
Final scores  - Min: 0.6224, Max: 0.7834, Avg: 0.6773

Scoring weights: Dense=0.6, Sparse=0.3, Boost=0.1
