## 1. Setup & Configuration

In [18]:
# Install requirements (uncomment if needed)
# !pip install -r requirements.txt

In [19]:
import os
import sys
import warnings
warnings.filterwarnings('ignore')

# Add project root to path
sys.path.insert(0, os.getcwd())

# Import standard libraries
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Set display options
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 50)

print("‚úÖ Standard libraries imported")

‚úÖ Standard libraries imported


In [20]:
# Load configuration
from src.config import load_config, get_config

# Load config from yaml and environment
config = load_config("config.yaml")

print("üìã Configuration loaded:")
print(f"   LLM Model: {config.llm.model}")
print(f"   Judge Model: {config.judge.model}")
print(f"   Embeddings Provider: {config.embeddings.provider}")
print(f"   Embeddings Model: {config.embeddings.model}")
print(f"   Chroma Collection: {config.chroma.collection}")
print(f"   Top-K: {config.retrieval.top_k}")
print(f"   API Key Set: {'‚úÖ' if config.openrouter_api_key else '‚ùå'}")

üìã Configuration loaded:
   LLM Model: mistralai/mistral-7b-instruct:free
   Judge Model: mistralai/mistral-7b-instruct:free
   Embeddings Provider: openrouter
   Embeddings Model: openai/text-embedding-3-small
   Chroma Collection: cv_rag
   Top-K: 6
   API Key Set: ‚úÖ


## 2. Load CV Data

In [21]:
from src.data.loaders import load_cvs, create_sample_cv_data, save_sample_data

# Check if sample data exists, create if not
cvs_path = Path(config.data.cvs_path)
if not cvs_path.exists():
    print("üìù Creating sample CV data...")
    save_sample_data(str(cvs_path))

# Load CVs
df_cvs = load_cvs(str(cvs_path))

print(f"\nüìÑ Loaded {len(df_cvs)} CVs")
print(f"   Columns: {list(df_cvs.columns)}")
df_cvs[["candidate_id", "name", "role", "location", "years_experience"]]


üìÑ Loaded 5 CVs
   Columns: ['candidate_id', 'name', 'email', 'role', 'location', 'years_experience', 'raw_text']


Unnamed: 0,candidate_id,name,role,location,years_experience
0,CV001,Alice Johnson,Senior Software Engineer,"San Francisco, CA",8
1,CV002,Bob Martinez,Data Scientist,"New York, NY",5
2,CV003,Carol Chen,Product Manager,"Seattle, WA",6
3,CV004,David Kim,DevOps Engineer,"Austin, TX",7
4,CV005,Emma Wilson,UX Designer,"Los Angeles, CA",4


In [22]:
# Preview a CV
sample_cv = df_cvs.iloc[0]
print(f"üìã Sample CV: {sample_cv['name']} ({sample_cv['role']})")
print("=" * 60)
print(sample_cv['raw_text'][:1500] + "...")

üìã Sample CV: Alice Johnson (Senior Software Engineer)

ALICE JOHNSON
Senior Software Engineer | San Francisco, CA
alice.johnson@email.com | (555) 123-4567 | linkedin.com/in/alicejohnson

SUMMARY
Experienced software engineer with 8+ years of expertise in building scalable web applications and distributed systems. Strong background in Python, JavaScript, and cloud technologies. Passionate about clean code and mentoring junior developers.

EXPERIENCE

Senior Software Engineer | TechCorp Inc. | 2020 - Present
- Led development of microservices architecture serving 10M+ daily users
- Implemented CI/CD pipelines reducing deployment time by 60%
- Mentored team of 5 junior developers
- Technologies: Python, FastAPI, Kubernetes, AWS, PostgreSQL

Software Engineer | StartupXYZ | 2017 - 2020
- Built real-time data processing pipeline handling 1M events/hour
- Developed RESTful APIs for mobile and web clients
- Reduced infrastructure costs by 40% through optimization
- Technologies: Python, Dj

## 3. Semantic Chunking

In [23]:
from src.data.chunking import CVChunker, chunk_dataframe, save_chunks

# Create chunker
chunker = CVChunker(
    split_experience_roles=True,
    min_chunk_length=50,
    max_chunk_length=2000,
)

# Chunk all CVs
chunks_df = chunk_dataframe(
    df_cvs,
    candidate_id_col="candidate_id",
    text_col="raw_text",
    chunker=chunker,
)

print(f"\n‚úÇÔ∏è Created {len(chunks_df)} chunks from {len(df_cvs)} CVs")
print(f"   Average chunks per CV: {len(chunks_df) / len(df_cvs):.1f}")


‚úÇÔ∏è Created 7 chunks from 5 CVs
   Average chunks per CV: 1.4


In [24]:
# View chunk distribution by section
section_counts = chunks_df['section_name'].value_counts()
print("üìä Chunks by Section:")
for section, count in section_counts.items():
    print(f"   {section}: {count}")

üìä Chunks by Section:
   header: 6
   publications: 1


In [25]:
# Preview chunks
display_cols = ['chunk_id', 'candidate_id', 'section_name', 'name']
existing_cols = [c for c in display_cols if c in chunks_df.columns]
chunks_df[existing_cols].head(10)

Unnamed: 0,chunk_id,candidate_id,section_name,name
0,768757522e20,CV001,header,Alice Johnson
1,fa63c002a8f3,CV002,header,Bob Martinez
2,18978d8b92ed,CV003,header,Carol Chen
3,800bef25d6bf,CV004,header,David Kim
4,eb97eaa3ef4a,CV005,header,Emma Wilson
5,9ca77c4b0f08,CV005,publications,Emma Wilson
6,75660f5aa874,CV005,header,Emma Wilson


In [26]:
# Save chunks (optional)
chunks_path = Path(config.data.chunks_path)
chunks_path.parent.mkdir(parents=True, exist_ok=True)
save_chunks(chunks_df, str(chunks_path))
print(f"üíæ Saved chunks to {chunks_path}")

üíæ Saved chunks to data\processed\chunks.parquet


## 4. Embeddings & Build Chroma Index

In [27]:
from src.embeddings.factory import EmbeddingsFactory
from src.vectordb.chroma_store import ChromaStore
from src.utils.timing import Timer

# Create embeddings
print(f"üî§ Creating embeddings with provider: {config.embeddings.provider}")
embeddings = EmbeddingsFactory.from_config(config)
print("‚úÖ Embeddings initialized")

üî§ Creating embeddings with provider: openrouter
‚úÖ Embeddings initialized


In [28]:
# Create Chroma store
chroma_store = ChromaStore(
    persist_dir=config.chroma.persist_dir,
    collection_name=config.chroma.collection,
    embeddings=embeddings,
)

# Check if index already exists
if chroma_store.load_index():
    print(f"üìÇ Loaded existing index with {chroma_store.document_count} documents")
else:
    print("üî® Building new index...")
    with Timer("Index build") as timer:
        chroma_store.build_index(
            chunks_df,
            text_col="chunk_text",
            id_col="chunk_id",
        )
    print(f"‚úÖ Built index with {chroma_store.document_count} documents in {timer.elapsed:.2f}s")

üìÇ Loaded existing index with 24 documents


In [29]:
# Test retrieval
test_query = "Python developer with AWS experience"
results = chroma_store.similarity_search_with_score(test_query, k=3)

print(f"üîç Test Query: '{test_query}'")
print("\nTop 3 Results:")
for i, (doc, score) in enumerate(results, 1):
    print(f"\n{i}. Score: {score:.4f}")
    print(f"   Candidate: {doc.metadata.get('candidate_id')}")
    print(f"   Section: {doc.metadata.get('section_name')}")
    print(f"   Preview: {doc.page_content[:150]}...")

üîç Test Query: 'Python developer with AWS experience'

Top 3 Results:

1. Score: 1.1174
   Candidate: CV004
   Section: header
   Preview: DAVID KIM
DevOps Engineer | Austin, TX
david.kim@email.com | (555) 321-0987 | github.com/davidkim

PROFILE
DevOps engineer with 7 years of experi...

2. Score: 1.1436
   Candidate: CV001
   Section: header
   Preview: ALICE JOHNSON
Senior Software Engineer | San Francisco, CA
alice.johnson@email.com | (555) 123-4567 | linkedin.com/in/alicejohnson

SUMMARY
Exper...

3. Score: 1.3727
   Candidate: CV002
   Section: header
   Preview: BOB MARTINEZ
Data Scientist | New York, NY
bob.martinez@email.com | (555) 987-6543 | github.com/bobmartinez

PROFILE
Data Scientist with 5 years ...


## 5. Query Demo (Single Query)

In [30]:
from src.rag.chain import RAGChain

# Create RAG chain
rag_chain = RAGChain.from_config(config, chroma_store)
print("üîó RAG Chain initialized")
print(f"   Model: {config.llm.model}")
print(f"   Top-K: {config.retrieval.top_k}")

üîó RAG Chain initialized
   Model: mistralai/mistral-7b-instruct:free
   Top-K: 6


In [31]:
# Reload templates to pick up fix
import importlib
import src.prompts.templates
importlib.reload(src.prompts.templates)
from src.prompts.templates import get_rag_prompt

In [32]:
# Single query demo
query = "Who has the most experience with Kubernetes and what are their qualifications?"

print(f"‚ùì Query: {query}")
print("=" * 70)

response = rag_chain.invoke(query)

print(f"\nüìù Answer:")
print(response.answer)

print(f"\n‚è±Ô∏è Timing:")
print(f"   Retrieval: {response.retrieval_time:.3f}s")
print(f"   Generation: {response.generation_time:.3f}s")
print(f"   Total: {response.retrieval_time + response.generation_time:.3f}s")

‚ùì Query: Who has the most experience with Kubernetes and what are their qualifications?

üìù Answer:
 The candidate with the most experience with Kubernetes is David Kim. Here are his qualifications and experience with Kubernetes:

- **Experience**:
  - Architected multi-region Kubernetes infrastructure handling 50K RPS [Candidate: CV004, Section: experience]
  - Built CI/CD pipelines for 50+ microservices [Candidate: CV004, Section: experience]
  - Technologies: Kubernetes, Terraform, AWS, Prometheus, Grafana [Candidate: CV004, Section: experience]

- **Certifications**:
  - Certified Kubernetes Administrator (CKA) [Candidate: CV004, Section: certifications]
  - Certified Kubernetes Security Specialist (CKS) [Candidate: CV004, Section: certifications]

- **Skills**:
  - Containers: Kubernetes, Docker, Helm, Istio [Candidate: CV004, Section: skills]

- **Projects**:
  - K8s Cost Optimizer - Open source tool for Kubernetes cost optimization [Candidate: CV004, Section: projects]

Alic

In [33]:
# Display sources
print(f"\nüìö Sources ({response.num_sources} chunks retrieved):")
print("=" * 70)

for i, source in enumerate(response.sources, 1):
    print(f"\n[{i}] Candidate: {source.candidate_id} | Section: {source.section_name}")
    print(f"    Chunk ID: {source.chunk_id}")
    if source.score:
        print(f"    Score: {source.score:.4f}")
    print(f"    Content: {source.content[:200]}...")


üìö Sources (6 chunks retrieved):

[1] Candidate: CV004 | Section: header
    Chunk ID: 
    Score: 0.8869
    Content: DAVID KIM
DevOps Engineer | Austin, TX
david.kim@email.com | (555) 321-0987 | github.com/davidkim

PROFILE
DevOps engineer with 7 years of experience in cloud infrastructure, automation, and site...

[2] Candidate: CV001 | Section: header
    Chunk ID: 
    Score: 1.2565
    Content: ALICE JOHNSON
Senior Software Engineer | San Francisco, CA
alice.johnson@email.com | (555) 123-4567 | linkedin.com/in/alicejohnson

SUMMARY
Experienced software engineer with 8+ years of expertis...

[3] Candidate: CV002 | Section: header
    Chunk ID: 
    Score: 1.2899
    Content: BOB MARTINEZ
Data Scientist | New York, NY
bob.martinez@email.com | (555) 987-6543 | github.com/bobmartinez

PROFILE
Data Scientist with 5 years of experience in machine learning, statistical mod...

[4] Candidate: CV003 | Section: header
    Chunk ID: 
    Score: 1.3194
    Content: CAROL CHEN
Product Mana

## 6. Batch Demo (Multiple Queries)

In [34]:
# Define test queries
test_queries = [
    "Which candidates have experience with machine learning and NLP?",
    "Find candidates with AWS certifications",
    "Who has product management experience at enterprise companies?",
    "Which designers have won awards for their work?",
    "Compare the education backgrounds of all candidates",
]

print(f"üîÑ Running batch of {len(test_queries)} queries...\n")

üîÑ Running batch of 5 queries...



In [35]:
# Run batch queries
batch_responses = rag_chain.batch_invoke(test_queries)

# Display results
for i, (query, response) in enumerate(zip(test_queries, batch_responses), 1):
    print(f"\n{'='*70}")
    print(f"Query {i}: {query}")
    print(f"{'='*70}")
    print(f"\n{response.answer[:500]}..." if len(response.answer) > 500 else f"\n{response.answer}")
    print(f"\n‚è±Ô∏è Time: {response.retrieval_time + response.generation_time:.2f}s | Sources: {response.num_sources}")


Query 1: Which candidates have experience with machine learning and NLP?

 Based on the provided CVs, only Bob Martinez (CV002) has experience with machine learning and NLP.

Relevant details from his CV:
- "Data Scientist with 5 years of experience in machine learning, statistical modeling, and data analysis. Expertise in NLP, computer vision, and recommendation systems." [Candidate: CV002, Section: header]
- "Developed NLP models for sentiment analysis achieving 94% accuracy" [Candidate: CV002, Section: experience]
- "Built recommendation engine increasing user enga...

‚è±Ô∏è Time: 4.40s | Sources: 6

Query 2: Find candidates with AWS certifications

 Here are the candidates with AWS certifications:

1. **David Kim** (CV004) - AWS Solutions Architect Professional [Candidate: CV004, Section: certifications]
2. **Alice Johnson** (CV001) - AWS Solutions Architect Professional [Candidate: CV001, Section: certifications]
3. **Bob Martinez** (CV002) - Uses AWS SageMaker and GCP AI Platfo

In [36]:
# Batch timing summary
batch_df = pd.DataFrame([
    {
        "query": q[:50] + "...",
        "retrieval_time": r.retrieval_time,
        "generation_time": r.generation_time,
        "total_time": r.retrieval_time + r.generation_time,
        "num_sources": r.num_sources,
        "answer_length": len(r.answer),
    }
    for q, r in zip(test_queries, batch_responses)
])

print("üìä Batch Query Summary:")
batch_df

üìä Batch Query Summary:


Unnamed: 0,query,retrieval_time,generation_time,total_time,num_sources,answer_length
0,Which candidates have experience with machine lear...,0.522698,3.878551,4.401249,6,751
1,Find candidates with AWS certifications...,0.316054,4.058564,4.374619,6,492
2,Who has product management experience at enterpris...,0.369751,2.785642,3.155394,6,670
3,Which designers have won awards for their work?...,0.322448,2.738126,3.060575,6,474
4,Compare the education backgrounds of all candidate...,0.720248,8.487695,9.207943,6,1549


## 7. Evaluation Pipeline

In [37]:
from src.eval.pipeline import EvaluationPipeline

# Create evaluation pipeline
eval_pipeline = EvaluationPipeline.from_config(config)
print(f"‚öñÔ∏è Evaluation Pipeline initialized")
print(f"   Judge Model: {config.judge.model}")

‚öñÔ∏è Evaluation Pipeline initialized
   Judge Model: mistralai/mistral-7b-instruct:free


In [38]:
# Evaluate batch responses
print(f"üîç Evaluating {len(batch_responses)} responses...\n")

eval_results = eval_pipeline.evaluate_batch(batch_responses)

üîç Evaluating 5 responses...



RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-min. ', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '16', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1768476300000'}, 'provider_name': None}}, 'user_id': 'user_38G32QtBpDVH8VepQay5IcVq2kr'}

In [None]:
# Display evaluation results as DataFrame
eval_df = eval_pipeline.to_dataframe()

# Select key columns for display
display_columns = [
    'query', 'relevance_score', 'faithfulness_score', 
    'correctness_score', 'average_score'
]

print("üìä Evaluation Results:")
eval_df[display_columns]

In [None]:
# Summary statistics
summary = eval_pipeline.get_summary_stats()

print("\nüìà Evaluation Summary:")
print(f"   Number of evaluations: {summary['num_evaluations']}")
print(f"   Avg Relevance Score: {summary['avg_relevance']}/5")
print(f"   Avg Faithfulness Score: {summary['avg_faithfulness']}/5")
print(f"   Avg Correctness Score: {summary['avg_correctness']}/5")
print(f"   Overall Average Score: {summary['avg_overall']}/5")
print(f"   Avg Retrieval Time: {summary['avg_retrieval_time']:.3f}s")
print(f"   Avg Generation Time: {summary['avg_generation_time']:.3f}s")

In [None]:
# Visualization: Evaluation Scores Bar Chart
fig, ax = plt.subplots(figsize=(10, 6))

metrics = ['relevance_score', 'faithfulness_score', 'correctness_score']
metric_labels = ['Relevance', 'Faithfulness', 'Correctness']
x = range(len(eval_df))
width = 0.25

for i, (metric, label) in enumerate(zip(metrics, metric_labels)):
    offset = (i - 1) * width
    ax.bar([xi + offset for xi in x], eval_df[metric], width, label=label)

ax.set_xlabel('Query Index')
ax.set_ylabel('Score (1-5)')
ax.set_title('Evaluation Scores by Query')
ax.set_xticks(x)
ax.set_xticklabels([f'Q{i+1}' for i in x])
ax.legend()
ax.set_ylim(0, 5.5)
ax.axhline(y=4, color='green', linestyle='--', alpha=0.5, label='Good threshold')

plt.tight_layout()
plt.savefig('./outputs/eval_scores_chart.png', dpi=150)
plt.show()

print("\nüìä Chart saved to ./outputs/eval_scores_chart.png")

In [None]:
# Visualization: Average Scores Pie Chart
fig, ax = plt.subplots(figsize=(8, 8))

avg_scores = [
    summary['avg_relevance'],
    summary['avg_faithfulness'],
    summary['avg_correctness'],
]

colors = ['#3498db', '#2ecc71', '#9b59b6']
explode = (0.05, 0.05, 0.05)

ax.pie(
    avg_scores, 
    labels=metric_labels, 
    autopct='%1.1f',
    colors=colors,
    explode=explode,
    startangle=90,
)
ax.set_title(f'Average Scores Distribution\n(Overall: {summary["avg_overall"]}/5)')

plt.tight_layout()
plt.savefig('./outputs/avg_scores_pie.png', dpi=150)
plt.show()

In [None]:
# Save evaluation results
eval_pipeline.save_results(
    csv_filename=config.outputs.eval_csv,
    json_filename=config.outputs.eval_json,
)

print(f"\nüíæ Results saved to:")
print(f"   - {config.outputs.dir}/{config.outputs.eval_csv}")
print(f"   - {config.outputs.dir}/{config.outputs.eval_json}")

In [None]:
# Display detailed explanations
print("\nüìã Detailed Evaluation Explanations:")
print("=" * 70)

for i, result in enumerate(eval_results, 1):
    print(f"\nQuery {i}: {result.query[:60]}...")
    print(f"  Relevance ({result.relevance_score}/5): {result.relevance_explanation[:100]}...")
    print(f"  Faithfulness ({result.faithfulness_score}/5): {result.faithfulness_explanation[:100]}...")
    print(f"  Correctness ({result.correctness_score}/5): {result.correctness_explanation[:100]}...")

## 8. Retrieval Evaluation (No LLM Required)

This section evaluates **retrieval quality only** ‚Äî no LLM generation needed.
We measure how well the retriever finds relevant CV chunks given ground-truth labels.

In [None]:
from src.eval.retrieval_eval import (
    RetrievalEvaluator,
    load_retrieval_eval_data,
    get_aggregate_metrics,
)

# Load retrieval evaluation dataset
eval_data_path = "./data/eval/retrieval_eval.jsonl"
retrieval_dataset = load_retrieval_eval_data(eval_data_path)

print(f"üìÇ Loaded {len(retrieval_dataset)} retrieval evaluation queries")
print("\nSample queries:")
for i, q in enumerate(retrieval_dataset[:3], 1):
    print(f"  {i}. {q.query[:60]}...")
    print(f"     Expected candidates: {q.expected_candidate_ids}")
    print(f"     Expected sections: {q.expected_section_names}")

In [None]:
# Create retrieval evaluator and run evaluation
retrieval_evaluator = RetrievalEvaluator(
    chroma_store=chroma_store,
    top_k=config.retrieval.top_k,
)

print(f"üîç Running retrieval evaluation with top_k={config.retrieval.top_k}...\n")

retrieval_results_df = retrieval_evaluator.evaluate(retrieval_dataset)

print(f"‚úÖ Evaluated {len(retrieval_results_df)} queries")
retrieval_results_df[["query", "hit_at_k", "precision_at_k", "recall_at_k", "mrr_at_k", "ndcg_at_k"]]

In [None]:
# Aggregate metrics summary
summary = retrieval_evaluator.get_summary()

print("üìä Retrieval Evaluation Summary:")
print("=" * 50)
print(f"  Number of queries:     {summary['num_queries']}")
print(f"  Avg Hit@k:             {summary['avg_hit_at_k']:.3f}")
print(f"  Avg Precision@k:       {summary['avg_precision_at_k']:.3f}")
print(f"  Avg Recall@k:          {summary['avg_recall_at_k']:.3f}")
print(f"  Avg MRR@k:             {summary['avg_mrr_at_k']:.3f}")
print(f"  Avg nDCG@k:            {summary['avg_ndcg_at_k']:.3f}")
print(f"  Total relevant found:  {summary['total_relevant_retrieved']}")

In [None]:
# Visualization: Retrieval Metrics Bar Chart
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Per-query metrics
ax1 = axes[0]
metrics_to_plot = ["hit_at_k", "precision_at_k", "recall_at_k", "mrr_at_k"]
x = range(len(retrieval_results_df))
width = 0.2

for i, metric in enumerate(metrics_to_plot):
    offset = (i - 1.5) * width
    ax1.bar([xi + offset for xi in x], retrieval_results_df[metric], width, label=metric.replace("_at_k", "").title())

ax1.set_xlabel("Query Index")
ax1.set_ylabel("Score (0-1)")
ax1.set_title("Retrieval Metrics by Query")
ax1.set_xticks(x)
ax1.set_xticklabels([f"Q{i+1}" for i in x], rotation=45)
ax1.legend(loc="upper right")
ax1.set_ylim(0, 1.1)
ax1.axhline(y=0.5, color="orange", linestyle="--", alpha=0.5)

# Aggregate metrics
ax2 = axes[1]
agg_metrics = ["avg_hit_at_k", "avg_precision_at_k", "avg_recall_at_k", "avg_mrr_at_k", "avg_ndcg_at_k"]
agg_labels = ["Hit@k", "Precision@k", "Recall@k", "MRR@k", "nDCG@k"]
agg_values = [summary[m] for m in agg_metrics]
colors = ["#3498db", "#2ecc71", "#e74c3c", "#9b59b6", "#f39c12"]

bars = ax2.bar(agg_labels, agg_values, color=colors)
ax2.set_ylabel("Average Score (0-1)")
ax2.set_title("Aggregate Retrieval Metrics")
ax2.set_ylim(0, 1.1)

# Add value labels on bars
for bar, val in zip(bars, agg_values):
    ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, f"{val:.2f}", 
             ha="center", va="bottom", fontsize=10)

plt.tight_layout()
plt.savefig("./outputs/retrieval_eval_chart.png", dpi=150)
plt.show()

print("\nüìä Chart saved to ./outputs/retrieval_eval_chart.png")

In [None]:
# Save retrieval evaluation results
retrieval_evaluator.save_results(
    output_dir="./outputs",
    csv_filename="retrieval_eval_results.csv",
    json_filename="retrieval_eval_results.json",
)

print("üíæ Results saved to:")
print("   - ./outputs/retrieval_eval_results.csv")
print("   - ./outputs/retrieval_eval_results.json")

## üéâ Complete!

This notebook demonstrated the full CV RAG pipeline:

1. ‚úÖ Configuration loading
2. ‚úÖ CV data loading
3. ‚úÖ Semantic chunking by CV sections
4. ‚úÖ Embedding generation and Chroma indexing
5. ‚úÖ Single query RAG
6. ‚úÖ Batch query processing
7. ‚úÖ LLM-as-judge evaluation
8. ‚úÖ **Retrieval evaluation** (Hit@k, Precision@k, Recall@k, MRR@k, nDCG@k)

### Next Steps:
- Run the Streamlit app: `streamlit run app.py`
- Customize prompts in `src/prompts/templates.py`
- Add more CVs to `data/cvs.csv`
- Add more retrieval eval queries to `data/eval/retrieval_eval.jsonl`
- Experiment with different models in `config.yaml`