In [5]:
# Import necessary libraries
import json
import sys
from typing import Dict, Any, List, Optional, Iterable

import numpy as np
import ollama
from opensearchpy import OpenSearch
from opensearchpy.exceptions import NotFoundError
from sentence_transformers import SentenceTransformer

# Set up Python path to access project modules
sys.path.insert(0, "..")

%load_ext autoreload
%autoreload 2

# Import from project modules
from src.constants import (
    ASSYMETRIC_EMBEDDING,
    OLLAMA_MODEL_NAME,
    OPENSEARCH_HOST,
    OPENSEARCH_PORT,
    OPENSEARCH_INDEX
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Embedding settings
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # Model for generating embeddings
EMBEDDING_DIMENSION = 384  # Embedding dimension for the model
# NOTE(review): this rebinds the ASSYMETRIC_EMBEDDING name imported from src.constants
# in the first cell, so later cells see False instead of the project value —
# confirm this override is intended.
ASSYMETRIC_EMBEDDING = False  # Whether to use asymmetric embeddings

# 1. Connect to OpenSearch and Set Up Hybrid Search

In [7]:
# Build the OpenSearch client from the host/port configured in src.constants.
client = OpenSearch(
    hosts=[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}],
    http_compress=True,
    timeout=30,
    max_retries=3,
    retry_on_timeout=True,
)

# Fail fast: every later cell needs a reachable cluster, so probe it once here
# and re-raise after printing a hint if the probe fails.
try:
    info = client.info()
    print(f"Successfully connected to OpenSearch {info['version']['number']}")
except Exception as e:
    print(f"Failed to connect to OpenSearch: {e}")
    # Report the endpoint that was actually used rather than a hard-coded one.
    print(f"Make sure OpenSearch is running on {OPENSEARCH_HOST}:{OPENSEARCH_PORT}")
    raise

Successfully connected to OpenSearch 2.19.2


In [8]:
# Verify that the search pipeline used by hybrid search is registered.
# (NotFoundError is imported with the other dependencies in the first cell.)
pipeline_name = "nlp-search-pipeline"

try:
    # Only the request itself is guarded; the success message lives in `else`
    # so it can never be mistaken for part of the failing operation.
    result = client.transport.perform_request(
        "GET",
        f"/_search/pipeline/{pipeline_name}"
    )
except NotFoundError:
    print(f"\n⚠️ Search pipeline '{pipeline_name}' does NOT exist.")
    print("This is required for hybrid search. Please run the prerequisites notebook.")
except Exception as e:
    # Best-effort check: report any other failure without stopping the notebook.
    print(f"\n🚨 Error: {e}")
else:
    print(f"\n✅ Search pipeline '{pipeline_name}' exists.")
     


✅ Search pipeline 'nlp-search-pipeline' exists.


In [9]:
def hybrid_search(
    query_text: str,
    query_embedding: List[float],
    top_k: int = 5,
    search_pipeline: str = "nlp-search-pipeline",
) -> List[Dict[str, Any]]:
    """
    Performs hybrid search combining text-based and vector-based queries.

    The request is sent to the index named by OPENSEARCH_INDEX through the
    module-level `client`.

    Args:
        query_text (str): The text query matched against the "text" field
        query_embedding (List[float]): The vector used for the KNN query on
            the "embedding" field
        top_k (int): Number of results to return (also used as the KNN `k`)
        search_pipeline (str): Name of the search pipeline applied to the
            request; defaults to the pipeline this notebook verifies earlier

    Returns:
        List[Dict[str, Any]]: The raw hits (`response["hits"]["hits"]`)
    """
    query_body = {
        # Stored vectors are not needed in the results; "excludes" is the
        # documented spelling of the source-filter key.
        "_source": {"excludes": ["embedding"]},
        "query": {
            "hybrid": {
                "queries": [
                    {"match": {"text": {"query": query_text}}},  # Text-based search
                    {
                        "knn": {
                            "embedding": {
                                "vector": query_embedding,
                                "k": top_k,
                            }
                        }
                    },  # Vector-based search
                ]
            }
        },
        "size": top_k,
    }

    print("\nExecuting hybrid search query...")
    try:
        # Try with search pipeline parameter (for newer OpenSearch versions)
        response = client.search(
            index=OPENSEARCH_INDEX,
            body=query_body,
            params={"search_pipeline": search_pipeline},  # Uses the pipeline for score normalization
        )
    except TypeError:
        # Fall back to without pipeline parameter for older versions.
        # NOTE(review): without the pipeline, scores are presumably not
        # normalized, so ranking may differ — confirm this is acceptable.
        print("Warning: OpenSearch client doesn't support search_pipeline parameter, using raw query")
        response = client.search(
            index=OPENSEARCH_INDEX,
            body=query_body,
        )

    return response["hits"]["hits"]

# 2. Process Query and Perform Search

In [10]:
# Loaded models keyed by model name, so repeated calls reuse the same instance
# instead of re-instantiating the model for every batch of texts.
_EMBEDDING_MODELS: Dict[str, Any] = {}


def get_embedding_model():
    """
    Returns the sentence transformer embedding model, loading it on first use.

    The model named by EMBEDDING_MODEL_NAME is instantiated once and cached;
    later calls with the same configured name return the cached instance.
    Changing EMBEDDING_MODEL_NAME causes the new model to be loaded on the
    next call.

    Returns:
        SentenceTransformer: The loaded embedding model.
    """
    model = _EMBEDDING_MODELS.get(EMBEDDING_MODEL_NAME)
    if model is None:
        print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
        model = SentenceTransformer(EMBEDDING_MODEL_NAME)
        _EMBEDDING_MODELS[EMBEDDING_MODEL_NAME] = model
    return model


def generate_embeddings(texts: List[str], prefix: str = "passage: ") -> np.ndarray:
    """
    Generates embeddings for a list of texts.

    Args:
        texts (List[str]): Texts to embed. The list is not modified.
        prefix (str): String prepended to every text when ASSYMETRIC_EMBEDDING
            is enabled. Defaults to "passage: "; a caller embedding search
            queries can pass a different prefix (presumably "query: " for
            asymmetric models — verify against the model's documentation).

    Returns:
        numpy.ndarray: The value returned by `model.encode(texts)`; with the
        sentence-transformers defaults this is one embedding row per input
        text, so `result[i]` is the vector for `texts[i]`.
    """
    model = get_embedding_model()

    # Asymmetric models expect a role prefix on each input; build a new list
    # rather than rebinding the caller-visible parameter.
    if ASSYMETRIC_EMBEDDING:
        inputs = [f"{prefix}{text}" for text in texts]
    else:
        inputs = texts

    return model.encode(inputs)

In [11]:
# Example question to run through the retrieval step
query = "What is the average rate of ice loss"
print(f"Query: '{query}'")

# Embed the question: a one-element batch goes in, so the first entry is the query vector
print("\nGenerating embedding for query...")
embeddings = generate_embeddings([query])
query_embedding = embeddings[0].tolist()
print(f"Generated embedding with dimension: {len(query_embedding)}")

# Number of hits to request from the index
top_k = 3
print(f"\nRetrieving top {top_k} documents...")

# Run the combined text + vector search
results = hybrid_search(query, query_embedding, top_k=top_k)

# Print each hit with its score, a 200-character preview, and its source document
print(f"\nSearch results for query: '{query}'\n")
for rank, hit in enumerate(results, start=1):
    score = hit['_score']
    print(f"Result {rank} (Score: {score:.3f}):")
    source = hit['_source']
    snippet = source['text'][:200]
    print(f"Text: {snippet}...")  # Showing truncated text
    print(f"Document: {source['document_name']}\n")

Query: 'What is the average rate of ice loss'

Generating embedding for query...
Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
Generated embedding with dimension: 384

Retrieving top 3 documents...

Executing hybrid search query...

Search results for query: 'What is the average rate of ice loss'

Result 1 (Score: 0.700):
Text: Gt yr−1 of ice loss is equivalent to about 0.28 mm yr−1 of global mean sea level rise.SPMSummary for Policymakers101900 1920 1940 1960 1980 2000−20−1001020 Year (1022 J)Change in global average upper ...
Document: climate

Result 2 (Score: 0.525):
Text: km2 per decade), and very likely in the range 9.4 to 13.6% per decade (range of 0.73 to 1.07 million km2 per decade) for the summer sea ice minimum (perennial sea ice). The average decrease in decadal...
Document: climate

Result 3 (Score: 0.300):
Text: evaporation and precipitation over the oceans have changed ( medium confidence ). {2.5, 3.3, 3.5} • There is no observational evidence of a tren

# 3. Generate Response with Ollama

In [12]:
# Prompt construction for the Ollama generation step
def generate_response_with_ollama(query: str, results: List[Dict], model_name: str = OLLAMA_MODEL_NAME):
    """
    Builds the prompt for Ollama from the retrieved search hits.

    No model call happens here: the function only formats the hits into a
    numbered context section, embeds it with the question in the prompt
    template, and hands back the prompt together with the model name.

    Args:
        query (str): The user's question
        results (List[Dict]): Search hits; each must provide ['_source']['text']
        model_name (str): The Ollama model name, returned unchanged

    Returns:
        tuple: A tuple containing (prompt, model_name)
    """
    # One numbered section per hit, numbered from 1 in result order
    sections = [
        f"Document {position}:\n{hit['_source']['text']}\n\n"
        for position, hit in enumerate(results, start=1)
    ]
    context = "".join(sections)

    # Fill the prompt template with the context and the question
    prompt = f"""You are a helpful AI assistant. Use the following context to answer the question.
If you cannot find the answer in the context, say so.

Context:
{context}

Question: {query}

Answer: """

    return prompt, model_name

In [13]:

# Make sure the generation model is available before prompting it
print(f"Ensuring Ollama model {OLLAMA_MODEL_NAME} is available...")
try:
    ollama.pull(OLLAMA_MODEL_NAME)
except ollama.ResponseError as e:
    # Report the failure and point at the manual install command
    print(f"Error pulling model: {e.error}")
    print("You might need to install the model manually with: ollama pull " + OLLAMA_MODEL_NAME)
else:
    print(f"Model {OLLAMA_MODEL_NAME} is ready.")

# Build the prompt from the earlier query and search results
prompt, model_name = generate_response_with_ollama(query, results)

Ensuring Ollama model llama3.2:1b is available...
Model llama3.2:1b is ready.


In [14]:
# Report the prompt size along with a short preview
print(f"\nPrompt created with {len(prompt)} characters")
print("First 200 characters of prompt:")
preview = prompt[:200]
print(preview + "...")

# Stream the completion: echo each piece as it arrives and accumulate the full text
print("\nGenerating response with Ollama...")
response = ""
print("\nResponse:")
stream = ollama.generate(model=model_name, prompt=prompt, stream=True)
for part in stream:
    text = part['response']
    print(text, end='', flush=True)
    response += text

print("\n\nResponse generation complete!")
print(f"Generated {len(response)} characters")


Prompt created with 9189 characters
First 200 characters of prompt:
You are a helpful AI assistant. Use the following context to answer the question.
If you cannot find the answer in the context, say so.

Context:
Document 1:
Gt yr−1 of ice loss is equivalent to about...

Generating response with Ollama...

Response:
The question asks for the average rate of ice loss from glaciers around the world, excluding glaciers on the periphery of the ice sheets. According to Document 3, the average rate of ice loss was very likely:

* From 1971 to 2009: 226 Gt yr−1
* From 1993 to 2009: 275 Gt yr−1

However, it's worth noting that these values are based on limited data and are considered high confidence levels. The actual rates of ice loss may be different or more variable.

Response generation complete!
Generated 454 characters
