In [1]:
import os
import json
import openai
from openai import OpenAI
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer


# Populate os.environ from a local .env file, if one is present.
load_dotenv()

# Elasticsearch location; both parts can be overridden through the environment.
elasticsearch_host = os.getenv("ELASTICSEARCH_HOST", "localhost")
elasticsearch_port = os.getenv("ELASTICSEARCH_PORT", "9200")
elasticsearch_url = "http://{}:{}".format(elasticsearch_host, elasticsearch_port)

# Module-level clients reused by the search helpers defined later in the notebook.
es = Elasticsearch(elasticsearch_url)
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')


  from tqdm.autonotebook import tqdm, trange
2024-10-18 08:37:02.689815: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-18 08:37:02.709242: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-18 08:37:02.709280: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-18 08:37:02.722026: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  retur

In [2]:
def knn_query(question):
    """Build the `knn` clause of the hybrid Elasticsearch request.

    The question is embedded with the module-level `model` and matched
    against the `text_vector` field.
    """
    question_vector = model.encode(question)
    knn_clause = dict(
        field="text_vector",
        query_vector=question_vector,
        k=5,
        num_candidates=10000,
        boost=0.5,
    )
    return knn_clause

def keyword_query(question):
    """Build the full-text half of the hybrid search request.

    Produces a `bool`/`must` clause wrapping a `multi_match` over
    `description` (boosted x3), `text` and `title`.
    """
    searched_fields = ["description^3", "text", "title"]
    multi_match = {
        "query": f"{question}",
        "fields": searched_fields,
        "type": "best_fields",
        "boost": 0.5,
    }
    return {"bool": {"must": {"multi_match": multi_match}}}

def multi_search(key_word):
    """Run a hybrid (keyword + kNN) search against the `video-content` index.

    Returns a list of dicts holding `title`, `timecode_text`, `link` and
    `text` taken from each hit's `_source`. The request asks for `size=10`.
    """
    response = es.search(
        index='video-content',
        query=keyword_query(key_word),
        knn=knn_query(key_word),
        size=10,
    )
    wanted_fields = ('title', 'timecode_text', 'link', 'text')
    results = []
    for hit in response["hits"]["hits"]:
        source = hit['_source']
        results.append({name: source[name] for name in wanted_fields})
    return results


In [3]:
# Read the key from the environment (load_dotenv() ran in the first cell) instead of
# relying on a variable left over from an earlier kernel session; with the assignment
# commented out, the next line raised NameError on a fresh kernel.
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)
def ask_openai(prompt):
    """Send `prompt` to `gpt-4o-mini` as a single user turn.

    Returns the message object of the first choice (not just its text), so
    callers read the answer from its `.content` attribute.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"{prompt}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message
def make_context(question, records):
    """Build the prompt sent to the LLM for one question.

    Args:
        question: The student's question; inserted under the QUESTION heading.
        records: Search results to ground the answer; inserted under the
            RECORDS heading using their default string form.

    Returns:
        The prompt text. It instructs the model to reply with a dictionary
        whose keys are summary, title and link, without a code fence.
    """
    return f"""
        You are a helpful program which is given a quesion and then the results from video transcripts which should answer the question given.
        This is an online course about using Retreival Augmented Generation (RAGS) and LLMs as well as how to evaluate the results of elasticsearch
        and the answers from the LLMs, how to monitor the restults etc. In the Course we used MAGE as an Orchestrator and PostgreSQL to capture user
        feedback. Given the question below, look at the records in the RECORDS section and return the best matching video link and a short summary
        and answer if you are able to which will answer the students question. Again your response should be a link from the records in the RECORDS
        section below. 

        Please return your answer as a dictionary without json```. Just have summary, title and link as as keys in the dictionary.

        QUESTION:
        {question}

        RECORDS:
        {records}
        """

def get_answer(question):
    """Answer `question` from the indexed video transcripts.

    Retrieves candidate records with `multi_search`, wraps them in the prompt
    built by `make_context`, and sends that prompt to `ask_openai`.

    Returns:
        Whatever `ask_openai` returns, or an empty string if the completion
        call raises.
    """
    search_results = multi_search(question)
    prompt = make_context(question, search_results)
    try:
        answer = ask_openai(prompt)
    except Exception as e:
        # Best-effort: report the failure and fall back to "" so the notebook
        # keeps running. The API key is deliberately not echoed; printing it
        # would leak the secret into saved cell output.
        print(f"Error during OpenAI completion: {e}")
        answer = ""
    return answer

In [4]:
# Sample question reused by the pipeline and caching cells below.
question = 'When did we start to use elasticsearch?'

In [18]:
# Run retrieval + generation for the sample question (calls Elasticsearch and OpenAI).
results = get_answer(question)

In [19]:
results

ChatCompletionMessage(content='{\n    "summary": "In this video, we discuss the implementation of Elasticsearch to replace a toy search engine used previously. We explain the reasons for using Elasticsearch, its advantages, and how to set it up for indexing data effectively. The video includes practical examples of running Elasticsearch and creating an index for our dataset.",\n    "title": "LLM Zoomcamp 1.6 - Search with Elasticsearch",\n    "link": "https://www.youtube.com/watch?v=1lgbR5wMvsI&t=0s"\n}', role='assistant', function_call=None, tool_calls=None, refusal=None)

In [20]:
# Parse the model's reply (a JSON object in text form) into a dict.
# NOTE(review): get_answer returns "" when the completion fails, and "" has no
# `.content`, so this line would raise AttributeError in that case.
response = json.loads(results.content) if isinstance(results.content, str) else results.content

In [21]:
type(response)

dict

In [26]:
# Save the parsed answer to disk; a later cell reads this file back into `response`.
with open('example_response.json','w') as outfile:
    json.dump(response, outfile)

In [5]:
import redis
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize Redis and SentenceTransformer
# Redis is expected on localhost:6379, database 0.
# NOTE(review): `model` is rebound here to the same checkpoint name already loaded in the first cell.
r = redis.Redis(host='localhost', port=6379, db=0)
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

In [6]:
# Reload the answer saved earlier in the notebook; it is used below as the response to cache.
with open('example_response.json','r') as infile:
    response = json.load(infile)

In [7]:
response

{'summary': 'In this video, we discuss the implementation of Elasticsearch to replace a toy search engine used previously. We explain the reasons for using Elasticsearch, its advantages, and how to set it up for indexing data effectively. The video includes practical examples of running Elasticsearch and creating an index for our dataset.',
 'title': 'LLM Zoomcamp 1.6 - Search with Elasticsearch',
 'link': 'https://www.youtube.com/watch?v=1lgbR5wMvsI&t=0s'}

In [8]:
def encode_question(question):
    """Return the embedding of `question` produced by the module-level `model`."""
    embedding = model.encode(question)
    return embedding

In [9]:
# Embed the sample question once; the cache lookup and store cells below reuse it.
enc_question = encode_question(question)

In [12]:
def get_cached_entry(redis_client, question_embedding, threshold=0.85, key_pattern="question:*"):
    """Find the cached entry whose embedding is most similar to `question_embedding`.

    Args:
        redis_client: Client exposing `scan_iter(match=...)` and `get(key)`.
        question_embedding: 1-D embedding of the incoming question.
        threshold: Minimum cosine similarity for an entry to count as a hit.
        key_pattern: Glob restricting the scan to the `question:<text>` keys
            this cache uses, so unrelated keys in the same database are ignored.

    Returns:
        `(entry, similarity)` for the best entry at or above `threshold`,
        or `(None, 0)` when nothing qualifies.
    """
    best_entry, best_similarity = None, 0
    # SCAN walks the keyspace incrementally instead of blocking the server the way KEYS does.
    for key in redis_client.scan_iter(match=key_pattern):
        raw = redis_client.get(key)
        if raw is None:
            # The key expired between the scan and the read.
            continue
        try:
            cached_data = json.loads(raw)
            cached_embedding = np.array(cached_data['embedding'])
        except (TypeError, ValueError, KeyError):
            # Not an entry in the expected JSON shape; skip it rather than abort the lookup.
            continue

        similarity = cosine_similarity([question_embedding], [cached_embedding])[0][0]

        # Keep the best match rather than the first one over the threshold, so an
        # exact match is never shadowed by a merely similar entry scanned earlier.
        if similarity >= threshold and (best_entry is None or similarity > best_similarity):
            best_entry, best_similarity = cached_data, similarity

    return best_entry, best_similarity

In [17]:
# Look up the sample question in the cache; the result is an (entry, similarity) tuple.
cached_answer = get_cached_entry(r, enc_question)

In [18]:
cached_answer

({'question': 'When did we start to use elasticsearch?',
  'embedding': [0.007949301041662693,
   0.025332283228635788,
   -0.023968027904629707,
   0.0077902753837406635,
   -0.05190315097570419,
   0.018772074952721596,
   -0.09641619026660919,
   0.02446819841861725,
   -0.011942150071263313,
   0.0014577661640942097,
   0.007056320086121559,
   0.10180263221263885,
   0.0009428968769498169,
   0.047538306564092636,
   -0.020036065950989723,
   0.015526373870670795,
   -0.04972099885344505,
   -0.04703520983457565,
   0.05575082451105118,
   -0.06379717588424683,
   -0.036731746047735214,
   0.007071054074913263,
   0.0001245723251486197,
   -0.05400277301669121,
   0.0024815951474010944,
   -0.01078138779848814,
   -0.05635342374444008,
   -0.027055038139224052,
   0.03084401786327362,
   0.00273871049284935,
   -0.0020149596966803074,
   -0.06800607591867447,
   0.011005599051713943,
   0.052050065249204636,
   0.012564333155751228,
   0.03227859362959862,
   0.03964609280228615,


In [None]:
def handle_new_question(question):
    """Placeholder for the real answer pipeline.

    Should eventually return the response for the new question; for now it
    ignores `question` and always returns the same canned summary/title/link.
    """
    canned_response = dict(
        summary='In this video, we discuss the implementation of Elasticsearch to replace a toy search engine used previously...',
        title='LLM Zoomcamp 1.6 - Search with Elasticsearch',
        link='https://www.youtube.com/watch?v=1lgbR5wMvsI&t=0s',
    )
    return canned_response

In [15]:
def store_in_redis(redis_client, question, embedding, response, ttl_seconds=86400):
    """Cache `response` for `question` together with the question's embedding.

    Args:
        redis_client: Client exposing `set(name, value, ex=...)`.
        question: Question text; also used to build the key `question:<text>`.
        embedding: Question embedding (NumPy array or plain sequence of numbers).
        response: JSON-serializable answer to cache.
        ttl_seconds: Lifetime of the entry in seconds (default: 24 hours).
    """
    key = f"question:{question}"
    value = {
        'question': question,
        # JSON cannot encode NumPy arrays, so store the embedding as a plain list.
        'embedding': np.asarray(embedding).tolist(),
        'response': response
    }
    # Write the value and its TTL in a single command; with a separate EXPIRE call,
    # a failure in between would leave an entry that never expires.
    redis_client.set(key, json.dumps(value), ex=ttl_seconds)

In [16]:
# Seed the cache with the sample question, its embedding and the reloaded response.
store_in_redis(r, question, enc_question, response)

In [None]:
def process_question(question):
    """Answer `question`, reusing a cached response when a similar question exists.

    Flow:
      * no cache hit      -> call `handle_new_question`, cache and return its result;
      * exact cache hit   -> return the cached response unchanged;
      * similar cache hit -> return the cached response and also cache it under
        the new question's own key.

    Returns:
        The response (cached or freshly produced) for `question`.
    """
    question_embedding = encode_question(question)

    # Check Redis for a similar question
    cached_entry, similarity = get_cached_entry(r, question_embedding)

    if not cached_entry:
        # No similar question found, call function and store in Redis
        print("No similar question found. Calling function to get new entry.")
        response = handle_new_question(question)
        store_in_redis(r, question, question_embedding, response)
        return response

    response = cached_entry['response']

    # Floating-point cosine similarity of identical embeddings is not guaranteed to
    # be exactly 1.0, so treat "same question text" or "similarity within 1e-6 of 1"
    # as an exact match instead of testing `== 1.0`.
    is_exact = (
        cached_entry.get('question') == question
        or bool(np.isclose(similarity, 1.0, rtol=0.0, atol=1e-6))
    )
    if is_exact:
        print("Exact match found in cache. Returning cached entry.")
        return response

    # Similar question found, return cached entry and add new question
    print(f"Similar question found (similarity: {similarity}). Returning cached entry and adding new question.")
    store_in_redis(r, question, question_embedding, response)
    return response

# Example usage: runs the cache-first flow end to end for a question that differs
# from the sample one. Needs the Redis client `r` to be reachable.
new_question = "What is Elasticsearch and how do I set it up?"
response = process_question(new_question)
print(response)

In [None]:
# docker run -d -p 6379:6379 --name redis-server redis:latest redis-server --appendonly yes --maxmemory-policy allkeys-lru --maxmemory 100mb --save 60 1 --timeout 86400
# docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" docker.elastic.co/elasticsearch/elasticsearch:8.4.3
