<a href="https://colab.research.google.com/github/DynamicLLM/LLM2024/blob/main/src/sample-ai-agent/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This program implements a Retrieval-Augmented Generation (RAG) system that combines semantic search and dynamic text generation. It uses TinyDB to cache user queries and their corresponding responses, and uses sentence embeddings from SentenceTransformer to find semantically similar cached questions by cosine similarity. If a sufficiently similar question is found, its cached response is returned. If no match is found, the program generates a new response with OpenAI's GPT model (gpt-4o-mini) and stores it in the TinyDB database for future use. This hybrid approach optimizes response accuracy while reducing redundant computations, making it ideal for chatbots, FAQ systems, and knowledge-base applications.

In [2]:
pip install tinydb

Collecting tinydb
  Downloading tinydb-4.8.2-py3-none-any.whl.metadata (6.7 kB)
Downloading tinydb-4.8.2-py3-none-any.whl (24 kB)
Installing collected packages: tinydb
Successfully installed tinydb-4.8.2


In [5]:
import time
import numpy as np
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
from tinydb import TinyDB, Query
import json
from openai import OpenAI

# Initialize the embedding model used to turn questions into vectors
embedding_model = SentenceTransformer('distiluse-base-multilingual-cased-v1')

# Cache settings
CACHE_EXPIRATION = 3600  # 1 hour in seconds; older entries are ignored on lookup
SIMILARITY_THRESHOLD = 0.8  # Cosine similarity a cached question must exceed to match
DB_FILE = "cache.json"  # TinyDB file

# Initialize OpenAI client.
# No key is hard-coded here, so the secret never ends up in the notebook or in
# version control. Without an api_key argument the client reads the
# OPENAI_API_KEY environment variable; set it before running this cell.
client = OpenAI()

# Initialize TinyDB
db = TinyDB(DB_FILE)
query = Query()  # Used to build TinyDB conditions such as query.question == "..."

def set_cached_response(question, response, embedding):
    """Insert or refresh the TinyDB cache entry for ``question``.

    The stored entry holds the question text, its response, the embedding
    serialized as a JSON string, and the time of the write. The upsert is
    keyed on the exact question text. Any failure is printed, not raised.
    """
    try:
        stored_at = time.time()
        # The embedding is written as a JSON string of a plain list.
        serialized_embedding = json.dumps(embedding.tolist())
        entry = dict(
            question=question,
            response=response,
            embedding=serialized_embedding,
            timestamp=stored_at,
        )
        same_question = query.question == question
        db.upsert(entry, same_question)
        print(f"Cached response for: {question}")
    except Exception as e:
        print(f"Failed to set cache: {str(e)}")

def find_similar_question(question, embedding):
    """Return the unexpired cached entry most similar to ``embedding``.

    Every record younger than CACHE_EXPIRATION seconds is compared with
    ``embedding`` by cosine similarity, and the record with the highest
    similarity above SIMILARITY_THRESHOLD is chosen. Scanning all candidates
    (rather than stopping at the first one over the threshold) means the
    answer does not depend on the order in which entries were stored.

    Args:
        question: The user's question text. Not used by the lookup itself;
            kept so existing callers do not need to change.
        embedding: Embedding vector of the user's question.

    Returns:
        A ``(question, response)`` tuple taken from the best matching record,
        or ``(None, None)`` when nothing qualifies. If the search fails part
        way through, the error is printed and the best match found up to that
        point (if any) is returned.
    """
    best_record = None
    best_similarity = SIMILARITY_THRESHOLD  # a match must be strictly above this
    try:
        current_time = time.time()
        for record in db.all():
            # Skip entries that have outlived the cache lifetime.
            if current_time - record['timestamp'] >= CACHE_EXPIRATION:
                continue
            # Embeddings are stored as JSON strings; decode back to an array.
            cached_embedding = np.array(json.loads(record['embedding']))
            # scipy's cosine() is a distance, so similarity is 1 - distance.
            similarity = 1 - cosine(embedding, cached_embedding)
            if similarity > best_similarity:
                best_similarity = similarity
                best_record = record
    except Exception as e:
        print(f"Failed to search cache: {str(e)}")

    if best_record is None:
        return None, None
    print(f"Found similar question with similarity: {best_similarity}")
    return best_record['question'], best_record['response']

def generate_response(query):
    """Ask the OpenAI chat model (gpt-4o-mini) to answer ``query``.

    Returns the message content of the first choice. If the request fails,
    the error is printed and a fixed apology string is returned instead.

    Note: inside this function the ``query`` parameter hides the module-level
    TinyDB ``query`` object; only the parameter is used here.
    """
    fallback = "I'm sorry, I couldn't generate a response at this time."
    try:
        # The whole prompt is sent as a single user message.
        conversation = [{"role": "user", "content": query}]
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            store=True,
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        print(f"Failed to generate response: {str(e)}")
    return fallback

def get_embedding(text):
    """Return the embedding vector that the sentence model produces for ``text``."""
    # The model is called with a one-item batch; take that single result back out.
    batch = embedding_model.encode([text])
    return batch[0]

# Example usage: answer one question from the cache if possible, otherwise
# generate a new answer and cache it.
if __name__ == "__main__":
    # Text generate_response() returns when the API call fails. It is compared
    # against below so a failure is not cached and then replayed as the
    # "answer" to similar questions until the entry expires.
    GENERATION_FAILED = "I'm sorry, I couldn't generate a response at this time."

    user_question = input("Enter your question: ")
    user_embedding = get_embedding(user_question)

    # Search for similar questions in the cache
    cached_question, cached_response = find_similar_question(user_question, user_embedding)

    if cached_response:
        print(f"Found similar question: {cached_question}")
        print(f"Cached response: {cached_response}")
    else:
        print("No similar question found. Generating a new response...")
        generated_response = generate_response(user_question)
        print(f"Generated response: {generated_response}")

        # Cache only real answers: skip empty results and the failure fallback.
        if generated_response and generated_response != GENERATION_FAILED:
            set_cached_response(user_question, generated_response, user_embedding)


Enter your question: oK
No similar question found. Generating a new response...
Generated response: Hello! How can I assist you today?
Cached response for: oK
