In [54]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
import os  
import sys
from src.controllers import DocumentController
DocumentController = DocumentController()
from src.controllers import BaseController
BaseController = BaseController()
from src.utils.config import get_settings
# --------------------------------------------
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
import shutil
import argparse

In [44]:
# Load environment variables from .env file
settings = get_settings()

# Retrieve the GROQ_API_KEY from environment variables
groq_api_key = settings.GROQ_API_KEY

In [45]:
def load_documents(Doc_name):
    file_abs_path = DocumentController.get_project_path(Doc_name=Doc_name)
    loader = TextLoader(file_abs_path)
    documents = loader.load()
    return documents


In [14]:
def split_text(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    texts = [doc.page_content for doc in chunks]

    return chunks

In [5]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [15]:
BaseController = BaseController()
def save_to_chroma(chunks: list[Document]):
    CHROMA_PATH = BaseController.db_dir
    # Clear out the database first.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)
    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    # Create a new DB from the documents.
    db = Chroma.from_documents(
        chunks, embedding_function, persist_directory=CHROMA_PATH
    )
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [16]:
def generate_data_store():
    documents = load_documents()
    chunks = split_text(documents)
    save_to_chroma(chunks)

In [25]:
save_to_chroma(chunks)

Saved 26 chunks to d:\graduation project all 4th\project files\chatbot\src\assets/chroma.


  warn_deprecated(


In [26]:
from langchain.evaluation import load_evaluator

embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

def compare_embeddings(embedding_function):
    # Get embedding for a word.
    embedding_function = embedding_function
    vector = embedding_function.embed_query("apple")
    print(f"Vector for 'apple': {vector}")
    print(f"Vector length: {len(vector)}")

    # Compare vector of two words
    evaluator = load_evaluator("pairwise_embedding_distance")
    words = ("apple", "iphone")
    x = evaluator.evaluate_string_pairs(prediction=words[0], prediction_b=words[1])
    print(f"Comparing ({words[0]}, {words[1]}): {x}")

In [32]:
import numpy as np
# Define the custom similarity function
def custom_similarity(vector1, vector2):
    # Calculate the cosine similarity between the two vectors
    dot_product = np.dot(vector1, vector2)
    magnitude1 = np.linalg.norm(vector1)
    magnitude2 = np.linalg.norm(vector2)
    similarity = dot_product / (magnitude1 * magnitude2)
    return similarity

def compare_embeddings(embedding_function,input_1,input_2):
    # Get the embeddings for the two words
    vector1 = embedding_function.embed_query(input_1)
    vector2 = embedding_function.embed_query(input_2)

    # Calculate the custom similarity metric
    similarity = custom_similarity(vector1, vector2)
    print(f"Custom similarity: {similarity}")

# Call the function
compare_embeddings(embedding_function,"apple","iphone")

Custom similarity: 0.72382934656493


--------

In [38]:
query_text = input("Inter your query")
# # Create CLI.
# parser = argparse.ArgumentParser()
# parser.add_argument("query_text", type=str, help="The query text.")
# args = parser.parse_args()
# query_text = args.query_text

In [55]:
def search_chroma_db(query_text):
    """
    Search the Chroma database for the given query text and return the top results.

    Args:
        query_text (str): The text to search for.

    Returns:
        list: A list of tuples containing the matched text and relevance score, or an empty list if no relevant results are found.
    """
    CHROMA_PATH = BaseController.db_dir
    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    results = db.similarity_search_with_relevance_scores(query_text, k=3)

    if len(results) == 0 or results[0][1] < 0.2:
        print(f"Unable to find matching results.")
        return []

    return results

In [57]:
results = search_chroma_db(query_text)
results

[(Document(page_content='Sleep and Rest:', metadata={'source': 'd:\\graduation project all 4th\\project files\\chatbot\\src\\assets/files\\health.txt', 'start_index': 2110}),
  0.5803860188722099),
 (Document(page_content='Establish a consistent sleep routine, create a sleep-conducive environment, and practice good sleep hygiene, such as avoiding caffeine and screens before bedtime.', metadata={'source': 'd:\\graduation project all 4th\\project files\\chatbot\\src\\assets/files\\health.txt', 'start_index': 2330}),
  0.5547013268808072),
 (Document(page_content="If you're experiencing sleep disorders like insomnia or sleep apnea, consult with a healthcare professional for appropriate treatment and management strategies.", metadata={'source': 'd:\\graduation project all 4th\\project files\\chatbot\\src\\assets/files\\health.txt', 'start_index': 2494}),
  0.43440440522869717)]

In [58]:
def generate_prompt(query_text, results):
    """
    Generate a prompt for a language model based on the given query text and search results.

    Args:
        query_text (str): The query text or question.
        results (list): A list of tuples containing the matched text and relevance score.

    Returns:
        str: The generated prompt.
    """
    PROMPT_TEMPLATE = """
    Answer the question based only on the following context:
    {context}
    ---
    Answer the question based on the above context: {question}
    """

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    return prompt

In [60]:
prompt = generate_prompt(query_text, results)
prompt

"Human: \n    Answer the question based only on the following context:\n    Sleep and Rest:\n\n---\n\nEstablish a consistent sleep routine, create a sleep-conducive environment, and practice good sleep hygiene, such as avoiding caffeine and screens before bedtime.\n\n---\n\nIf you're experiencing sleep disorders like insomnia or sleep apnea, consult with a healthcare professional for appropriate treatment and management strategies.\n    ---\n    Answer the question based on the above context: sleep advice\n    "

In [62]:
model = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
response_text = model.invoke(prompt)

In [63]:
response_text

AIMessage(content="Based on the provided context, here are some pieces of sleep advice:\n\n1. Establish a consistent sleep routine: Try to go to bed and wake up at the same time every day, even on weekends. This can help regulate your body's internal clock and improve the quality of your sleep.\n\n2. Create a sleep-conducive environment: Make sure your bedroom is dark, quiet, and cool. Consider using earplugs, an eye mask, or a white noise machine if necessary.\n\n3. Practice good sleep hygiene: Avoid caffeine and screens (like phones, tablets, and computers) for a few hours before bedtime. These can interfere with your ability to fall asleep and stay asleep.\n\n4. If you're experiencing sleep disorders like insomnia or sleep apnea, consult with a healthcare professional: They can provide appropriate treatment and management strategies. Sleep disorders can significantly impact the quality of your sleep and overall health, so it's important to seek professional help if you're experienci

In [64]:
sources = [doc.metadata.get("source", None) for doc, _score in results]
formatted_response = f"Response: {response_text}\nSources: {sources}"
print(formatted_response)

Response: content="Based on the provided context, here are some pieces of sleep advice:\n\n1. Establish a consistent sleep routine: Try to go to bed and wake up at the same time every day, even on weekends. This can help regulate your body's internal clock and improve the quality of your sleep.\n\n2. Create a sleep-conducive environment: Make sure your bedroom is dark, quiet, and cool. Consider using earplugs, an eye mask, or a white noise machine if necessary.\n\n3. Practice good sleep hygiene: Avoid caffeine and screens (like phones, tablets, and computers) for a few hours before bedtime. These can interfere with your ability to fall asleep and stay asleep.\n\n4. If you're experiencing sleep disorders like insomnia or sleep apnea, consult with a healthcare professional: They can provide appropriate treatment and management strategies. Sleep disorders can significantly impact the quality of your sleep and overall health, so it's important to seek professional help if you're experienci

----------------