```bash
!pip install poetry
```
```bash
!poetry install
```
### or

```bash
!pip install -r requirements.txt
```

In [None]:
# Importing the required libraries
import os
from typing import List, Optional, Union

from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

load_dotenv()

In [None]:
# Constants
OPENAI_MODEL = "gpt-4o"  # chat-completion model used by the chat_with_openai* helpers
MAX_TOKENS = 225  # passed as max_tokens on every chat-completion request
TEMPERATURE = 0.7  # passed as temperature on every chat-completion request
OPENAI_EMBEDDING_MODEL = "text-embedding-3-small"  # default model for get_embeddings

In [None]:
# Setting up the Pinecone client
# Every setting is read from the environment (after load_dotenv() has run);
# os.getenv returns None for any variable that is not set.
pinecone_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
# NOTE(review): `spec` and `index_name` are not referenced by any later cell visible in this
# notebook — presumably meant for index creation/selection; confirm or remove.
spec = ServerlessSpec(cloud=os.getenv("PINECONE_CLOUD"), region=os.getenv("PINECONE_REGION"))
index_name = os.getenv("PINECONE_INDEX_NAME")

In [None]:
# Setting up the OpenAI client
# The API key is read from the OPENAI_API_KEY environment variable; this single client is
# shared by the embedding and chat-completion helpers defined below.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [None]:
# Sample chat history used as the retrieval corpus.
# Each entry is one numbered exchange in the form "<n>: User: <message> | Bot: <reply>";
# the whole string is what gets embedded and stored as the vector's 'text' metadata.
history = [
    "1: User: Hi there! How are you doing today? | Bot: Hello! I'm doing great, thank you! How can I assist you today?",
    "2: User: What's the weather like today in New York? | Bot: Today in New York, it's sunny with a slight chance of rain.",
    "3: User: Great! Do you have any good lunch suggestions? | Bot: Sure! How about trying a new salad recipe?",
    "4: User: That sounds healthy. Any specific recipes? | Bot: You could try a quinoa salad with avocado and chicken.",
    "5: User: Sounds delicious! I'll try it. What about dinner? | Bot: For dinner, you could make grilled salmon with vegetables.",
    "6: User: Thanks for the suggestions! Any dessert ideas? | Bot: How about a simple fruit salad or yogurt with honey?",
    "7: User: Perfect! Now, what are some good exercises? | Bot: You can try a mix of cardio and strength training exercises.",
    "8: User: Any specific recommendations for cardio? | Bot: Running, cycling, and swimming are all excellent cardio exercises.",
    "9: User: I'll start with running. Can you recommend any books? | Bot: 'Atomic Habits' by James Clear is a highly recommended book.",
    "10: User: I'll check it out. What hobbies can I take up? | Bot: You could explore painting, hiking, or learning a new instrument.",
    "11: User: Hiking sounds fun! Any specific trails? | Bot: There are great trails in the Rockies and the Appalachian Mountains.",
    "12: User: I'll plan a trip. What about indoor activities? | Bot: Indoor activities like reading, cooking, or playing board games.",
    "13: User: Nice! Any good board games? | Bot: Settlers of Catan and Ticket to Ride are both excellent choices.",
    "14: User: I'll try them out. Any movie recommendations? | Bot: 'Inception' and 'The Matrix' are must-watch movies.",
    "15: User: I love those movies! Any TV shows? | Bot: 'Breaking Bad' and 'Stranger Things' are very popular.",
    "16: User: Great choices! What about podcasts? | Bot: 'How I Built This' and 'The Daily' are very informative.",
    "17: User: Thanks! What are some good travel destinations? | Bot: Paris, Tokyo, and Bali are amazing travel spots.",
    "18: User: I'll add them to my list. Any packing tips? | Bot: Roll your clothes to save space and use packing cubes.",
    "19: User: That's helpful! What about travel insurance? | Bot: Always get travel insurance for safety and peace of mind.",
    "20: User: Thanks for the tips! Any last advice? | Bot: Just enjoy your journey and make the most out of your experiences."
]

In [None]:
def initialize_pinecone_index(pinecone_index_name: Optional[str] = 'chat-history'):
    """
    Look up an existing Pinecone index and return a handle to it.

    The index is never created here: if the name is not among the indexes listed by the
    module-level `pinecone_client`, a message is printed and None is returned.

    Parameters:
    pinecone_index_name (str, optional): The name of the Pinecone index. Defaults to 'chat-history'.

    Returns:
    pinecone.Index: The initialized Pinecone index, or None if the index does not exist
    or if the lookup raised an error (the error is printed, not re-raised).
    """
    pinecone_index = None
    try:
        # Check if the index exists
        if pinecone_index_name in pinecone_client.list_indexes().names():
            pinecone_index = pinecone_client.Index(pinecone_index_name)
        else:
            print(f"Index '{pinecone_index_name}' does not exist.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        print("Finished initializing Pinecone index.")

    # Return after the try statement instead of from inside `finally`: a `return` in
    # `finally` would silently discard KeyboardInterrupt/SystemExit raised in the try body.
    return pinecone_index

In [None]:
def get_embeddings(message: str, model: Optional[str] = OPENAI_EMBEDDING_MODEL):
    """
    Embed a single message through the OpenAI embeddings endpoint.

    Parameters:
    message (str): The text to embed.
    model (str, optional): Embedding model name. Defaults to OPENAI_EMBEDDING_MODEL.

    Returns:
    The `embedding` attribute of the first item in the response's `data`.
    """
    response = openai_client.embeddings.create(input=message, model=model)
    first_item = response.data[0]
    return first_item.embedding

In [None]:
def add_embeddings_to_pinecone(chat_history: List[str],
                               pinecone_index_name: Optional[str] = 'chat-history'):
    """
    Embed every chat-history entry with OpenAI and upsert the vectors into Pinecone.

    Each entry is stored under a 1-based string id (its position in `chat_history`), with
    the original text kept in the vector's 'text' metadata field.

    Parameters:
    chat_history (List[str]): The chat history to be added to the Pinecone index.
    pinecone_index_name (str, optional): The name of the Pinecone index. Defaults to 'chat-history'.

    Returns:
    None. Errors are not propagated: a missing index (signalled internally with a
    ValueError) and any other exception are caught and printed.
    """
    try:
        # Resolve the target index; None means it could not be found or opened
        target_index = initialize_pinecone_index(pinecone_index_name)
        if target_index is None:
            raise ValueError(f"Index {pinecone_index_name} does not exist.")

        # One record per message, embedded individually
        records = [
            {
                'id': str(position),
                'values': get_embeddings(text),
                'metadata': {'text': text},
            }
            for position, text in enumerate(chat_history, start=1)
        ]
        target_index.upsert(vectors=records)

    except ValueError as value_error:
        print(f"ValueError: {value_error}")
    except Exception as error:
        print(f"An unexpected error occurred: {error}")
    finally:
        print("Finished processing embeddings.")

In [None]:
# Index the sample history into the default 'chat-history' index (the same index
# retrieve_relevant_history queries by default). The second parameter is an index *name*,
# so the Pinecone client object must not be passed here.
add_embeddings_to_pinecone(history)

In [None]:
def retrieve_relevant_history(query: str, top_k: Optional[int] = 2,
                              pinecone_index_name: Optional[str] = 'chat-history'):
    """
    Retrieve the chat-history entries most similar to the query.

    The query is embedded with get_embeddings (OpenAI) and used for a similarity search
    against the Pinecone index; the 'text' metadata of the matches is joined with newlines.

    Parameters:
    query (str): The query to be used for searching the chat history.
    top_k (int, optional): The number of most relevant messages to retrieve. Defaults to 2.
    pinecone_index_name (str, optional): The name of the Pinecone index. Defaults to 'chat-history'.

    Returns:
    str: The matched messages, one per line, or None if the index does not exist or any
    error occurs (errors are printed, not re-raised).
    """
    try:
        # Initialize Pinecone index
        pinecone_index = initialize_pinecone_index(pinecone_index_name)
        if pinecone_index is None:
            raise ValueError(f"Index {pinecone_index_name} does not exist.")

        # Get embeddings for the query
        query_embedding = get_embeddings(query)

        # Search for the most relevant messages. `vector` is a single flat list of floats,
        # so the embedding is passed as-is rather than wrapped in another list.
        search_results = pinecone_index.query(vector=query_embedding, top_k=top_k,
                                              include_metadata=True)

        # Join the stored text of every match
        relevant_message = '\n'.join([r['metadata']['text'] for r in search_results['matches']])

        return relevant_message

    except ValueError as ve:
        print(f"ValueError: {ve}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        print("Finished retrieving relevant history.")

In [None]:
def prepare_system_prompt(query: str):
    """
    Build the system prompt for the chat completion call.

    Parameters:
    query (str): The user query, used to look up relevant chat history.

    Returns:
    str: The system prompt with the retrieved context embedded in it.
    str: The retrieved context on its own (whatever retrieve_relevant_history returned).
    """
    # Look up the chat-history entries most relevant to the query
    context = retrieve_relevant_history(query)

    # Assemble the prompt from its fixed preamble and the retrieved context
    prompt_parts = [
        "You are an AI assistant named Viamagus. ",
        "Your role is to assist the user by answering their queries based on the context provided. ",
        "You have access to the following context:\n",
        f"{context}\n",
    ]
    return "".join(prompt_parts), context

In [None]:
def chat_with_openai(query: str):
    """
    Answer a query with the OpenAI chat completion API, grounded in retrieved history.

    Parameters:
    query (str): The user query.

    Returns:
    str: The content of the first choice in the completion response.
    str: The context that was placed in the system prompt.
    """
    system_prompt, context_referred = prepare_system_prompt(query)

    # System message carries the retrieved context; user message carries the raw query
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    completion = openai_client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=conversation,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
    )
    answer = completion.choices[0].message.content
    return answer, context_referred

In [None]:
def test_final_prompt():
    """Run one fixed query through chat_with_openai and print the query, context and reply."""
    query = "Do you think it will help me stay fit?"
    response, context = chat_with_openai(query)
    report_lines = (
        f"Final Test Prompt: {query}",
        f"Context Referred: {context}",
        f"Final Test Prompt Response: {response}",
    )
    for line in report_lines:
        print(line)


In [None]:
# Run the end-to-end check for the OpenAI-embedding pipeline (retrieval + chat completion).
test_final_prompt()

# Open Source Embedding Model

In [22]:
from sentence_transformers import SentenceTransformer

In [37]:
# Load the open-source embedding model once; the *_open_source helpers below reuse this
# module-level instance for both indexing and querying.
embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')

In [58]:
def get_embeddings_open_source(messages: Union[str, List[str]], model: "SentenceTransformer"):
    """
    Encode one message or a list of messages using an open-source text-embedding model.

    Parameters:
    messages (str or List[str]): The message, or list of messages, to be encoded. The
        callers in this notebook pass a single string.
    model (SentenceTransformer): The open-source text-embedding model.

    Returns:
    Whatever `model.encode(messages, normalize_embeddings=True)` returns; the callers in
    this notebook call `.tolist()` on it before sending it to Pinecone.
    """
    return model.encode(messages, normalize_embeddings=True)

In [59]:
# print(get_embeddings_open_source("Hello, how are you?", embedding_model))

In [60]:
# print(len(get_embeddings_open_source("Hello, how are you?", embedding_model)))

In [61]:
# print(get_embeddings(message="Hello, how are you?"))

In [62]:
# print(len(get_embeddings(message="Hello, how are you?")))

In [67]:
def add_embeddings_to_pinecone_open_source(chat_history: List[str],
                                           pinecone_index_name: Optional[str] = 'opensource-embeddings'):
    """
    Embed every chat-history entry with the open-source model and upsert into Pinecone.

    Each entry is encoded with the module-level `embedding_model`, stored under a 1-based
    string id (its position in `chat_history`), with the original text kept in the
    vector's 'text' metadata field.

    Parameters:
    chat_history (List[str]): The chat history to be added to the Pinecone index.
    pinecone_index_name (str, optional): The name of the Pinecone index. Defaults to 'opensource-embeddings'.

    Returns:
    None. Errors are not propagated: a missing index (signalled internally with a
    ValueError) and any other exception are caught and printed.
    """
    try:
        # Resolve the target index; None means it could not be found or opened
        target_index = initialize_pinecone_index(pinecone_index_name)
        if target_index is None:
            raise ValueError(f"Index {pinecone_index_name} does not exist.")

        # One record per message; the encoded vector is converted to a plain list
        records = [
            {
                'id': str(position),
                'values': get_embeddings_open_source(text, embedding_model).tolist(),
                'metadata': {'text': text},
            }
            for position, text in enumerate(chat_history, start=1)
        ]
        target_index.upsert(vectors=records)

    except ValueError as value_error:
        print(f"ValueError: {value_error}")
    except Exception as error:
        print(f"An unexpected error occurred: {error}")
    finally:
        print("Finished processing embeddings.")

In [68]:
# Index the sample history into the default 'opensource-embeddings' index using the
# open-source embedding model.
add_embeddings_to_pinecone_open_source(history)

Finished initializing Pinecone index.
[ 0.06640825 -0.00498162 -0.03351139 ... -0.00099763 -0.02269631
 -0.04536788]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.03010496  0.00481044  0.00705468 ... -0.04326796 -0.03929177
 -0.03224383]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.04942119  0.00351219 -0.01418415 ... -0.01627847  0.00035702
 -0.02105516]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.01347936  0.01195142 -0.04309826 ... -0.00621914  0.0069764
 -0.03176572]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.03412233  0.00378344 -0.04075072 ... -0.02442858  0.01617874
 -0.01458771]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.04094969 -0.00814151 -0.02709641 ... -0.03023439  0.0178825
 -0.02095387]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.04217416  0.03770748 -0.00560683 ...  0.01342603  0.00257149
 -0.02553049]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[ 0.03823996  0.04391637 -0.00206581 ...  0.03683512  0.02549322
 -0.002443

In [74]:
def retrieve_relevant_history_open_source(query: str, top_k: Optional[int] = 2,
                                          pinecone_index_name: Optional[str] = 'opensource-embeddings'):
    """
    Retrieve the chat-history entries most similar to the query using an open-source text-embedding model.

    The query is encoded with the module-level `embedding_model` and used for a similarity
    search against the Pinecone index; the 'text' metadata of the matches is joined with
    newlines.

    Parameters:
    query (str): The query to be used for searching the chat history.
    top_k (int, optional): The number of most relevant messages to retrieve. Defaults to 2.
    pinecone_index_name (str, optional): The name of the Pinecone index. Defaults to 'opensource-embeddings'.

    Returns:
    str: The matched messages, one per line, or None if the index does not exist or any
    error occurs (errors are printed, not re-raised).
    """
    try:
        # Initialize Pinecone index
        pinecone_index = initialize_pinecone_index(pinecone_index_name)
        if pinecone_index is None:
            raise ValueError(f"Index {pinecone_index_name} does not exist.")

        # Get embeddings for the query
        query_embedding = get_embeddings_open_source(query, embedding_model)

        # Search for the most relevant messages. `vector` is a single flat list of floats,
        # so the converted embedding is passed as-is rather than wrapped in another list.
        search_results = pinecone_index.query(vector=query_embedding.tolist(), top_k=top_k,
                                              include_metadata=True)

        # Join the stored text of every match
        relevant_message = '\n'.join([r['metadata']['text'] for r in search_results['matches']])

        return relevant_message

    except ValueError as ve:
        print(f"ValueError: {ve}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        print("Finished retrieving relevant history.")

In [75]:
def prepare_system_prompt_open_source(query: str):
    """
    Build the system prompt for the chat completion call, using open-source embeddings for retrieval.

    Parameters:
    query (str): The user query, used to look up relevant chat history.

    Returns:
    str: The system prompt with the retrieved context embedded in it.
    str: The retrieved context on its own (whatever
        retrieve_relevant_history_open_source returned).
    """
    # Look up the chat-history entries most relevant to the query
    context = retrieve_relevant_history_open_source(query)

    # Assemble the prompt from its fixed preamble and the retrieved context
    prompt_parts = [
        "You are an AI assistant named Viamagus. ",
        "Your role is to assist the user by answering their queries based on the context provided. ",
        "You have access to the following context:\n",
        f"{context}\n",
    ]
    return "".join(prompt_parts), context

In [76]:
def chat_with_openai_open_source(query: str):
    """
    Answer a query with the OpenAI chat completion API, with context retrieved via open-source embeddings.

    Parameters:
    query (str): The user query.

    Returns:
    str: The content of the first choice in the completion response.
    str: The context that was placed in the system prompt.
    """
    system_prompt, context_referred = prepare_system_prompt_open_source(query)

    # System message carries the retrieved context; user message carries the raw query
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    completion = openai_client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=conversation,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
    )
    answer = completion.choices[0].message.content
    return answer, context_referred

In [83]:
def test_final_prompt_open_source():
    """Run one fixed query through chat_with_openai_open_source and print query, context and reply."""
    query = "Do you think it will help me stay fit?"
    response, context = chat_with_openai_open_source(query)
    report_sections = (
        f"\nFinal Test Prompt: \n{query}\n",
        f"Context Referred: \n{context}\n",
        f"Final Test Prompt Response: \n{response}\n",
    )
    for section in report_sections:
        print(section)

In [84]:
# Run the end-to-end check for the open-source-embedding pipeline (retrieval + chat completion).
test_final_prompt_open_source()

Finished initializing Pinecone index.
Finished retrieving relevant history.

Final Test Prompt: 
Do you think it will help me stay fit?

Context Referred: 
7: User: Perfect! Now, what are some good exercises? | Bot: You can try a mix of cardio and strength training exercises.
8: User: Any specific recommendations for cardio? | Bot: Running, cycling, and swimming are all excellent cardio exercises.
9: User: I'll start with running. Can you recommend any books? | Bot: 'Atomic Habits' by James Clear is a highly recommended book.

Final Test Prompt Response: 
Yes, 'Atomic Habits' by James Clear can be very helpful in staying fit. The book focuses on building good habits and breaking bad ones, which can be critical in maintaining a consistent fitness routine. By applying the principles outlined in the book, you can create sustainable habits that support your fitness goals, such as regular running and other exercises.

