In [1]:
!pip install transformers pinecone-client sentence-transformers
!pip install --upgrade --quiet pinecone-client pinecone-text pinecone-notebooks



In [2]:
import os

# Pinecone credentials and deployment settings.
#
# SECURITY: the API key is read from the PINECONE_API_KEY environment variable
# instead of being hardcoded in the notebook. The key that was previously
# committed in this cell must be treated as compromised and rotated.
pinecone_api_key = os.environ.get('PINECONE_API_KEY')
if not pinecone_api_key:
    # Do not crash the config cell; the client construction further down will
    # fail with its own error if no key is available.
    print("WARNING: PINECONE_API_KEY is not set; export it before connecting to Pinecone.")

# Cloud provider and region used for the serverless index spec.
PINECONE_CLOUD = 'aws'
PINECONE_REGION = 'us-east-1'

In [3]:
# Sample 20-turn conversation used as the retrieval corpus.
# Each entry is one exchange written as "<n>: User: <message> | Bot: <reply>".
# Entry n lives at list index n-1; the list index (as a string) is what the
# upsert loop below uses as the vector id, and what retrieval maps back from.
# NOTE(review): entry 9 uses "|Bot:" (no space after the pipe) and entry 11
# contains "aregreat" -- both look like typos. They are left untouched here
# because the strings are embedded verbatim; confirm before normalising.
history = [
    "1: User: Hi there! How are you doing today? | Bot: Hello! I'm doing great, thank you! How can I assist you today?",
    "2: User: What's the weather like today in New York? | Bot: Today in New York, it's sunny with a slight chance of rain.",
    "3: User: Great! Do you have any good lunch suggestions? | Bot: Sure! How about trying a new salad recipe?",
    "4: User: That sounds healthy. Any specific recipes? | Bot: You could try a quinoa salad with avocado and chicken.",
    "5: User: Sounds delicious! I'll try it. What about dinner? | Bot: For dinner, you could make grilled salmon with vegetables.",
    "6: User: Thanks for the suggestions! Any dessert ideas? | Bot: How about a simple fruit salad or yogurt with honey?",
    "7: User: Perfect! Now, what are some good exercises? | Bot: You can try a mix of cardio and strength training exercises.",
    "8: User: Any specific recommendations for cardio? | Bot: Running, cycling, and swimming are all excellent cardio exercises.",
    "9: User: I'll start with running. Can you recommend any books? |Bot: 'Atomic Habits' by James Clear is a highly recommended book.",
    "10: User: I'll check it out. What hobbies can I take up? | Bot: You could explore painting, hiking, or learning a new instrument.",
    "11: User: Hiking sounds fun! Any specific trails? | Bot: There aregreat trails in the Rockies and the Appalachian Mountains.",
    "12: User: I'll plan a trip. What about indoor activities? | Bot: Indoor activities like reading, cooking, or playing board games.",
    "13: User: Nice! Any good board games? | Bot: Settlers of Catan and Ticket to Ride are both excellent choices.",
    "14: User: I'll try them out. Any movie recommendations? | Bot: 'Inception' and 'The Matrix' are must-watch movies.",
    "15: User: I love those movies! Any TV shows? | Bot: 'Breaking Bad' and 'Stranger Things' are very popular.",
    "16: User: Great choices! What about podcasts? | Bot: 'How I Built This' and 'The Daily' are very informative.",
    "17: User: Thanks! What are some good travel destinations? | Bot: Paris, Tokyo, and Bali are amazing travel spots.",
    "18: User: I'll add them to my list. Any packing tips? | Bot: Roll your clothes to save space and use packing cubes.",
    "19: User: That's helpful! What about travel insurance? | Bot: Always get travel insurance for safety and peace of mind.",
    "20: User: Thanks for the tips! Any last advice? | Bot: Just enjoy your journey and make the most out of your experiences."]


In [4]:
from pinecone import Pinecone,ServerlessSpec
import os
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import warnings

# Silence library warnings so the notebook output stays readable.
# NOTE(review): this is a blanket filter and will also hide deprecation notices.
warnings.filterwarnings("ignore")

# Initialize the Pinecone client (key passed by keyword for clarity).
pc = Pinecone(api_key=pinecone_api_key)

index_name = "chat-history"

# Start from a clean index on every run.
# list_indexes() yields index descriptions, not bare names, so a plain
# `index_name in pc.list_indexes()` never matches and the stale index was never
# removed. Membership has to be tested against .names(), as the creation guard
# below already does.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# (Re)create the index. The guard is kept so the cell stays safe if the
# deletion above did not run.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the output size of the embedding model loaded below.
        dimension=384,
        metric='euclidean',
        spec=ServerlessSpec(
            cloud=PINECONE_CLOUD,
            region=PINECONE_REGION,
        ),
    )

# Connect to the index
index = pc.Index(index_name)

# Sentence-transformer model used to embed both the stored history and queries
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Text generation model and its tokenizer; the tokenizer is what
# prepare_prompt() uses to measure prompt length.
model_name = 'gpt2'
text_gen_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [5]:
# Store conversation history embeddings in Pinecone.
# The whole history is encoded in one batch and written with a single upsert
# instead of one encode call and one network round trip per message.
# Vector ids are the positions in `history` (as strings), which is what
# retrieve_relevant_messages() relies on to map matches back to text.
if history:
    try:
        history_embeddings = embedding_model.encode(history).tolist()
        index.upsert(
            vectors=[(str(i), vector) for i, vector in enumerate(history_embeddings)]
        )
    except Exception as e:
        # Best-effort, as before: report the failure and let the notebook continue.
        print(f"Error upserting conversation history: {e}")

#  function to retrieve relevant messages
def retrieve_relevant_messages(query, top_k=5):
    """Look up the stored history entries closest to ``query``.

    The query is embedded with the module-level ``embedding_model`` and sent to
    the module-level Pinecone ``index``. Each returned match id is interpreted
    as an integer position in the module-level ``history`` list.

    Args:
        query: Text to search for.
        top_k: Number of matches requested from the index.

    Returns:
        The matching ``history`` strings in the order the index returned them,
        or an empty list if anything goes wrong (the error is printed).
    """
    try:
        query_vector = embedding_model.encode(query).tolist()
        response = index.query(vector=query_vector, top_k=top_k)

        # Translate vector ids back into the original message text.
        found = []
        for hit in response['matches']:
            position = int(hit['id'])
            found.append(history[position])
    except Exception as e:
        print(f"Error retrieving relevant messages: {e}")
        return []
    else:
        return found




In [6]:
def prepare_prompt(test_prompt, history, top_k=5, max_tokens=225):
    """Build a retrieval-augmented prompt that fits a token budget.

    The most relevant stored messages are fetched for ``test_prompt``, joined
    into a context string, and followed by ``"\\nUser: <test_prompt>\\nBot:"``.
    If the result is longer than ``max_tokens`` tokens, the *context* is cut
    from its end so that the user question and the trailing ``Bot:`` cue are
    always preserved.

    Previously the tokenizer was invoked with ``truncation=True``, which made
    the length check unreachable, so the returned string was never shortened.

    Args:
      test_prompt: The new user message to answer.
      history: Unused; retrieval reads the module-level history. Kept so
        existing callers keep working.
      top_k: Number of history messages to retrieve.
      max_tokens: Token budget for the prompt, measured with the module-level
        ``tokenizer``. The question itself is never truncated, so the budget
        can only be exceeded when the question alone is longer than it.
        Re-tokenising the trimmed text may shift the count slightly at the
        context/question boundary.

    Returns:
      A ``(combined_prompt, context)`` tuple: the (possibly trimmed) prompt and
      the full, untrimmed retrieved context.
    """
    # Retrieve relevant messages
    relevant_messages = retrieve_relevant_messages(test_prompt, top_k)

    # Combine messages and test prompt
    context = " ".join(relevant_messages)
    question_suffix = f"\nUser: {test_prompt}\nBot:"
    combined_prompt = f"{context}{question_suffix}"

    # Measure the real length: no truncation here, otherwise the check below
    # could never fire.
    prompt_ids = tokenizer(combined_prompt)['input_ids']
    if len(prompt_ids) > max_tokens:
        suffix_ids = tokenizer(question_suffix)['input_ids']
        # Tokens left for context once the question is accounted for.
        context_budget = max(max_tokens - len(suffix_ids), 0)
        context_ids = tokenizer(context)['input_ids'][:context_budget]
        trimmed_context = tokenizer.decode(context_ids, skip_special_tokens=True)
        combined_prompt = f"{trimmed_context}{question_suffix}"

    # Return the combined prompt and the context
    return combined_prompt, context

In [8]:
# Sample follow-up question used to exercise the retrieval pipeline. Its subject
# ("it") is not stated in the question, so useful context has to come from the
# stored conversation history.
final_test_prompt = "Do you think it will help me stay fit?"

def test_final_prompt(final_test_prompt):
    # Define the final test prompt


    # Prepare the prompt using the prepare_prompt function
    prepared_prompt, context_referred = prepare_prompt(final_test_prompt, history)

    # Print the results
    print("Final Test Prompt:")
    print(final_test_prompt)
    print("\nChat History Context Referenced:")
    print(context_referred)
    print("\nPrepared Prompt:")
    print(prepared_prompt)

# Run the pipeline end to end for the sample question: retrieve context from the
# index and print the assembled prompt.
test_final_prompt(final_test_prompt)


Final Test Prompt:
Do you think it will help me stay fit?

Chat History Context Referenced:
7: User: Perfect! Now, what are some good exercises? | Bot: You can try a mix of cardio and strength training exercises. 20: User: Thanks for the tips! Any last advice? | Bot: Just enjoy your journey and make the most out of your experiences. 8: User: Any specific recommendations for cardio? | Bot: Running, cycling, and swimming are all excellent cardio exercises. 10: User: I'll check it out. What hobbies can I take up? | Bot: You could explore painting, hiking, or learning a new instrument. 4: User: That sounds healthy. Any specific recipes? | Bot: You could try a quinoa salad with avocado and chicken.

Prepared Prompt:
7: User: Perfect! Now, what are some good exercises? | Bot: You can try a mix of cardio and strength training exercises. 20: User: Thanks for the tips! Any last advice? | Bot: Just enjoy your journey and make the most out of your experiences. 8: User: Any specific recommendation