In [1]:
# import env libraries
import os
from dotenv import load_dotenv
# import chatbot libraries
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain
#import pdf embedding libraries
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyMuPDFLoader
# import pinecone
import pinecone
from langchain.vectorstores import Pinecone
import time

# Load variables from a local .env file into the process environment so the
# os.environ lookups in the functions below can find the API keys.
load_dotenv()

def initialize_pinecone(index_name):
    """Connect to Pinecone and make sure ``index_name`` exists and is ready.

    Credentials are read from the ``PINECONE_API_KEY`` and
    ``PINECONE_ENVIRONMENT`` environment variables (``None`` is passed through
    if either is unset). If the index is not listed yet it is created with
    1536 dimensions and the cosine metric. The function then blocks, polling
    once per second, until the index reports itself ready.

    Args:
        index_name: Name of the Pinecone index to create or wait for.

    Returns:
        None.
    """
    api_key = os.environ.get('PINECONE_API_KEY')
    environment = os.environ.get('PINECONE_ENVIRONMENT')
    pinecone.init(api_key=api_key, environment=environment)

    # Only create the index when it is not already listed.
    # NOTE(review): 1536 presumably matches the embedding size used
    # elsewhere in this notebook -- confirm before changing the model.
    known_indexes = pinecone.list_indexes()
    if index_name not in known_indexes:
        pinecone.create_index(index_name, dimension=1536, metric='cosine')

    # Poll until the index reports ready; there is no timeout.
    while True:
        if pinecone.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

def initialize_chatbot(index_name):
    """Build a conversational retrieval chain over an existing Pinecone index.

    The chain combines a ``gpt-3.5-turbo`` chat model (temperature 0.2), a
    retriever backed by the Pinecone index ``index_name``, and a windowed
    conversation memory configured with ``k=10`` under the ``"chat_history"``
    key.

    Args:
        index_name: Name of an already-existing Pinecone index to retrieve from.

    Returns:
        The object returned by ``ConversationalRetrievalChain.from_llm``.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    # Read the key once; both the chat model and the embedder need it.
    openai_key = os.environ["OPENAI_API_KEY"]

    llm = ChatOpenAI(
        openai_api_key=openai_key,
        model='gpt-3.5-turbo',
        temperature=0.2
    )

    # Queries are embedded with the same embedder used to look up the index.
    embedder = OpenAIEmbeddings(openai_api_key=openai_key)
    retriever = Pinecone.from_existing_index(index_name, embedder).as_retriever()

    window_memory = ConversationBufferWindowMemory(
        k=10,
        memory_key="chat_history",
        return_messages=True
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=window_memory
    )

def embed_pdfs(file_path, index_name="recipes"):
    """Load a PDF, split it into chunks, embed them and upload to Pinecone.

    The PDF is read with ``PyMuPDFLoader``, split into chunks of at most 200
    characters with no overlap, embedded with ``OpenAIEmbeddings`` and written
    to the Pinecone index ``index_name``.

    Args:
        file_path: Path to the PDF file to embed.
        index_name: Name of the Pinecone index to write to. Defaults to
            ``"recipes"`` so existing one-argument calls keep working.

    Returns:
        None.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    loader = PyMuPDFLoader(file_path)
    pages = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    all_splits = text_splitter.split_documents(pages)

    embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
    # Pass the embeddings object itself: it is an instance, not a factory, so
    # calling it (``embeddings()``) raises TypeError. The target index must
    # also be named explicitly, otherwise the vector store has nowhere to
    # write the vectors.
    Pinecone.from_documents(
        documents=all_splits,
        embedding=embeddings,
        index_name=index_name,
    )

def chat(query, qa):
    """Send ``query`` to ``qa`` and print the answer it returns.

    Args:
        query: The question, passed to ``qa`` unchanged.
        qa: A callable that takes the query and returns a mapping containing
            an ``"answer"`` entry.

    Returns:
        None; the answer is written to standard output.
    """
    # Only the "answer" entry of the result is shown to the user.
    answer = qa(query)["answer"]
    print(answer)

  from tqdm.autonotebook import tqdm


In [2]:
# Name of the Pinecone index this notebook works against.
index_name = "recipes"

# Connect to Pinecone (creating the index if it is missing), then build the
# conversational retrieval chain that later cells query via chat().
initialize_pinecone(index_name)
chatbot = initialize_chatbot(index_name)

In [6]:
# Ask the chain a question; chat() prints the "answer" field of the response.
# NOTE(review): embed_pdfs() is never called in the visible cells and the
# execution counter jumps from 2 to 6, so this answer presumably relies on
# documents uploaded to the index in an earlier run -- confirm on a fresh kernel.
query = "what is the nutrition information for the boiled egg recipe?"
chat(query, chatbot)

The nutrition information for the boiled egg recipe is as follows:
- Calories: 77.5kcal
- Carbohydrates: 0.6g
- Protein: 6.3g
- Fat: 5.3g
- Saturated Fat: 1.6g
- Cholesterol: 186.5mg
- Sodium: 62mg
- Sugar: 0.6g
