In [None]:
!pip install -q openai pinecone langchain langchain-community langchain-openai langchain-pinecone transformers datasets tiktoken

In [None]:
import time
from pinecone import Pinecone as PineconeClient, ServerlessSpec

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
import os
from getpass import getpass

# Prompt for both secrets without echoing them (OpenAI first, then Pinecone).
OPENAI_API_KEY = getpass("Enter your OpenAI API key: ")
PINECONE_API_KEY = getpass("Enter your Pinecone API key: ")

# Also publish the keys as process environment variables under the same names.
os.environ.update(
    {
        "OPENAI_API_KEY": OPENAI_API_KEY,
        "PINECONE_API_KEY": PINECONE_API_KEY,
    }
)

In [None]:
from google.colab import files
import io

# Ask the user for a file through the Colab upload widget.
print('Please upload your business data file (e.g., .txt, .md).')
uploaded = files.upload()

# Stop early with a clear message if the upload produced nothing.
if not uploaded:
    raise ValueError('No file was uploaded. Please run the cell again.')

# Only the first uploaded entry is used; any further uploads are ignored.
file_name = next(iter(uploaded))
# 'utf-8-sig' decodes ordinary UTF-8 exactly like 'utf-8' but additionally
# drops a leading byte-order mark, so files saved as "UTF-8 with BOM" do not
# carry a stray U+FEFF character into the first indexed chunk.
business_data_content = uploaded[file_name].decode('utf-8-sig')

print(f'Successfully uploaded {file_name}.')

In [None]:
# The name of the file that the RAG system will read from
rag_file_name = "business_data.txt"
# The content was decoded from UTF-8 bytes, so write it back as UTF-8
# explicitly instead of depending on the platform's default text encoding
# (which can reject non-ASCII characters on some locales).
with open(rag_file_name, "w", encoding="utf-8") as f:
    f.write(business_data_content)

In [None]:
# Load the saved text file as LangChain documents. The encoding is given
# explicitly so decoding does not depend on the platform's default locale.
loader = TextLoader(f'./{rag_file_name}', encoding='utf-8')
documents = loader.load()

In [None]:
# Chunking parameters handed to the splitter: the size limit for each chunk
# and how much neighbouring chunks overlap.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Break the loaded documents into the smaller pieces that get embedded.
docs = text_splitter.split_documents(documents)

In [None]:
# OpenAI embedding model used to vectorise the document chunks.
EMBEDDING_MODEL = "text-embedding-3-small"
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [None]:
# Connect to Pinecone and make sure the index used by this notebook exists.
pc = PineconeClient(api_key=PINECONE_API_KEY)
index_name = "rag-qa-bot-business"
if index_name not in pc.list_indexes().names():
    print(f"Creating a new index: {index_name}")
    pc.create_index(
        name=index_name,
        # Must match the vector size produced by the embedding model that
        # populates this index.
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
    # A fixed one-second pause does not guarantee the index can serve
    # requests. Depending on the SDK version and timeout settings,
    # create_index may return before the index is ready, so poll its status
    # explicitly and give up after a bounded wait instead of hanging forever.
    ready_deadline = time.monotonic() + 120
    while not pc.describe_index(index_name).status['ready']:
        if time.monotonic() > ready_deadline:
            raise TimeoutError(
                f"Index '{index_name}' was not ready after 120 seconds."
            )
        time.sleep(1)
    print("Index created.")
else:
    print(f"Index '{index_name}' is already there.")

Index 'rag-qa-bot-business' is already there.


In [None]:
import hashlib

# Open a handle on the index and wrap it in a LangChain vector store.
index = pc.Index(index_name)

docsearch = PineconeVectorStore(
    index=index,
    embedding=embeddings,
    text_key='text'
)

# Give every chunk an ID derived from its content. Without explicit IDs each
# run of this cell stores the same chunks again under new random IDs, and the
# retriever then returns the same passage several times. With content-based
# IDs a re-run overwrites the existing vectors instead of duplicating them.
# Chunks with identical text are collapsed so no ID is sent twice in a batch.
# NOTE: vectors stored by earlier runs under random IDs are not removed here.
unique_docs = {}
for doc in docs:
    doc_id = hashlib.sha256(doc.page_content.encode("utf-8")).hexdigest()
    unique_docs.setdefault(doc_id, doc)

docsearch.add_documents(list(unique_docs.values()), ids=list(unique_docs))

['8307185e-4ce8-4355-a886-531672ae8239',
 '1c0e7b61-b0f2-48d7-9b7e-51da6db5f79b']

In [None]:
# Chat model that writes the final answer (temperature pinned to 0.0).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0)

# Retriever view over the Pinecone-backed vector store.
retriever = docsearch.as_retriever()

# Retrieval QA chain; source documents are returned alongside the answer so
# they can be shown to the user.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
def ask_question(query):
    """Run a question through the QA chain and print the answer and sources.

    Args:
        query: The question text; passed to ``qa_chain`` under the "query" key.

    Returns:
        None. Prints the chain's "result" followed by the first 160
        characters of each distinct entry in "source_documents".
    """
    result = qa_chain.invoke({"query": query})
    print("\nANSWER:")
    print(result["result"])
    print("\nSOURCES:")
    # The retriever can hand back the same chunk several times (for example
    # when the file was indexed more than once), so show each distinct chunk
    # only once, in the order it was first returned.
    seen_contents = set()
    for source in result["source_documents"]:
        if source.page_content in seen_contents:
            continue
        seen_contents.add(source.page_content)
        print("  Source: " + source.page_content[:160])
    print("_________________________________")


# Strip surrounding whitespace so a blank or whitespace-only entry counts as
# "no question" instead of being sent to the chain as an empty query.
user_input = input("Question: ").strip()

if user_input:
    ask_question(user_input)
else:
    print("No question was entered.")


Question: Where is the office located?

ANSWER:
InnovateTech Solutions has its headquarters in San Francisco, California, with satellite offices in Austin, Texas, and Bangalore, India.

SOURCES:
  Source: InnovateTech Solutions - Company FAQ

1. What is InnovateTech Solutions?
InnovateTech Solutions is a leading provider of custom software development, cloud comp
  Source: InnovateTech Solutions - Company FAQ

1. What is InnovateTech Solutions?
InnovateTech Solutions is a leading provider of custom software development, cloud comp
  Source: InnovateTech Solutions - Company FAQ

1. What is InnovateTech Solutions?
InnovateTech Solutions is a leading provider of custom software development, cloud comp
  Source: 4. How can I get a quote for a project?
To get a quote, please visit our website's "Contact Us" page and fill out the project inquiry form. Our solutions team w
_________________________________
