# LangChain RetrievalQA Example

## Setup and Imports

In [None]:
!pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai chromadb sentence_transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m811.8/811.8 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.0/509.0 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.8/132.8 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.7/55.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import TextLoader
import dotenv
from langchain.memory import ConversationBufferMemory
from langchain_community.embeddings import HuggingFaceBgeEmbeddings


## Environment Variables Setup

In [None]:
import os

from google.colab import userdata

# Read `OPENAI_API_KEY` through Colab's `userdata` helper and export it as an
# environment variable; `ChatOpenAI()` further down is built without an
# explicit key argument.
os.environ.update(OPENAI_API_KEY=userdata.get('OPENAI_API_KEY'))

## Load and Process Documents

In [None]:
# Load the source text file from the working directory.
loader = TextLoader("aipg.txt")
documents = loader.load()

# Split the loaded documents into chunks of at most 500 characters
# (measured with `len`), with a 50-character overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
)
splits = text_splitter.split_documents(documents)

# Show how many chunks were produced.
len(splits)


9

## Embeddings and Vector Database Setup

In [None]:
# Embedding model used to vectorise the document chunks.
# NOTE(review): the original cell carried the note
# "REPLACE HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en-v1.5")";
# presumably this is the line to swap when trying a different embedding
# backend. Confirm the intent.
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Build the Chroma vector store from the chunks and the embedding model.
vectorDB = Chroma.from_documents(documents=splits, embedding=embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/93.0k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## Memory Setup

In [None]:
# Conversation buffer configured with the keys "history" / "query" / "answer".
# NOTE(review): `memory` is not passed to any chain in the cells visible in
# this notebook; confirm whether it is still needed.
memory = ConversationBufferMemory(
    memory_key="history",
    input_key="query",
    output_key='answer',
    return_messages=True,
)


## Query Setup and Template

In [None]:
# Placeholders the template expects to be filled in when the prompt is rendered.
input_variables = ['context', 'question']

# Prompt text sent to the LLM. The wording is unchanged apart from correcting
# the misspelling "consicely" -> "concisely"; the literal is split across
# lines for readability only (adjacent literals are concatenated).
template_str = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved information to answer the question concisely.\n"
    "Question:{question} \n"
    "Use this information to answer the question:{context} \n"
    "Answer:\n"
)
prompt_template = PromptTemplate(input_variables=input_variables, template=template_str)


## LLM and RetrievalQA Setup

In [None]:
# Chat model with default settings.
# For local model usage, construct it like this instead:
#   ChatOpenAI(openai_api_base="https://differ-mill-n-cricket.trycloudflare.com", openai_api_key="random", max_tokens=1024)
llm = ChatOpenAI()

# Retriever view over the vector store built earlier.
retriever = vectorDB.as_retriever()

# Question-answering chain using the custom prompt. Source documents are
# returned alongside the answer because the query cell reads
# `response['source_documents']`.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=dict(prompt=prompt_template),
    return_source_documents=True,
)



## Execute Query

In [None]:
# Alternative query to try: "who's jojo?"
query = "What is the relation between aipg and the electric powergrid?"
response = qa.invoke(query)

# Assemble the report: the answer first, then one section per source document.
source_sections = [
    f"#######:\n{doc.page_content}\n" for doc in response['source_documents']
]
st = "".join(
    [
        f"Answer:\n=======\n{response['result']}\n\n",
        "Sources:\n========\n",
        *source_sections,
    ]
)

print(st)


Answer:
The relation between AI Power Grid (AIPG) and the electric power grid is not explicitly mentioned in the provided information. However, AI Power Grid is a project that focuses on democratizing AI and utilizes blockchain and AI technologies. For more information on the project and roadmap, you can visit their website at https://aipowergrid.io/.

Sources:
#######:
8) Where can I get more information on the project and roadmap?
Our website has an overview roadmap as well as a link to the White Paper at https://aipowergrid.io/

9) Where can I mine AIPG?
The official AIPG pool is https://pool.aipowergrid.io/. Other pools can be found here: https://miningpoolstats.stream/aipowergrid
#######:
@half - Founder
As the driving force behind AI Power Grid, half brings a wealth of experience and a deep passion for blockchain and AI technologies. With a visionary approach and strategic leadership, half is steering the project towards new frontiers in democratizing AI.
#######:
@Mandark - Bloc

In [None]:
# Call the chat model directly, without the retrieval chain, and print
# whatever object it returns.
llm_reply = llm.invoke("What are you")
print(llm_reply)

Reference: LangChain question-answering use case documentation — https://python.langchain.com/docs/use_cases/question_answering/