In [23]:
# Install the notebook's dependencies with the %pip magic so they land in the
# environment of the running kernel, not whichever `pip` is first on PATH.
# langchain-community is listed explicitly because the import cell below
# imports from `langchain_community`.
# TODO: pin versions once a known-good set has been confirmed.
%pip install --quiet chromadb
%pip install --quiet langchain-text-splitters
%pip install --quiet unstructured langchain langchain-community
%pip install --quiet "unstructured[all-docs]"

In [24]:
# Additional packages, installed into the running kernel's environment via
# the %pip magic.
# NOTE(review): presumably runtime requirements of unstructured's PDF
# handling -- confirm and pin versions.
%pip install --quiet pillow_heif
%pip install --quiet matplotlib
%pip install --quiet unstructured_inference

### Start Code


In [37]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

In [26]:
local_path = "graphql_org_learn_queries.pdf"

# Load the local PDF into `data`, the list of documents that every later cell
# (preview, chunking, indexing) depends on.
# Fail fast when no path is configured: carrying on would leave `data`
# undefined -- or stale from an earlier run -- for all the cells below.
if not local_path:
    raise ValueError("Upload a PDF file")

loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

In [27]:
# Preview the extracted text of the first loaded document (sanity check that
# the PDF was parsed).
# NOTE(review): depending on the loader's mode, data[0] may hold the whole PDF
# rather than a single page -- confirm.
data[0].page_content

'📣 GraphQLConf 2024 • Sept 10-12 • San Francisco • Check out the Schedule & Get Your Ticket • …\n\nQueries and Mutations\n\nOn this page, you’ll learn in detail about how to query a GraphQL server.\n\nFields\n\nAt its simplest, GraphQL is about asking for speci\x00c \x00elds on objects. Let’s\n\nstart by looking at a very simple query and the result we get when we run it:\n\n{ hero { name } }\n\n{ "data": { "hero": { "name": "R2-D2" } } }\n\nYou can see immediately that the query has exactly the same shape as the\n\nresult. This is essential to GraphQL, because you always get back what you\n\nexpect, and the server knows exactly what \x00elds the client is asking for.\n\nThe \x00eld name returns a String type, in this case the name of the main hero of Star Wars, "R2-D2" .\n\nOh, one more thing - the query above is interactive. That means you can\n\nchange it as you like and see the new result. Try adding an appearsIn \x00eld to the hero object in the query, and see the new result.\n\nI

In [38]:
# Cut the loaded document(s) into chunks of at most 7500 characters, with a
# 100-character overlap between neighbouring chunks; `chunks` is what gets
# embedded and indexed further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [None]:
# Download the two Ollama models this notebook refers to by name:
# "mistral" (chat model) and "nomic-embed-text" (embedding model).
# Requires the `ollama` command-line tool to be available to the shell.
!ollama pull mistral
!ollama pull nomic-embed-text

In [94]:

# List the models known to the local Ollama install, to confirm that
# "mistral" and "nomic-embed-text" are available before they are used.
!ollama list

44361.94s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


NAME                   	ID          	SIZE  	MODIFIED      
mistral:latest         	2ae6f6dd7a3d	4.1 GB	2 minutes ago	
nomic-embed-text:latest	0a109f422b47	274 MB	4 minutes ago	
moondream:latest       	55fc3abd3867	1.7 GB	6 days ago   	
llama3:latest          	a6990ed6be41	4.7 GB	6 weeks ago  	


In [84]:

# Embed every chunk with the local "nomic-embed-text" model and index the
# vectors in the "local-rag" Chroma collection.
# The collection must be populated here: creating it with `Chroma(...)` alone
# leaves it empty on a fresh kernel, so the retriever would return no context
# and the answers would not be grounded in the PDF.
# NOTE(review): re-running this cell in the same kernel likely adds the same
# chunks to the collection again -- confirm, and restart the kernel or reset
# the collection before re-indexing.
embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name="local-rag",
)

In [81]:

# #chroma_instance.add_documents(chunks)

# # Add to vector database
# vector_db = Chroma.from_documents(
#     documents=chunks,
#     embedding=OllamaEmbeddings(model="nomic-embed-text", show_progress=True),
#     collection_name="local-rag"
# )

In [85]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [86]:
# Chat model served by the local Ollama install. "mistral" has to be pulled
# beforehand (see the `ollama pull` cell). `llm` is used both to rewrite the
# question for retrieval and to write the final answer.
local_model = "mistral"
llm = ChatOllama(model=local_model)

In [87]:
# Prompt handed to the multi-query retriever: it asks the LLM for five
# rephrasings of the user's question, one per line, so that retrieval does not
# hinge on a single wording. `{question}` is the only input variable.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

In [88]:
# Wrap the Chroma retriever in a MultiQueryRetriever: `llm` produces
# alternative phrasings of the question using QUERY_PROMPT, and the vector
# store is searched through `vector_db.as_retriever()`.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Answer-generation prompt: the model is told to rely only on the retrieved
# context. `{context}` and `{question}` are filled in by the chain.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [89]:
# End-to-end RAG pipeline. The input string is passed through unchanged as
# "question" and also sent to the retriever to produce "context"; both are
# substituted into `prompt`, answered by `llm`, and reduced to a plain string.
chain = (
    {"question": RunnablePassthrough(), "context": retriever}
    | prompt
    | llm
    | StrOutputParser()
)

In [107]:

from IPython.display import Markdown, display

# Run the RAG chain on one question and render the reply as Markdown.
# The question is a single string, split across adjacent literals only to
# keep the source lines short.
answer = chain.invoke(
    "create a graphql query: schema name is BLX001T i need a CCN, TIN,NPI "
    "and where groupnumber is start with 786. "
    "give me a output in a json discription,graphql,other"
)
display(Markdown(answer))









[A[A[A[A[A[A[A[A







OllamaEmbeddings: 100%|██████████| 1/1 [00:01<00:00,  1.61s/it]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 16.58it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 13.28it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 10.32it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 11.12it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 14.26it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.54it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.91it/s]








OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.77it/s]


 Based on the context provided, here's an example GraphQL query for the schema `BLX001T` to fetch the fields `CCN`, `TIN`, `NPI`, where the `groupnumber` starts with `786`. The result will be returned in JSON format.

```graphql
query GetData {
  BLX001T(where: {groupnumber: {beginsWith: "786"}}) {
    CCN
    TIN
    NPI
  }
}
```

This query will return a JSON object containing the `CCN`, `TIN`, and `NPI` for all records in the `BLX001T` schema where the `groupnumber` starts with `786`.