In [1]:
# # Add '--use-deprecated=legacy-resolver' to resolve dependency error

# !pip install -q python-dotenv --use-deprecated=legacy-resolver
# !pip install -q langchain --use-deprecated=legacy-resolver
# !pip install -q langchain_community --use-deprecated=legacy-resolver
# !pip install -q langchain_experimental --use-deprecated=legacy-resolver

# # If using Cohere:
# !pip install -q langchain-cohere --use-deprecated=legacy-resolver

# # Using IRIS:
# !pip install -q langchain-iris --use-deprecated=legacy-resolver

In [2]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_cohere import ChatCohere

In [3]:
# # https://docs.cohere.com/docs/cohere-and-langchain
# # class `Cohere` was deprecated in LangChain 0.1.14

_ = load_dotenv(find_dotenv()) # read local .env file
cohere_api_key = "YOUR_COHERE_API_KEY"

# https://docs.cohere.com/docs/models
model = "command"

temperature = 0
llm = ChatCohere(model=model,temperature=0, cohere_api_key=cohere_api_key)

In [4]:
username = 'superuser'
password = 'sys'
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = '1972'
namespace = 'TEST'
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"
print(CONNECTION_STRING)

iris://superuser:sys@localhost:1972/TEST


In [5]:
from langchain_iris import IRISVector

In [6]:
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain_cohere import CohereEmbeddings

In [7]:
# https://cohere.com/blog/search-cohere-langchain
# https://openexchange.intersystems.com/package/iris-vector-search
# refer to TestIRIS_Cohere.ipynb

loader = TextLoader("factsheet_example.txt", encoding='utf-8')
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=40, chunk_overlap=5)
docs = text_splitter.split_documents(documents)

# initialise embedding model
embeddings = CohereEmbeddings(cohere_api_key=cohere_api_key)

In [8]:
COLLECTION_NAME = "factsheet_test"

# initialises the database with the given documents and embeddings
db_rag = IRISVector.from_documents(
    embedding=embeddings,
    documents=docs,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

In [9]:
print(f"Number of docs in vector store: {len(db_rag.get()['ids'])}")

query = "Joint patrols to catch traffickers"

'''
Cosine Distance: Measures the dissimilarity between vectors as the complement of the cosine similarity.
Higher values mean greater dissimilarity.
'''
docs_with_score = db_rag.similarity_search_with_score(query, k=3)
print(docs_with_score)

Number of docs in vector store: 54
[(Document(page_content='Explore the hidden patterns, uncover', metadata={'source': 'factsheet_example.txt'}), 0.836592449933092), (Document(page_content='revealing the fascinating connections', metadata={'source': 'factsheet_example.txt'}), 0.872788998919718), (Document(page_content='on a captivating journey through the', metadata={'source': 'factsheet_example.txt'}), 0.88672400818712)]


In [10]:
# docs = vectordb.similarity_search(question,k=3)

# run with mmr!!more diverse results to extract impt info??
# db.max_marginal_relevance_search(query, k=2, fetch_k=3)

In [11]:
retriever = db_rag.as_retriever()
print(retriever)

tags=['IRISVector'] vectorstore=<langchain_iris.vectorstores.IRISVector object at 0x000002133F832A50>


In [12]:
from langchain.sql_database import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
from langchain.prompts.prompt import PromptTemplate

In [13]:
# https://community.intersystems.com/post/langchain-fixed-sql-me

_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.

The only table available is DiseaseProfile.

Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

The SQL query should NOT end with semi-colon
Question: {input}"""

PROMPT = PromptTemplate(
    input_variables=["input", "dialect"], template=_DEFAULT_TEMPLATE
)

db_sql = SQLDatabase.from_uri(CONNECTION_STRING) 
db_chain = SQLDatabaseChain.from_llm(llm=llm, db=db_sql, prompt=PROMPT, verbose=True) 

In [14]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

retrieval_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | db_chain
)

In [15]:
retrieval_chain.invoke("how many rows are there?")



[1m> Entering new SQLDatabaseChain chain...[0m
messages=[HumanMessage(content="Answer the question based only on the following context:\n[Document(page_content='Columns and Usage:', metadata={'source': 'factsheet_example.txt'}), Document(page_content='The columns are Disease VARCHAR(512),', metadata={'source': 'factsheet_example.txt'}), Document(page_content='Fatigue VARCHAR(25),', metadata={'source': 'factsheet_example.txt'}), Document(page_content='Fever VARCHAR(25), Cough VARCHAR(25),', metadata={'source': 'factsheet_example.txt'})]\n\nQuestion: how many rows are there?\n")]
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM DiseaseProfile
SQLResult:[0m
SQLResult: [33;1m[1;3m[(349,)][0m
Answer:[32;1m[1;3m349[0m
[1m> Finished chain.[0m


{'query': ChatPromptValue(messages=[HumanMessage(content="Answer the question based only on the following context:\n[Document(page_content='Columns and Usage:', metadata={'source': 'factsheet_example.txt'}), Document(page_content='The columns are Disease VARCHAR(512),', metadata={'source': 'factsheet_example.txt'}), Document(page_content='Fatigue VARCHAR(25),', metadata={'source': 'factsheet_example.txt'}), Document(page_content='Fever VARCHAR(25), Cough VARCHAR(25),', metadata={'source': 'factsheet_example.txt'})]\n\nQuestion: how many rows are there?\n")]),
 'result': '349'}

In [16]:
retrieval_chain.invoke("find the disease and number of occurances among patients below 30 who have normal cholesterol level")



[1m> Entering new SQLDatabaseChain chain...[0m
messages=[HumanMessage(content="Answer the question based only on the following context:\n[Document(page_content='Cholesterol Level: The cholesterol', metadata={'source': 'factsheet_example.txt'}), Document(page_content='level of the patient (Normal/High).', metadata={'source': 'factsheet_example.txt'}), Document(page_content='level of the patient (Normal/High).', metadata={'source': 'factsheet_example.txt'}), Document(page_content='or assessment for the specific disease', metadata={'source': 'factsheet_example.txt'})]\n\nQuestion: find the disease and number of occurances among patients below 30 who have normal cholesterol level\n")]
SQLQuery:[32;1m[1;3mSELECT Disease, Count(*) FROM DiseaseProfile WHERE Age < 30 AND CholesterolLevel = 'Normal' GROUP BY Disease
SQLResult:[0m
SQLResult: [33;1m[1;3m[('ASTHMA', 3), ('COMMON COLD', 2), ('DEPRESSION', 1), ('DIABETES', 1), ('ECZEMA', 2), ('GASTROENTERITIS', 1), ('HYPERTHYROIDISM', 2), (

{'query': ChatPromptValue(messages=[HumanMessage(content="Answer the question based only on the following context:\n[Document(page_content='Cholesterol Level: The cholesterol', metadata={'source': 'factsheet_example.txt'}), Document(page_content='level of the patient (Normal/High).', metadata={'source': 'factsheet_example.txt'}), Document(page_content='level of the patient (Normal/High).', metadata={'source': 'factsheet_example.txt'}), Document(page_content='or assessment for the specific disease', metadata={'source': 'factsheet_example.txt'})]\n\nQuestion: find the disease and number of occurances among patients below 30 who have normal cholesterol level\n")]),
 'result': 'Answer: The disease profile for patients below 30 years of age, with a normal cholesterol level, is as follows:\n\nDisease | Number of occurrences\n---- | ----\nASTHMA | 3\nCOMMON COLD | 2\nDEPRESSION | 1\nDIABETES | 1\nECZEMA | 2\nGASTROENTERITIS | 1\nHYPERTHYROIDISM | 2\nINFLUENZA | 3\nLIVER CANCER | 1\nPANCREATIT