In [9]:
# One-time setup, kept commented out so a full re-run does not reinstall packages.
# The captured install log below resolved weaviate-client to 3.26.0; pinning that
# version and using %pip (which targets the running kernel's environment) keeps the
# setup reproducible:
# %pip install -q "weaviate-client==3.26.0"

Collecting weaviate-client
  Using cached weaviate_client-3.26.0-py3-none-any.whl.metadata (3.4 kB)
Collecting validators<1.0.0,>=0.21.2 (from weaviate-client)
  Using cached validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)
Collecting authlib<2.0.0,>=1.2.1 (from weaviate-client)
  Using cached Authlib-1.3.0-py2.py3-none-any.whl.metadata (3.8 kB)
Using cached weaviate_client-3.26.0-py3-none-any.whl (120 kB)
Using cached Authlib-1.3.0-py2.py3-none-any.whl (223 kB)
Using cached validators-0.22.0-py3-none-any.whl (26 kB)
Installing collected packages: validators, authlib, weaviate-client
  Attempting uninstall: validators
    Found existing installation: validators 0.20.0
    Uninstalling validators-0.20.0:
      Successfully uninstalled validators-0.20.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.24.0 requires importlib-metadata<7,>=1.4, 

In [5]:
import os
import openai
from dotenv import load_dotenv

In [6]:
# Load the variables defined in the local .env file into the process environment.
# Left as the cell's final expression so the boolean result is displayed.
load_dotenv(dotenv_path='.env')

True

In [7]:
# Address of the Weaviate instance, read from the WEAVIATE_URL environment variable.
# Subscript access (not .get) means a missing variable raises KeyError right here.
weaviate_url = os.environ['WEAVIATE_URL']

In [8]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Weaviate

In [9]:
from langchain_community.document_loaders import TextLoader


# Location of the source article. Set the GUT_TEXT_PATH environment variable (for
# example in .env) to run the notebook on another machine; the default is the original
# hard-coded path, so existing setups behave exactly as before.
gut_text_path = os.environ.get(
    "GUT_TEXT_PATH",
    "/home/susearc/Documents/github/Weaviate_Openai/gut.txt",
)

loader = TextLoader(gut_text_path)  # reads the text file
documents = loader.load()  # the file contents wrapped as LangChain documents

# NOTE(review): the chunk printed by the search cell further down is far longer than
# 100 characters, so chunk_size is not acting as a hard cap here — presumably because
# CharacterTextSplitter only cuts at its separator. Confirm whether small chunks were intended.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=10)  # create the splitter
docs = text_splitter.split_documents(documents)  # split the loaded documents into chunks


embeddings = OpenAIEmbeddings()  # embedding model shared by every vector store in this notebook



In [32]:
# Embed the split chunks and store them in the Weaviate instance at `weaviate_url`.
# by_text=False is forwarded as in the original call — presumably so that searches go
# through the embeddings rather than Weaviate's text search; confirm in the LangChain docs.
db = Weaviate.from_documents(
    docs,
    embeddings,
    weaviate_url=weaviate_url,
    by_text=False,
)

In [33]:
# Search phrase reused by the later search and retriever cells.
query = "cancer?"

# NOTE(review): this rebinds `docs` from the split chunks to the search hits, so
# re-running the indexing cell afterwards would index the hits instead of the chunks.
docs = db.similarity_search(query=query)

In [34]:
# Show the text of the first search hit.
top_hit = docs[0]
print(top_hit.page_content)

Artificial Intelligence and Computer Vision
The term artificial intelligence refers to computer systems performing complex tasks that would normally require the use of the human brain, such as visual perception (“computer vision”), speech recognition, and decision making.6 Early attempts at polyp detection required explicit programming of software to recognize certain polyp features (eg, textures and shapes).7 These early efforts were focused on recognizing still images because computer-processing speed at that time could not support real-time, live video image analysis. Since then, major advances in deep-learning algorithms using convolutional neural networks have dramatically expanded the capabilities of computer vision for endoscopy. These contemporary algorithms are trained on large data sets and can adapt and “learn” to recognize complex objects in live video.8 The most important applications of AI computer vision in colonoscopy today include computer-aided detection (CADe) and co

Authentication

In [10]:
import weaviate

# Build an explicit client so that connection settings, including credentials, live in
# one place. When WEAVIATE_API_KEY is set (e.g. in .env) it is sent as API-key
# authentication; when it is absent the client connects without credentials, exactly
# as the original call did.
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")
client = weaviate.Client(
    url=weaviate_url,
    auth_client_secret=(
        weaviate.AuthApiKey(api_key=weaviate_api_key) if weaviate_api_key else None
    ),
)

In [36]:
# Same kind of indexing call as for `db`, but going through the pre-built `client`
# instead of a URL.
# NOTE(review): this indexes `documents` (the unsplit load result), whereas `db` was
# built from the split chunks — confirm which one was intended.
vectorstore = Weaviate.from_documents(
    documents,
    embeddings,
    client=client,
    by_text=False,
)

In [38]:
# Sometimes we want the search results together with a relevancy score, to judge how
# good a particular result is. The returned score is a cosine distance, so a lower
# score is better.
docs = db.similarity_search_with_score(
    query,
    by_text=False,
)

best_scored_hit = docs[0]
best_scored_hit

(Document(page_content='Artificial Intelligence and Computer Vision\nThe term artificial intelligence refers to computer systems performing complex tasks that would normally require the use of the human brain, such as visual perception (“computer vision”), speech recognition, and decision making.6 Early attempts at polyp detection required explicit programming of software to recognize certain polyp features (eg, textures and shapes).7 These early efforts were focused on recognizing still images because computer-processing speed at that time could not support real-time, live video image analysis. Since then, major advances in deep-learning algorithms using convolutional neural networks have dramatically expanded the capabilities of computer vision for endoscopy. These contemporary algorithms are trained on large data sets and can adapt and “learn” to recognize complex objects in live video.8 The most important applications of AI computer vision in colonoscopy today include computer-aide

Maximal marginal relevance search (MMR)


In [39]:
# Wrap the vector store as a retriever that uses the "mmr" search type, then show the
# first document it returns for the query.
retriever = db.as_retriever(search_type="mmr")
mmr_hits = retriever.get_relevant_documents(query)
mmr_hits[0]

Document(page_content='Artificial Intelligence and Computer Vision\nThe term artificial intelligence refers to computer systems performing complex tasks that would normally require the use of the human brain, such as visual perception (“computer vision”), speech recognition, and decision making.6 Early attempts at polyp detection required explicit programming of software to recognize certain polyp features (eg, textures and shapes).7 These early efforts were focused on recognizing still images because computer-processing speed at that time could not support real-time, live video image analysis. Since then, major advances in deep-learning algorithms using convolutional neural networks have dramatically expanded the capabilities of computer vision for endoscopy. These contemporary algorithms are trained on large data sets and can adapt and “learn” to recognize complex objects in live video.8 The most important applications of AI computer vision in colonoscopy today include computer-aided

USE CASES

In [11]:
# LLMs have no access to knowledge outside their training data, so a vector store is a
# handy way to provide them with additional context.


from langchain_community.chat_models import ChatOpenAI


# Baseline: ask the model directly, without any retrieved context.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=1)
baseline_question = "How AI will detect cancer in gut?"
llm.predict(baseline_question)  # not the answer we were expecting

'Artificial intelligence (AI) can be used in various ways to detect cancer in the gut. Here are a few potential methods:\n\n1. Image analysis: AI algorithms can analyze images obtained through endoscopic procedures like colonoscopy or capsule endoscopy, where a camera or imaging device captures images of the gut. AI can help in identifying abnormal growths, lesions, or tumors that could potentially be cancerous.\n\n2. Pattern recognition: AI can be trained on large datasets of medical images, including both healthy and diseased gut images. By learning from these datasets, AI models can recognize patterns and anomalies that may be indicative of cancer, allowing for early detection.\n\n3. Biomarker analysis: AI algorithms can assist in analyzing various biomarkers like specific proteins, DNA mutations, or chemical markers found in blood, tissue, or stool samples. These biomarkers could be associated with the presence of cancer in the gut, and AI can aid in detecting and classifying these

Question Answering with Sources

In [12]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_community.llms import OpenAI

In [13]:
with open("/home/susearc/Documents/github/Weaviate_Openai/gut.txt") as f:
    state_of_the_union = f.read()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

In [15]:
# One metadata dict per chunk, giving each a synthetic source id ("0-pl", "1-pl", ...)
# that the sources chain can report back alongside its answer.
chunk_sources = [{"source": f"{i}-pl"} for i, _ in enumerate(texts)]

# Embed the chunks and index them, with their source ids, in Weaviate.
docsearch = Weaviate.from_texts(
    texts,
    embeddings,
    metadatas=chunk_sources,
    weaviate_url=weaviate_url,
    by_text=False,
)

In [16]:
# Question-answering chain that also reports its sources, fed by a retriever over
# the `docsearch` vector store.
sources_llm = OpenAI(temperature=0)
sources_retriever = docsearch.as_retriever()

chain = RetrievalQAWithSourcesChain.from_chain_type(
    sources_llm,
    chain_type="stuff",
    retriever=sources_retriever,
)

In [17]:
# Ask a question about the indexed article. The call stays the cell's final expression
# so the resulting dict (answer and sources) is displayed.
question_payload = {"question": "What did the article say"}
chain(question_payload, return_only_outputs=True)

{'answer': ' The article discusses the use of artificial intelligence and computer vision in colonoscopy, specifically for the detection of colorectal polyps. It mentions the use of convolutional neural networks and their success in detecting polyps, as well as the potential for improving adenoma detection and other performance metrics. \n',
 'sources': '0-pl'}

Retrieval-Augmented Generation

In [19]:
with open("/home/susearc/Documents/github/Weaviate_Openai/gut.txt") as f:
    state_of_the_union = f.read()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

In [21]:
# One metadata dict per chunk, giving each a synthetic source id ("0-pl", "1-pl", ...).
chunk_sources = [{"source": f"{i}-pl"} for i, _ in enumerate(texts)]

# Embed the chunks and index them, with their source ids, in Weaviate.
docsearch = Weaviate.from_texts(
    texts,
    embeddings,
    metadatas=chunk_sources,
    weaviate_url=weaviate_url,
    by_text=False,
)

# Retriever used as the context source of the RAG chain built below.
retriever = docsearch.as_retriever()

In [22]:
from langchain.prompts import ChatPromptTemplate

# Prompt text for the RAG chain. {question} and {context} are the two placeholders
# that get filled in when the chain runs.
template = """You are a helpful assistant, Use the following pieces of retrieved context to answer the question


Question: {question}
Context: {context}
Answer:
"""

# Turn the raw string into a chat prompt template.
prompt = ChatPromptTemplate.from_template(template)
print(prompt)  # shows the parsed template together with its input variables

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='You are a helpful assistant, Use the following pieces of retrieved context to answer the question\n\n\nQuestion: {question}\nContext: {context}\nAnswer:\n'))]


In [23]:
from langchain_community.chat_models import ChatOpenAI

# Chat model for the RAG chain; this rebinds `llm`, replacing the temperature=1
# instance from the baseline cell with a temperature=0 one.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [24]:


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the prompt's {context} slot is filled with the Python repr of
    a list of Document objects (escaped newlines, metadata and all) rather than
    readable text.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The incoming question is sent to the retriever (whose hits are flattened to plain
# text) and also passed through unchanged; both are then placed in the prompt, sent to
# the model, and the reply is reduced to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


rag_chain.invoke("What ml framework does it uses?")

'The ml framework used in this context is convolutional neural networks.'