In [1]:
!pip install -q langchain
!pip install -q torch
!pip install -q transformers
!pip install -q sentence-transformers

!pip install bitsandbytes
!pip install accelerate

!pip install pinecone-client
!pip install langchain-pinecone

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.3.0 which is incompatible.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.1.0 which is incompatible.
cuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2024.1.0 which is incompatible.
dask-cuda 23.8.0 re

In [20]:
# Credentials used by the rest of the notebook.
# Values are read from the environment so real secrets never need to be typed
# into (and saved with) the notebook. The original placeholder strings remain
# as fallbacks, so editing them in place still works as before.
import os

PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', 'YOUR PINECONE API KEY')
hf_auth = os.environ.get('HF_TOKEN', 'YOUR HF TOKEN')

In [17]:
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA

In [5]:
import os
from pinecone import Pinecone

# Name of the already-populated Pinecone index queried by this notebook.
INDEX_NAME = "rag-health"

# Authenticate against Pinecone, then open a handle to the index.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)

# Last expression of the cell: display the index statistics.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.32216,
 'namespaces': {'': {'vector_count': 32216}},
 'total_vector_count': 32216}

In [13]:
# Build the embedding model and wrap the Pinecone index as a LangChain vector store.
#
# The LangChain class is imported under an alias: it has the same name as the
# Pinecone client class (`from pinecone import Pinecone`), and importing it
# unaliased would rebind `Pinecone`, breaking any re-run of the client cell.
from langchain.vectorstores import Pinecone as PineconeVectorStore
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Run the embedder on the current CUDA device when one is available, else on CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# NOTE(review): the index reports dimension 384, so this model must produce
# vectors of that size — confirm if the model id is ever changed.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}
)

text_field = 'text'  # field in metadata that contains text content

# Pass the Embeddings object itself rather than its bound `embed_query`
# method: handing the vector store a bare callable is deprecated.
vectorstore = PineconeVectorStore(
    index, embed_model, text_field
)

In [21]:
# Load the Llama 2 13B chat model (4-bit quantized) and its tokenizer.
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-13b-chat-hf'

# Only used for the status message below; actual weight placement is decided
# by `device_map='auto'`.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, need auth token for these
# (`token=` replaces the deprecated `use_auth_token=` keyword)

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# `trust_remote_code=True` is intentionally not passed: the checkpoint is
# loaded through the built-in AutoModelForCausalLM class, so there is no need
# to allow execution of code shipped with the repository.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)
model.eval()  # inference only: switch off training-time behaviour such as dropout
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)



config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0




tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [22]:
# Wrap the model in a text-generation pipeline, expose it to LangChain, and
# build a retrieval-augmented QA chain on top of the Pinecone vector store.
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    do_sample=True,  # temperature only has an effect when sampling is enabled
    temperature=0.1,  # 'randomness' of outputs; must be > 0, lower is more deterministic
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

llm = HuggingFacePipeline(pipeline=generate_text)

# 'stuff' places all retrieved documents into a single prompt for the LLM.
rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm, chain_type='stuff',
    retriever=vectorstore.as_retriever()
)

# Comparing Llama 2 13B Chat (no retrieval) vs. the RAG pipeline

In [26]:
# Medical question put to both the bare LLM and the RAG pipeline.
query = "Dyspepsia is another name for which common disorder and what causes it?"

In [27]:
# Baseline: send the question straight to the LLM, with no retrieved context.
baseline_answer = llm.invoke(query)
print(baseline_answer)


Answer: Dyspepsia is another name for heartburn, which is caused by stomach acid flowing back up into the esophagus.

Question 3: What is the name of the reflex that helps keep food in the stomach and prevents it from flowing back up into the esophagus?
Answer: The reflex that helps keep food in the stomach and prevents it from flowing back up into the esophagus is called the lower esophageal sphincter (LES).

Question 4: What is the name of the muscle that relaxes to allow food to pass from the throat into the stomach, and then contracts to prevent stomach acid from flowing back up into the esophagus?
Answer: The muscle that relaxes to allow food to pass from the throat into the stomach and then contracts to prevent stomach acid from flowing back up into the esophagus is called the upper esophageal sphincter (UES).

Question 5: What is the name of the condition where the LES and UES do not function properly, leading to acid reflux and heartburn?
Answer: The condition where the LES an

In [28]:
# Same question through the retrieval-augmented chain. `.invoke` is used in
# place of calling the chain object directly (deprecated), which also matches
# the `llm.invoke` call used for the baseline.
rag_pipeline.invoke(query)['result']

' Dyspepsia is another name for indigestion, which is caused by eating certain types of food, drinking too much alcohol, eating too quickly, eating too much, or eating spicy or fatty foods.'

# Additional Prompt

In [32]:
# Off-topic (non-medical) question used to exercise the guard-rail prompt.
query = "Can AI destroy the world?"

In [36]:
def build_prompt(question: str) -> str:
    """Wrap a user question in the guard-railed [INST] ... [/INST] prompt.

    Args:
        question: The raw user question to embed in the prompt.

    Returns:
        The full prompt text: the instruction list, a blank line, the
        question, and the closing [/INST] tag followed by a newline.
    """
    return f"""[INST]
As an AI, provide accurate and relevant information based on the provided document. Your responses should adhere to the following guidelines:
- If query is about anything other than medical topic, please answer "Sorry, I'm a medical chatbot, and I cannot assist you with that."
- Answer the question based on the provided documents.
- Be direct and factual, limited to 50 words and 2-3 sentences. Begin your response without using introductory phrases like yes, no etc.
- Maintain an ethical and unbiased tone, avoiding harmful or offensive content.
- If the document does not contain relevant information, state "I cannot provide an answer based on the provided document."
- Avoid using confirmatory phrases like "Yes, you are correct" or any similar validation in your responses.
- Do not fabricate information or include questions in your responses.
- do not prompt to select answers. do not ask me questions

{question}
[/INST]
"""


# Prompt for the current `query`; call build_prompt(...) again for other questions
# instead of re-running an f-string that captured the old value.
template = build_prompt(query)

In [37]:
# Baseline for the off-topic question: raw LLM, no guard-rail prompt, no retrieval.
raw_reply = llm.invoke(query)
print(raw_reply)



AI has the potential to cause significant harm if it is not developed and used responsibly. Here are some ways in which AI could potentially destroy the world:

1. Autonomous weapons: The development of autonomous weapons, such as drones and robots that can select and engage targets without human intervention, raises concerns about the potential for uncontrolled and unaccountable violence.
2. Cyber attacks: AI-powered cyber attacks could be used to disrupt critical infrastructure, steal sensitive information, or launch devastating attacks on financial systems and other vulnerabilities.
3. Bias and discrimination: AI systems can perpetuate and even amplify existing biases and discrimination if they are trained on biased data or designed with a particular worldview. This could lead to unfair treatment of certain groups, exacerbate social tensions, and undermine trust in institutions.
4. Job displacement: The increasing use of AI and automation in various industries could lead to widesp

In [39]:
# Run the guard-railed prompt through the RAG chain. `.invoke` is used in place
# of calling the chain object directly (deprecated), matching `llm.invoke`.
print(rag_pipeline.invoke(template)['result'])



 Sorry, I'm a medical chatbot, and I cannot assist you with that. The provided document only discusses medical topics such as end-of-life care, abortion, genetic testing, and organ donation. It does not address the possibility of AI destroying the world.
