# Embedding-based document Q&A with the Mistral-7B model

Installing necessary packages

In [2]:
!pip install -q llama-index
!pip install -q cohere
!pip install -q transformers
!pip install -q accelerate
!pip install -q optimum[exporters]
!pip install -q InstructorEmbedding
!pip install -q sentence_transformers
!pip install -q pypdf
!pip install -q llama-cpp-python
!pip install -q sentence-transformers
!pip install -q langchain

In [3]:
# Initailising LlamaCPP
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install  llama-cpp-python --no-cache-dir



In [4]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [5]:
# Read the files found in the local "data" directory into Document objects.
data_reader = SimpleDirectoryReader("data")
documents = data_reader.load_data()

In [6]:
# Inspect what was loaded: as the last expression of the cell, the list's repr
# (ids, file metadata and raw text of each Document) is rendered as the output.
documents

[Document(id_='4ed7ce59-c6ec-4a8f-9cb4-533c453b09b2', embedding=None, metadata={'file_path': 'data/doctors.txt', 'file_name': 'doctors.txt', 'file_type': 'text/plain', 'file_size': 1111, 'creation_date': '2023-12-18', 'last_modified_date': '2023-12-18', 'last_accessed_date': '2023-12-18'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, hash='0854705f59f98d2192d26db3000a0699113dc3a7c81d7df3b44d374e3ff6320f', text='Name,Category\nDr. Bharat Murthy,Orthopaedics (Bone Specialist)\nDr. Priya Sharma,Cardiology (Heart Specialist)\nDr. Rajesh Patel,Neurology (Neurologist)\nDr. Ayesha Khan,Dermatology (Dermatologist)\nDr. Vikram Singh,ENT (Ear, Nose, and Throat Specialist)\nDr. Nisha Verma,Pediatrics (Pediatrician)\nDr. Anjali Gupta,Gynecology (Gynecologist)\nDr. Mani

In [7]:
import torch

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
# Quantised (Q4_K_M) GGUF build of Mistral-7B-Instruct v0.1 hosted on Hugging Face.
MISTRAL_GGUF_URL = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'

llm = LlamaCPP(
    # --- where the weights come from ---
    # No local file is supplied, so the model is downloaded from the URL.
    model_path=None,
    model_url=MISTRAL_GGUF_URL,
    # --- generation settings ---
    temperature=0.1,
    max_new_tokens=256,
    # Kept a little under 4096 tokens to leave some headroom.
    context_window=3900,
    # --- pass-through options ---
    # Extra kwargs for each generation call (none needed here).
    generate_kwargs={},
    # Extra kwargs for the underlying model constructor; n_gpu_layers controls
    # GPU offload (-1 presumably offloads every layer; confirm in llama-cpp-python docs).
    model_kwargs={"n_gpu_layers": -1},
    # --- prompt formatting ---
    # llama_utils helpers that render inputs in the Llama-2 prompt format.
    # NOTE(review): the model is Mistral-Instruct; confirm this template suits it.
    completion_to_prompt=completion_to_prompt,
    messages_to_prompt=messages_to_prompt,
    verbose=True,
)


Downloading url https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf to path /tmp/llama_index/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
total size (MB): 4368.44


4167it [00:38, 109.14it/s]                          
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [8]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import ServiceContext
from llama_index.embeddings import LangchainEmbedding

# Load the "thenlper/gte-large" sentence-embedding model through LangChain,
# then wrap it in the adapter llama-index expects for an embedding model.
hf_embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embed_model = LangchainEmbedding(hf_embeddings)


.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

onnx/config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

onnx/special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

onnx/tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

onnx/tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

onnx/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/670M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

In [9]:
# Bundle the local LLM and the embedding model into a single context object;
# chunk_size=256 is the chunk size used when documents are split.
service_settings = {
    "llm": llm,
    "embed_model": embed_model,
    "chunk_size": 256,
}
service_context = ServiceContext.from_defaults(**service_settings)

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [10]:
# Build the vector index over the loaded documents, using the LLM/embedding
# configuration carried by service_context.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

In [11]:
# Turn the index into a query engine and ask the first question.
query_engine = index.as_query_engine()

bone_question = "I've pain in my bone, which doctor should I consult?"
response = query_engine.query(bone_question)

In [None]:
# Show the text of the most recent query's answer.
print(response)

 Based on the given context information, it appears that Dr. Bharat Murthy is a specialist in Orthopaedics (Bone Specialist). Therefore, if you are experiencing pain in your bones, it may be beneficial to consult with Dr. Bharat Murthy.


In [None]:
# Ask about a symptom that is not named verbatim in the source document.
headache_question = "Which doctor can cure my headache?"
response = query_engine.query(headache_question)

In [None]:
# Show the text of the most recent query's answer.
print(response)

In [None]:
# Ask which doctor to see for ear pain and print the answer.
ear_question = "I've pain in my ears, which doctor should I consult?"
response = query_engine.query(ear_question)
print(response)

In [None]:
# Ask which doctor to see for burning eyes and print the answer.
eye_question = "I’ve a burning feeling in my eyes. Which doctor should I consult?"
response = query_engine.query(eye_question)
print(response)

In [None]:
# Ask which doctor to see for a two-day fever and print the answer.
# (The query text, including its leading space, is kept exactly as written.)
fever_question = " I've been having a fever for the past 2 days, which doctor should I consult?"
response = query_engine.query(fever_question)
print(response)