In [1]:
!pip install -q llama-index
!pip install -q openai
!pip install -q transformers
!pip install -q accelerate
!pip install -q optimum[exporters]
!pip install -q InstructorEmbedding
!pip install -q sentence_transformers
!pip install -q pypdf

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m850.8/850.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.1/143.1 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.3/484.3 kB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import openai
import os
from getpass import getpass

# Never store a real key in the notebook source: read it from the
# OPENAI_API_KEY environment variable and, if that is unset or empty, fall
# back to an interactive prompt that does not echo the typed value.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [4]:
from llama_index.llms import OpenAI
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [5]:
# Read the files in the local "data" directory and build a vector index over
# them using the library defaults (no service context is passed).
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Ask a question against the index and render the answer. Without the
# display call the response object is computed and then silently discarded.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up? ")
display(Markdown(str(response)))

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [6]:
from llama_index import ServiceContext, set_global_service_context

# Configure the OpenAI chat model: temperature 0, at most 256 output tokens.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0, max_tokens=256)

# Bundle the LLM together with the chunking settings (size 800, overlap 20).
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=800,
    chunk_overlap=20,
)

# Rebuild the index from the already-loaded documents with this configuration.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

## Customize the embedding model

In [7]:
from llama_index import ServiceContext
from llama_index.embeddings import OpenAIEmbedding

In [8]:
# Instantiate the OpenAI embedding model with its default arguments and make
# it the embedding model of a fresh service context.
openai_embed = OpenAIEmbedding()
service_context = ServiceContext.from_defaults(
    embed_model=openai_embed,
)

In [9]:
# Embed one sample sentence with the OpenAI embedding model.
sample_text = "AI is awesome!"
text_embeddings = openai_embed.get_text_embedding(sample_text)

In [10]:
# Length of the embedding vector produced above (1536 in the recorded run).
len(text_embeddings)

1536

### Local embeddings from Hugging Face
Massive Text Embedding Benchmark (MTEB) [Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)

### BGE embedding

In [11]:
from llama_index.embeddings import HuggingFaceEmbedding

In [12]:
# Load the BAAI/bge-small-en-v1.5 model through the Hugging Face wrapper.
embed_model_bge = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

# Embed the sample sentence and report the length of the resulting vector.
text_embeddings = embed_model_bge.get_text_embedding("AI is awesome!")
embedding_length = len(text_embeddings)
print(embedding_length)

Downloading (…)lve/main/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

384


### Instructor embedding

In [13]:
from llama_index.embeddings import InstructorEmbedding

In [14]:
# Load the hkunlp/instructor-base model through the Instructor wrapper.
embed_model_inst = InstructorEmbedding(
    model_name="hkunlp/instructor-base",
)

# Embed the sample sentence and report the length of the resulting vector.
text_embeddings = embed_model_inst.get_text_embedding("AI is awesome!")
embedding_length = len(text_embeddings)
print(embedding_length)

Downloading (…)62736/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

Downloading (…)/2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

Downloading (…)15e6562736/README.md:   0%|          | 0.00/66.2k [00:00<?, ?B/s]

Downloading (…)e6562736/config.json:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)62736/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.43k [00:00<?, ?B/s]

Downloading (…)6562736/modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512
768


## Speed Benchmarking

In [15]:
!curl https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_Chapter03.pdf --output IPCC_AR6_WGII_Chapter03.pdf

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 20.7M  100 20.7M    0     0   249k      0  0:01:25  0:01:25 --:--:--  257k


In [16]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader

In [17]:
# Load only the downloaded PDF; this replaces the earlier `documents` list.
pdf_files = ["IPCC_AR6_WGII_Chapter03.pdf"]
documents = SimpleDirectoryReader(input_files=pdf_files).load_data()

In [18]:
# Report how many document objects the reader produced from the PDF.
num_documents = len(documents)
print(f"Number of documents: {num_documents}")

Number of documents: 172


### OpenAI embeddings

In [19]:
# Benchmark configuration 1: a service context backed by OpenAI embeddings.
embed_model_openai = OpenAIEmbedding()
service_context = ServiceContext.from_defaults(
    embed_model=embed_model_openai,
)

In [20]:
%%timeit -r 2 -n 2
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context, show_progress=True
)

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

18.2 s ± 697 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)


### BGE embeddings

In [21]:
# Benchmark configuration 2: swap in the BGE model loaded earlier.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model_bge,
)

In [22]:
%%timeit -r 2 -n 2
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context, show_progress=True)

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

8.61 s ± 40.2 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)


### Instructor embeddings

In [23]:
# Benchmark configuration 3: swap in the Instructor model loaded earlier.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model_inst,
)

In [24]:
%%timeit -r 2 -n 2
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context, show_progress=True)

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

Parsing documents into nodes:   0%|          | 0/172 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/428 [00:00<?, ?it/s]

18.8 s ± 648 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
