<a href="https://colab.research.google.com/github/Huangjian2013/ai-demo/blob/main/rag/11-LlamaIndex-pdf-rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [67]:
!pip install llama-index --quiet
!pip install faiss-cpu --quiet
!pip install llama-index-vector-stores-faiss --quiet
!pip install llama-index-embeddings-openai --quiet
!pip install openai --quiet
!pip install tiktoken --quiet

In [68]:
from google.colab import userdata
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader,VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
import faiss
import openai

In [69]:
# Width of the embedding vectors; the FAISS index created later is sized with
# the same value, so the two must stay in sync.
dimensions_size = 512

# The OpenAI key is read from Colab's secret store rather than hard-coded.
openai.api_key = userdata.get('REAL_OPENAI_KEY')

# LlamaIndex reads the global embedding model from `Settings.embed_model`;
# assigning to any other attribute name (e.g. `embedding_model`) is silently
# ignored and the library default is used instead.
# The `dimensions` option is only supported by the text-embedding-3 model
# family, so the model is named explicitly.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    dimensions=dimensions_size,
)

In [70]:
# Load the sample PDF into LlamaIndex document objects.
pdf_paths = ["sample_data/极越01.pdf"]
node_reader = SimpleDirectoryReader(input_files=pdf_paths)
documents = node_reader.load_data()

In [71]:
# Flat FAISS index using L2 distance, sized to `dimensions_size`, wrapped in
# the LlamaIndex FAISS vector-store adapter.
faiss_index = faiss.IndexFlatL2(dimensions_size)
vector_store = FaissVectorStore(
    faiss_index=faiss_index,
)

In [72]:
# Sentence-aware splitter with a chunk size of 200.
text_splitter = SentenceSplitter(chunk_size=200)

# Ingestion pipeline: the splitter is the only transformation, and the
# FAISS-backed store is registered as the pipeline's vector store.
# NOTE(review): no embedding transformation is listed here, so the nodes
# presumably leave the pipeline without embeddings and nothing may be written
# to `vector_store` at this stage — confirm against IngestionPipeline docs.
pipeline = IngestionPipeline(
    vector_store=vector_store,
    transformations=[text_splitter],
)

In [73]:
# Run the ingestion pipeline over the loaded documents and keep its result;
# a later cell passes this value to VectorStoreIndex.
# NOTE(review): the name `notes` looks like a typo for `nodes` — confirm
# before renaming, since a later cell refers to it by this name.
notes = pipeline.run(documents=documents)

In [74]:
# Attach the FAISS-backed store to the index through a StorageContext.
# Without it, VectorStoreIndex falls back to its default in-memory store and
# the FAISS index created earlier is never populated or queried.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Use an embedding model whose output width equals the FAISS index dimension
# (`dimensions_size`) for both indexing and querying; a mismatch would make
# FAISS reject the vectors. `dimensions` requires a text-embedding-3 model.
index_embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    dimensions=dimensions_size,
)

# Building the index embeds the nodes and writes the vectors into FAISS.
# Re-running this cell without recreating `faiss_index` adds the vectors again.
vector_store_index = VectorStoreIndex(
    notes,
    storage_context=storage_context,
    embed_model=index_embed_model,
)

# Retriever that returns only the single most similar chunk.
retrievers = vector_store_index.as_retriever(similarity_top_k=1)

In [75]:
# Retrieve the best-matching chunk(s) for the question and print each one's
# text beneath a separator line.
context = retrievers.retrieve("极越01的型号尺寸是多少？")
for hit in context:
    print("-----------------------------", hit.node.get_text(), sep="\n")

-----------------------------
[22] 
外观方面  
播报 
编辑 
外观方面，极越 01采用了自主的设计理念，其长宽高分别为 4853/1990/1611 毫米，轴距为
3000毫米。 [16] 
内饰方面  
播报 
编辑 
进入车内，极简的设计风格给人以概念车的既视感。一块 35.6英寸6k超清一体屏悬浮于中
控台之上，丰富的功能选项大量简化物理按键，包括可用语音操作的 3D智驾，营造科幻座
舱氛围。  [15] 
价格方面  
 
 
极越01峰值功率 400kW、零百加速不到 4s、最大续航里程超 700km。


In [76]:
# End-to-end RAG query: retrieve context from the index and let the LLM
# synthesise an answer to the same question asked of the retriever above.
question = "极越01的型号尺寸是多少？"
query_engine = vector_store_index.as_query_engine()
response = query_engine.query(question)
print(response)


4853/1990/1611 毫米
