In [1]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


from IPython.display import Markdown, display
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate
from modelscope import snapshot_download
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from abc import ABC
from typing import Any, List, Optional, Dict, cast
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    set_global_service_context,
    SimpleDirectoryReader,
)

2024-05-29 02:20:57,686 - modelscope - INFO - PyTorch version 2.2.2 Found.
2024-05-29 02:20:57,687 - modelscope - INFO - Loading ast index from /Users/david/.cache/modelscope/ast_indexer
2024-05-29 02:20:57,710 - modelscope - INFO - Loading done! Current index file version is 1.14.0, with md5 1f6f1117e9c05769579dc20579dc0521 and a total number of 976 components indexed


In [16]:
# Model names 
qwen2_4B_CHAT = "qwen/Qwen1.5-4B-Chat"

selected_model = snapshot_download(qwen2_4B_CHAT)

SYSTEM_PROMPT = """You are a helpful AI assistant.
"""

query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=128,
    generate_kwargs={"temperature": 0.5, "do_sample": True},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    # device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16},
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## 加载本地数据库(需要先存放数据再执行)

In [17]:
# load documents
documents = SimpleDirectoryReader("database").load_data()
documents  # 展示documents的内容

[Document(id_='8e9a2270-a202-40c6-ac72-6f62a3f01bcf', embedding=None, metadata={'file_path': '/Users/david/Documents/my_rag/demo/database/data.txt', 'file_name': 'data.txt', 'file_type': 'text/plain', 'file_size': 226, 'creation_date': '2024-05-29', 'last_modified_date': '2024-05-29'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='上海有许多特色的小吃，例如炒肝，糖葫芦，黄鳝面等等，其中糖葫芦最受人们欢迎。\n北京是个历史悠久的城市，也有许多美食，像胡辣汤、土笋冻都是非常有名的食物。', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')]

In [18]:
embedding_model = "damo/nlp_gte_sentence-embedding_chinese-small"
class ModelScopeEmbeddings4LlamaIndex(BaseEmbedding, ABC):
    embed: Any = None
    model_id: str = "damo/nlp_gte_sentence-embedding_chinese-small"

    def __init__(
            self,
            model_id: str,
            **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        try:
            from modelscope.models import Model
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
            # 使用modelscope的embedding模型（包含下载）
            self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

        except ImportError as e:
            raise ValueError(
                "Could not import some python packages." "Please install it with `pip install modelscope`."
            ) from e

    def _get_query_embedding(self, query: str) -> List[float]:
        text = query.replace("\n", " ")
        inputs = {"source_sentence": [text]}
        return self.embed(input=inputs)['text_embedding'][0].tolist()

    def _get_text_embedding(self, text: str) -> List[float]:
        text = text.replace("\n", " ")
        inputs = {"source_sentence": [text]}
        return self.embed(input=inputs)['text_embedding'][0].tolist()

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        texts = list(map(lambda x: x.replace("\n", " "), texts))
        inputs = {"source_sentence": texts}
        return self.embed(input=inputs)['text_embedding'].tolist()

    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)


embeddings = ModelScopeEmbeddings4LlamaIndex(model_id=embedding_model)

2024-05-29 02:22:37,007 - modelscope - INFO - initiate model from /Users/david/.cache/modelscope/hub/damo/nlp_gte_sentence-embedding_chinese-small
2024-05-29 02:22:37,009 - modelscope - INFO - initiate model from location /Users/david/.cache/modelscope/hub/damo/nlp_gte_sentence-embedding_chinese-small.
2024-05-29 02:22:37,015 - modelscope - INFO - initialize model from /Users/david/.cache/modelscope/hub/damo/nlp_gte_sentence-embedding_chinese-small
2024-05-29 02:22:37,324 - modelscope - INFO - cuda is not available, using cpu instead.


In [19]:

service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)
set_global_service_context(service_context)

index = VectorStoreIndex.from_documents(documents)
index

  service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)


<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x3a5821ca0>

In [20]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()

In [21]:
response = query_engine.query("上海特色小吃有什么")
print(response)

例如
