In [1]:
 #!pip install BCEmbedding==0.1.1
# Install streamlit
 #! pip install streamlit==1.24.0
 #!pip install langchain
 #!pip install -U langchain-community
# ! pip install modelscope

In [2]:
# Import the required libraries
from typing import List
import numpy as np

import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
import BCEmbedding  

In [3]:
# Embedding-model wrapper class built on the new model.
# Release unused cached GPU memory before the embedding model is loaded.
torch.cuda.empty_cache()

class EmbeddingModel:
    """
    Sentence-embedding wrapper around a Hugging Face encoder.

    Each sentence is represented by the L2-normalised hidden state of its
    first token (``last_hidden_state[:, 0]``).

    Attributes:
        tokenizer: tokenizer loaded from ``model_name``.
        model: encoder loaded from ``model_name`` and moved to ``device``.
        device: device string that the model and every input batch are moved to.
    """

    def __init__(self, model_name: str, device: str = 'cuda'):
        """
        Load the tokenizer and the encoder and move the encoder to ``device``.

        Args:
            model_name: identifier/path handed unchanged to ``from_pretrained``.
            device: target device for the model and inputs (default ``'cuda'``).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = device
        self.model.to(self.device)
        # Inference only: make sure dropout and similar layers are disabled.
        self.model.eval()

    def get_embeddings(self, sentences: List[str], batch_size: int = 8) -> np.ndarray:
        """
        Embed ``sentences`` in mini-batches.

        Args:
            sentences: texts to embed; each is truncated to 256 tokens.
            batch_size: number of sentences per forward pass (must be > 0).

        Returns:
            Array of shape ``(len(sentences), hidden_dim)`` with unit-length
            rows. For empty input an array with zero rows is returned instead
            of letting ``np.vstack`` fail on an empty list.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        if len(sentences) == 0:
            # Keep the column count meaningful when the model exposes it.
            config = getattr(self.model, "config", None)
            hidden_dim = getattr(config, "hidden_size", 0) or 0
            return np.empty((0, hidden_dim), dtype=np.float32)

        all_embeddings = []
        for start in range(0, len(sentences), batch_size):
            batch = sentences[start:start + batch_size]
            inputs = self.tokenizer(
                batch, padding=True, truncation=True, max_length=256, return_tensors="pt"
            )
            inputs_on_device = {k: v.to(self.device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs_on_device, return_dict=True)
                # First-token hidden state is used as the sentence vector.
                embeddings = outputs.last_hidden_state[:, 0]
                # normalize() clamps the norm with a small eps, so an all-zero
                # vector stays zero instead of turning into NaNs.
                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
            all_embeddings.append(embeddings.cpu().numpy())
        return np.vstack(all_embeddings)


In [4]:
# Vector-store index class
class VectorStoreIndex:
    """
    In-memory vector index over a text file with one document per line.

    Attributes:
        documents: non-empty, whitespace-stripped lines of the source file.
        embed_model: object providing ``get_embeddings(list_of_str)``.
        vectors: embeddings of ``documents`` in the same order.
    """

    def __init__(self, doecment_path: str, embed_model: "EmbeddingModel") -> None:
        """
        Read the documents and embed them.

        Args:
            doecment_path: path of the UTF-8 text file to index. The misspelled
                parameter name is kept so keyword callers keep working.
            embed_model: embedding backend used for documents and queries.
        """
        # `with` closes the handle deterministically; blank lines are skipped
        # because an empty string is not a meaningful document to retrieve.
        with open(doecment_path, 'r', encoding='utf-8') as handle:
            stripped_lines = (line.strip() for line in handle)
            self.documents = [line for line in stripped_lines if line]

        self.embed_model = embed_model
        if self.documents:
            self.vectors = self.embed_model.get_embeddings(self.documents)
        else:
            # Nothing to embed; keep an empty matrix so `query` can short-circuit.
            self.vectors = np.empty((0, 0), dtype=np.float32)

        print(f'Loading {len(self.documents)} documents for {doecment_path}.')

    def get_similarity(self, vector1: List[float], vector2: List[float]) -> float:
        """
        Calculate the cosine similarity between two vectors.

        Returns 0.0 when either vector has zero magnitude.
        """
        dot_product = np.dot(vector1, vector2)
        magnitude = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if not magnitude:
            return 0.0
        return float(dot_product / magnitude)

    def query(self, question: str, k: int = 5) -> List[str]:
        """
        Return up to ``k`` documents most similar to ``question``, best first.

        Args:
            question: free-text query, embedded with ``embed_model``.
            k: maximum number of documents to return. Values <= 0 yield an
                empty list (a bare ``[-0:]`` slice would return everything).

        Returns:
            Documents ordered by descending cosine similarity.
        """
        if k <= 0 or not self.documents:
            return []

        question_vector = np.asarray(self.embed_model.get_embeddings([question])[0])
        vectors = np.asarray(self.vectors)

        # Cosine similarity of the question against every document at once.
        dots = vectors @ question_vector
        magnitudes = np.linalg.norm(vectors, axis=1) * np.linalg.norm(question_vector)
        scores = np.divide(
            dots,
            magnitudes,
            out=np.zeros_like(dots, dtype=float),
            where=magnitudes != 0,  # zero-magnitude pairs keep a score of 0
        )

        top_indices = scores.argsort()[-k:][::-1]
        return [self.documents[i] for i in top_indices]

    # NOTE: a disabled `web_search` helper (Google Serper lookup) previously sat
    # here inside a string literal and embedded a hard-coded SERPER_API_KEY.
    # It was removed: credentials must come from the environment, never from
    # source, and the key that was committed should be revoked.

In [5]:
# Build the embedding model. The string is passed unchanged to `from_pretrained`
# (presumably a local model directory named 'BCEmbeddingmodel' — confirm).
print("> Create embedding model...")
embed_model = EmbeddingModel('BCEmbeddingmodel')
# Stale experiments kept for reference. Note that the keyword used in the last
# line (`model_name_or_path`) does not match EmbeddingModel's `model_name` parameter.
# embed_model.get_embeddings(sentences)
# # init embedding model
# model = EmbeddingModel(model_name_or_path="AI-ModelScope/BCEmbeddingmodel")

> Create embedding model...


In [6]:
# Build the index: the file is read line by line and the lines are embedded.
print("> Create index...")
doecment_path = './test.txt'
index = VectorStoreIndex(doecment_path, embed_model)

# Inspect the shape of the vector store: (number of documents, embedding dimension).
_vector = np.array(index.vectors)
print(_vector.shape)

> Create index...
Loading 10 documents for ./test.txt.
(10, 768)


In [7]:
import os
from langchain.utilities import GoogleSerperAPIWrapper
import pprint

In [8]:
# Example retrieval: fetch the documents most similar to the question
# (`query` defaults to the top 5).
question = 'Recommend professors in the field of Interdisciplinary Areas'
print('> Question:', question)

context = index.query(question)
print('> Context:', context)

# Disabled: `web_search` is not an active method of the index, so these lines stay commented out.
#context_web = index.web_search(context)
#print('> Context_web:', context_web)

> Question: Recommend professors in the field of Interdisciplinary Areas
> Context: ['Dan Halperin specializes in the area(s) of Interdisciplinary Areas ,Theory and is interested in \xa0Computer graphics,Comp. bio & bioinformatics,Robotics,Algorithms & complexity. They are part of the Tel Aviv University department, have a homepage at http://acg.cs.tau.ac.il/danhalperin, their scholar ID is NOSCHOLARPAGE, and the university holds a rank of 32.', 'Donald P. Greenberg specializes in the area(s) of Interdisciplinary Areas ,System and is interested in \xa0Computer graphics,Design automation. They are part of the Cornell University department, have a homepage at http://aap.cornell.edu/people/donald-greenberg, their scholar ID is NOSCHOLARPAGE, and the university holds a rank of 8.', 'Alexander C. Berg specializes in the area(s) of AI,Interdisciplinary Areas ,System and is interested in \xa0Natural language processing,Artificial intelligence,Computer vision,Machine learning,Robotics,Database

In [9]:
# Download the Llama model
#from modelscope import snapshot_download
#model_dir = snapshot_download('LLM-Research/Meta-Llama-3-8B-Instruct', cache_dir='.')

In [10]:
# Large-language-model wrapper class
class LLM:
    """
    Wrapper around a Meta Llama 3 causal language model for prompt-based generation.

    Attributes:
        tokenizer: tokenizer loaded from ``model_path``.
        model: causal LM loaded in bfloat16 and moved to the GPU.
    """

    def __init__(self, model_path: str) -> None:
        """
        Load the tokenizer and the model from ``model_path``.

        Args:
            model_path: identifier/path handed unchanged to ``from_pretrained``.
        """
        print("Creat tokenizer...")
        # Use the classes imported at the top of the notebook instead of the
        # `transformers` module name, which is only imported in a later cell.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        print("Creating model...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16
        ).cuda()

        print(f'Loading Llama 3 model from {model_path}.')

    def generate(self, question: str, context: list):
        """
        Build a prompt from ``question`` and ``context``, sample a completion and print it.

        Args:
            question: the user question.
            context: retrieved background passages; when empty/falsy the
                question alone is used as the prompt.

        Returns:
            None. The decoded, whitespace-stripped text is printed.

        NOTE(review): the whole returned sequence is decoded, so for a
        decoder-only model the printed text presumably echoes the prompt
        before the answer — confirm whether that is intended.
        """
        if context:
            prompt = f'Background:{context}\n Question: {question}\n Please use the whole background to answer my question.'
        else:
            prompt = question

        encoded = self.tokenizer(prompt, return_tensors="pt")
        # Follow the model's device rather than assuming CUDA for the inputs.
        device = self.model.device
        input_ids = encoded["input_ids"].to(device)
        # Forward the attention mask so generate() does not have to guess it.
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        outputs = self.model.generate(
            input_ids,
            attention_mask=attention_mask,
            do_sample=True,
            max_new_tokens=256,
            temperature=0.6,
            top_p=0.9
        )
        output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        print(output.strip())


In [None]:
import transformers  
import torch  
# Load the LLM from the local checkpoint directory below.
print("> Create Llama3 LLM...")
model_path = './LLM-Research/Meta-Llama-3-8B-Instruct'
# Release unused cached GPU memory before the model weights are loaded.
torch.cuda.empty_cache()
llm = LLM(model_path)

> Create Llama3 LLM...
Creat tokenizer...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Creating model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Generate the answer
torch.cuda.empty_cache()
# Baseline without retrieved context (disabled):
#print('> Without RAG:')
#llm.generate(question, [])

# Answer with the retrieved context; generate() prints the decoded text itself.
print('> With RAG:')
llm.generate(question, context)#, context_web)