In [None]:
!pip install transformers
!pip install torch


In [None]:
!pip install -U langchain-community
!pip install sentence-transformers
!pip install chromadb

In [None]:
import os
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from tqdm import tqdm
from langchain.vectorstores import FAISS

def load_documents(directory):
    """Load the readable files in ``directory`` as LangChain ``Document`` objects.

    Only ``.txt`` files are actually read (as UTF-8). ``.pdf`` and ``.docx`` are
    recognised extensions, but no reader is implemented for them yet, so they
    are reported and skipped. Previously those branches fell through with
    ``text`` either unbound or still holding the previous file's content, which
    could attach one file's text to another file's source name.

    Args:
        directory: Path of the folder to scan (top level only, not recursive).

    Returns:
        A list of ``Document`` objects whose ``page_content`` is the file text
        and whose ``metadata["source"]`` is the file name without extension.
        Files that fail to load are reported on stdout and skipped.
    """
    documents = []
    supported_formats = ['.txt', '.pdf', '.docx']  # extensions this loader recognises
    # Sort the listing so document (and therefore chunk) order is reproducible.
    for filename in tqdm(sorted(os.listdir(directory)), desc="Loading documents"):
        source_name, file_extension = os.path.splitext(filename)
        file_extension = file_extension.lower()
        if file_extension not in supported_formats:
            continue
        try:
            filepath = os.path.join(directory, filename)
            if file_extension == '.txt':
                with open(filepath, 'r', encoding='utf-8') as file:
                    text = file.read()
            else:
                # TODO: plug in a PDF reader (e.g. PyPDF2) and a DOCX reader
                # (e.g. python-docx). Until then skip the file explicitly.
                print(f"Skipping {filename}: no reader implemented for {file_extension} files")
                continue
            documents.append(Document(page_content=text, metadata={"source": source_name}))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error loading {filename}: {e}")
    return documents

def split_documents(documents):
    """Split documents into overlapping chunks with a recursive character splitter.

    Chunks are at most 500 units long as measured by ``len``, with 100 units of
    overlap between neighbours. The splitter tries the separators in order:
    blank line, newline, space, then the empty string.

    Args:
        documents: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        Whatever ``split_documents`` on the splitter returns for the input.
    """
    splitter_options = {
        "chunk_size": 500,       # larger chunks keep more context together
        "chunk_overlap": 100,
        "length_function": len,
        "separators": ["\n\n", "\n", " ", ""],  # custom separator order
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

"""用FASISS構建向量庫"""
# def create_vectorstore(chunks):
#     device = "cuda" if torch.cuda.is_available() else "cpu"

#     # 使用更先進的嵌入模型
#     embeddings = HuggingFaceEmbeddings(
#         model_name="sentence-transformers/all-mpnet-base-v2",  # 更準確的模型
#         model_kwargs={'device': device}
#     )

#     # 提取文本內容並將其向量化
#     # 假設 chunks 中的每個 Document 對象都有 'page_content' 屬性
#     documents_text = [chunk.page_content for chunk in chunks]
#     vectors = embeddings.embed_documents(documents_text)

#     # 使用 FAISS 構建索引
#     vectorstore = FAISS.from_documents(chunks, embeddings)
#     # 或者使用向量數據創建
#     # vectorstore = FAISS(vectors, chunks)

#     return vectorstore

def create_vectorstore(chunks):
    """Embed ``chunks`` and index them in a Chroma vector store.

    The embedding model runs on CUDA when ``torch`` reports it as available and
    on the CPU otherwise.

    Args:
        chunks: Document chunks to embed, passed straight to
            ``Chroma.from_documents``.

    Returns:
        The object returned by ``Chroma.from_documents``.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/msmarco-bert-base-dot-v5",
        model_kwargs={'device': device},
    )

    # The collection is configured for cosine distance.
    # NOTE(review): the model name suggests it was tuned for dot-product
    # scoring - confirm that cosine is the intended metric here.
    collection_settings = {"hnsw:space": "cosine"}
    return Chroma.from_documents(
        chunks,
        embedding_model,
        collection_metadata=collection_settings,
    )


#### 確認切割文本內容

In [None]:
if __name__ == "__main__":
    # Sanity-check cell: load the corpus, split it, and eyeball one chunk to
    # confirm the splitter settings produce sensible pieces.
    directory = '/content/drive/MyDrive/暫放'
    # directory = '/content/test'
    documents = load_documents(directory)

    print(f"載入了 {len(documents)} 個文檔")

    chunks = split_documents(documents)

    print(f"文檔被分割成 {len(chunks)} 個chunks")

    # Print the first chunk in full. This used to index chunks[99], which
    # contradicted the "first chunk" label and raised IndexError whenever the
    # corpus produced fewer than 100 chunks.
    if chunks:
        first_chunk = chunks[0]
        print("\n第一個Chunk的完整內容:")
        print(f"內容:\n{first_chunk.page_content}")
        print(f"\n長度: {len(first_chunk.page_content)} 字符")
        print(f"來源: {first_chunk.metadata.get('source', '未知')}")

In [None]:
!pip install langchain_groq

### 初始分割文本及建立向量庫

In [None]:
# Build the corpus once for the chat cells below: load the files, split them
# into chunks, then embed the chunks into the vector store.
# `vectorstore` is read as a notebook-level global by the main() functions in
# later cells, so this cell must run before them.
documents = load_documents("/content/drive/MyDrive/暫放")
chunks = split_documents(documents)
vectorstore = create_vectorstore(chunks)

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
import gradio as gr

def main():
    """Run an interactive console RAG chat about the podcast transcripts.

    Builds a Groq-backed ``ConversationalRetrievalChain`` over the
    notebook-level ``vectorstore`` (the cell that creates it must have run
    first), then reads questions from stdin in a loop and prints the answer
    followed by the sources of the documents the chain actually used.

    The Groq API key is taken from the ``GROQ_API_KEY`` environment variable;
    if it is unset the user is prompted for it with hidden input. The key is
    deliberately not stored in the notebook.

    The loop ends when the user types ``exit`` or ``quit``, or interrupts the
    input prompt (Ctrl+C / EOF). Blank input is ignored.
    """
    import getpass
    import os

    # Never hard-code credentials in a notebook: they leak through sharing and
    # version control.
    groq_api_key = os.environ.get("GROQ_API_KEY") or getpass.getpass("Groq API key: ")
    model = 'llama3-8b-8192'
    # Initialize the Groq LangChain chat object.
    groq_chat = ChatGroq(
        groq_api_key=groq_api_key,
        model_name=model
    )

    # Conversation memory. output_key is required because the chain below
    # returns two outputs ("answer" and "source_documents"); the memory must be
    # told which one to store.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    # MMR search: fetch 20 candidates and keep the 2 selected by MMR.
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 20})
    # Answering prompt (kept verbatim).
    template = """你是一個在幫助使用者回答有關Podcast節目相關內容的智能助手。根據提供的檢索到的資料來回答問題。如果信息不足以回答問題，請直接回答"RAG資料庫沒有您想要的資料"，並須注意以繁體中文回答。

檢索資料信息：
{context}

聊天歷史：
{chat_history}

當前問題：{question}

請根據上述信息回答問題。請注意：

1.優先使用檢索資料信息中的內容，但也可以參考聊天歷史。
2.回答時提供時間戳，讓使用者知道這段內容是出現在哪個時間點。
3.如果檢索資料信息和聊天歷史都不足以回答問題，請直接回答"RAG資料庫沒有您想要的資料"。
4.回答要簡潔明瞭，並以繁體中文表達。
回答："""
    # Disabled prompt rule 5: after answering, cite the episode in which the
    # content was mentioned, using the retrieved file's title as the episode title.

    prompt = ChatPromptTemplate.from_template(template)

    # Ask the chain to return the documents it used, so the sources printed
    # below are exactly the ones behind the answer. Querying the retriever
    # separately with the raw question can yield different documents and
    # doubles the retrieval work.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=groq_chat,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": prompt},
        return_source_documents=True,
    )

    while True:
        try:
            user_question = input("請問您的問題：").strip()
        except (EOFError, KeyboardInterrupt):
            # Leave the chat cleanly instead of surfacing a traceback.
            print()
            break

        if not user_question:
            continue
        if user_question.lower() in {"exit", "quit"}:
            break

        try:
            response = qa_chain.invoke({"question": user_question})

            print("Groq Agent:", response['answer'])

            for source_doc in response.get('source_documents', []):
                print("參考資料:", source_doc.metadata.get('source', '未知'))

            print("-" * 50)

        except Exception as e:
            # Keep the session alive on a failed request (network, rate limit, ...).
            print(f"發生錯誤: {e}")
            print("很抱歉，我無法處理您的問題。請再試一次或換個問題。")


if __name__ == "__main__":
    main()

Hello! I'm your friendly Groq chatbot. I can help answer your questions, provide information, or just chat. I'm also super fast! Let's start our conversation!


KeyboardInterrupt: Interrupted by user

In [None]:
import os
from langchain.chains import LLMChain
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage, AIMessage
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_community.vectorstores import Chroma

def main():
    """Run an interactive console chat that grounds Groq answers in retrieved documents.

    For every question the notebook-level ``vectorstore`` (the cell that
    creates it must have run first) is searched for the 2 most relevant chunks.
    Their text, each labelled with its source, is placed in the system prompt,
    and the answer is printed followed by the list of distinct sources. The
    last 5 exchanges are kept as chat history.

    The Groq API key is taken from the ``GROQ_API_KEY`` environment variable;
    if it is unset the user is prompted for it with hidden input. The key is
    deliberately not stored in the notebook.

    The loop ends when the user types ``exit`` or ``quit``, or interrupts the
    input prompt (Ctrl+C / EOF).
    """
    import getpass
    import os

    # Never hard-code credentials in a notebook: they leak through sharing and
    # version control.
    groq_api_key = os.environ.get("GROQ_API_KEY") or getpass.getpass("Groq API key: ")
    model = 'llama3-8b-8192'
    # Initialize the Groq LangChain chat object.
    groq_chat = ChatGroq(
            groq_api_key=groq_api_key,
            model_name=model
    )

    print("Hello! I'm your friendly Groq chatbot. I can help answer your questions, provide information, or just chat. I'm also super fast! Let's start our conversation!")

    system_prompt = 'You are a friendly conversational chatbot. When you use information from the retrieved documents, please cite the sources.'
    conversational_memory_length = 5 # number of previous messages the chatbot will remember during the conversation

    memory = ConversationBufferWindowMemory(k=conversational_memory_length, memory_key="chat_history", return_messages=True)

    retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

    while True:
        try:
            user_question = input("Ask a question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Leave the chat cleanly instead of surfacing a traceback.
            print()
            break

        if not user_question:
            print("No question asked. Please try again.")
            continue
        if user_question.lower() in {"exit", "quit"}:
            break

        try:
            # Retrieve relevant documents (invoke replaces the deprecated
            # get_relevant_documents).
            retrieved_docs = retriever.invoke(user_question)

            if retrieved_docs:
                labelled_texts = []
                sources = []
                for doc in retrieved_docs:
                    source = doc.metadata.get('source', "Unknown source")
                    sources.append(source)
                    # Label each passage so the model can actually cite it.
                    labelled_texts.append(f"[Source: {source}]\n{doc.page_content}")

                retrieved_content = "\n\n".join(labelled_texts)
                # dict.fromkeys removes duplicates while keeping first-seen order;
                # several chunks often come from the same file.
                source_info = "Sources: " + ", ".join(dict.fromkeys(sources))

                # Retrieved text goes into the system message, ahead of the
                # history and the question. It used to be injected as an
                # AIMessage after the user turn, which presented it as the
                # assistant's own earlier reply. Message objects are not
                # templates, so braces in the retrieved text are safe.
                system_content = (
                    f"{system_prompt}\n\n"
                    f"Relevant information:\n{retrieved_content}\n\n"
                    "When answering, please cite the sources of information you used."
                )
            else:
                print("No relevant information found. The model will answer based on its general knowledge.")
                system_content = system_prompt

            prompt = ChatPromptTemplate.from_messages([
                SystemMessage(content=system_content),
                MessagesPlaceholder(variable_name="chat_history"),
                HumanMessagePromptTemplate.from_template("{human_input}"),
            ])

            # The prompt embeds per-question context, so the chain is rebuilt
            # each turn; the shared memory object carries the history across turns.
            conversation = LLMChain(
                llm=groq_chat,
                prompt=prompt,
                verbose=False,
                memory=memory,
            )

            # Generate the chatbot's response
            response = conversation.predict(human_input=user_question)

            if retrieved_docs:
                full_response = f"{response}\n\n{source_info}"
            else:
                full_response = response

            print("Chatbot:", full_response)

        except Exception as e:
            # Keep the session alive on a failed request (network, rate limit, ...).
            print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

Hello! I'm your friendly Groq chatbot. I can help answer your questions, provide information, or just chat. I'm also super fast! Let's start our conversation!
Ask a question: I would like to ask in the program "EP2-AI是起點還是終點？ " of 科技浪, is there any mention of Mexer of Experts? Please answer in Chinese.
Chatbot: According to the video "EP2-AI是起點還是終點？" from 科技浪, the concept of "Make sure of Experts" (或譯為 "Expert Ensemble" 或 "Model Aggregation") is mentioned around 24:57-25:55. In this context, it refers to combining the outputs of multiple small models (Experts) to form a larger model, which can reduce the number of parameters and computational resources required. This approach has been popular in recent Machine Learning research. (Source: 科技浪 "EP2-AI是起點還是終點？" video)

Sources: processed_EP28 - 深入瞭解兩大AI突破：Sora & Gemini 15 Pro.txt, processed_EP28 - 深入瞭解兩大AI突破：Sora & Gemini 15 Pro.txt
Ask a question: GPT-4曾被視為非常強大的模型，但如今它不再神秘，也沒有最初那麼強大。其實GPT-4由多個小模型組成，這種方法被稱為什麼?請用中文回答
Chatbot: The concept i

KeyboardInterrupt: Interrupted by user

In [None]:
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline

# Load the ChatGLM3-6B model and tokenizer (the old comment named CodeGeeX4,
# which is not the model loaded here).
# NOTE(security): trust_remote_code=True executes Python code shipped with the
# model repository - only use it with repositories you trust.
model_name = "THUDM/chatglm3-6b"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Text-generation pipeline around the local model.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    no_repeat_ngram_size=2
)

# Wrap the pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# `retriever` was only ever a local variable inside main(), so referencing it
# here raised NameError on a fresh kernel. Build it from the notebook-level
# vector store, with the same top-2 setting the chat cells use.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)
def ask_question(question):
    """Answer ``question`` with the retrieval QA chain, falling back to the bare LLM.

    Args:
        question: The question text, sent to ``qa_chain`` under the ``"query"`` key.

    Returns:
        A ``(answer, sources)`` tuple. On the retrieval path ``sources`` holds
        the ``page_content`` of each returned source document. If the chain
        raises, the error is printed and the module-level ``llm`` is called
        directly; the answer is then wrapped in a notice and ``sources`` is
        empty. If that call fails too, the answer is an error message and
        ``sources`` is empty.
    """
    # First attempt: retrieval-augmented answer.
    try:
        rag_output = qa_chain({"query": question})
        reply = rag_output['result']
        source_texts = []
        for source_doc in rag_output.get('source_documents', []):
            source_texts.append(source_doc.page_content)
        return reply, source_texts
    except Exception as rag_error:
        print(f"检索式问答失败: {str(rag_error)}")

    # Fallback: ask the generation model directly, without retrieved context.
    try:
        generated = llm(question)
        return f"基于直接生成的答案：\n{generated}\n\n注意：此回答是由AI直接生成，未基于特定上下文。", []
    except Exception as llm_error:
        return f"无法生成回答。错误: {str(llm_error)}", []



In [None]:
# Smoke-test the question-answering helper with a sample question.
query = "請問在科技浪EP2中提到的Cago是什麼，請解釋"
# ask_question returns the answer text and a list of source strings.
answer, sources = ask_question(query)
print("回答:", answer)
print("相关来源:", sources)