In [2]:
!curl https://ollama.ai/install.sh | sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0>>> Downloading ollama...
100 10941    0 10941    0     0  41621      0 --:--:-- --:--:-- --:--:-- 41759
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [3]:
! ollama pull cwchang/llama3-taide-lx-8b-chat-alpha1-32k

Error: could not connect to ollama app, is it running?


In [None]:
! pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python gpt4all bs4 jq googletrans==4.0.0-rc1

In [None]:
### LLM
# Model tag used for generation. It is the same tag that the `ollama pull`
# cell downloads, and it is passed to ChatOllama(model=...) in the Generate cell.

local_llm = "cwchang/llama3-taide-lx-8b-chat-alpha1-32k"

In [None]:
### Index
import json
from langchain.schema import Document
from langchain_community.document_loaders.json_loader import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings


def load_json_data(filepath):
    """Load a JSON file and wrap the "text" field of every entry in a Document.

    Args:
        filepath: Path to a UTF-8 encoded JSON file. Its top-level value is
            iterated, and each item is indexed with ``["text"]``.

    Returns:
        A list of ``Document`` objects, one per item, in file order, whose
        ``page_content`` is the item's "text" value.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.loads(handle.read())

    documents = []
    for record in records:
        documents.append(Document(page_content=record["text"]))
    return documents

# Read the source records from the local JSON file as Document objects.
data = load_json_data("./data.json")

# Alternative loader, kept disabled for reference:
# loader = JSONLoader("./data2.json", jq_schema='.[].text')
# data = loader.load()

# Split the documents into chunks; the splitter is built via the tiktoken
# encoder constructor with chunk_size=250 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=0)
doc_splits = text_splitter.split_documents(data)

# Embed the chunks with a GPT4All embedding model and store them in a Chroma
# collection, then expose the collection as a retriever for the Generate cell.
embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

In [None]:
import googletrans

# Translate a sample Traditional Chinese question to English and print it.
# The translated text is only displayed here; it is not stored for later use.
translator = googletrans.Translator()
question = "我想要修很涼的課，推薦我一些課程的名字?"
result = translator.translate(question, dest="en")
print(result.text)

I want to take a very cold class, and recommend me some courses?


In [None]:
### Generate
import googletrans
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
# Llama-3 style chat template with a system instruction and a user turn.
# The template has two placeholders, {question} and {context}; input_variables
# must name exactly those, because the chain is invoked with the keys
# "context" and "question".
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)

# Chat model backed by the Ollama model named in `local_llm`.
# NOTE(review): temperature=0 is presumably chosen to keep answers as
# repeatable as possible — confirm this is the intended setting.
llm = ChatOllama(model=local_llm, temperature=0)


# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in input order, separated by a blank line ("\\n\\n").
        An empty input yields an empty string.
    """
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)


# Chain: fill the prompt, call the LLM, and parse the reply to a plain string.
rag_chain = prompt | llm | StrOutputParser()

### Run one or more test questions through the pipeline
questions = ["我想要修很涼的課，推薦我一些課程的名字?"]

results = []

for question in questions:
    # The Traditional Chinese question is used as-is (no translation step).
    print(f"Question: {question}")

    # Retrieve related documents and flatten them into one context string.
    docs = retriever.invoke(question)
    formatted_docs = format_docs(docs)

    # Generate the answer from the retrieved context and the question.
    chain_input = {"context": formatted_docs, "question": question}
    generation = rag_chain.invoke(chain_input)

    # Show the generated answer immediately.
    print(f"Generated Answer: {generation}\n")

    # Keep the question/answer pair for the summary printed after the loop.
    results.append({"question": question, "answer": generation})

# Print every collected question/answer pair.
for result in results:
    print(f"Question: {result['question']}\nAnswer: {result['answer']}\n")

根據提供的上下文，我建議以下類：

*當代世界：環境危機和生態可持續性（Wang Congshu教授） - 此類似乎是一個輕鬆而簡單的課程，重點是環境問題。

請注意，這些建議僅基於提供的上下文，可能不會反映課程的實際質量或困難。
