In [31]:
# 列出所有已安裝的 Python 套件
# !pip list

In [8]:
from ollama import Client
import os
import json
import numpy as np

In [None]:
# Connect to the Ollama server. The address comes from the OLLAMA_HOST
# environment variable and falls back to Ollama's default local endpoint;
# set OLLAMA_HOST (e.g. "http://192.168.1.10:11434") to target a remote server.
client = Client(host=os.environ.get("OLLAMA_HOST", "http://localhost:11434"), timeout=180)

In [None]:
# Print one summary line (name, families, parameter size) per model on the server.
for model in client.list()['models']:
    print(f"model= {model['model']} , families= {model['details']['families']} , parameter_size= {model['details']['parameter_size']}")

# gemma2:27b is the smallest model listed, so it is the one used below.

model= deepseek-r1:70b , families= ['llama'] , parameter_size= 70.6B
model= llama3.3:70b , families= ['llama'] , parameter_size= 70.6B
model= gemma2:27b , families= ['gemma2'] , parameter_size= 27.2B
model= llama3.2-vision:90b , families= ['mllama', 'mllama'] , parameter_size= 87.7B


In [27]:
def parse_paragraph(filename, encoding="utf-8"):
    """Read a text file and return its non-blank lines as paragraphs.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            non-ASCII text is decoded the same way on every platform instead
            of depending on the locale's default encoding.

    Returns:
        A list of strings, one per non-blank line, with leading and trailing
        whitespace removed.
    """
    with open(filename, encoding=encoding) as f:
        stripped_lines = (line.strip() for line in f)
        # Lines that are empty after stripping are dropped.
        return [line for line in stripped_lines if line]

In [None]:
# 避免每次都重算，把計算好的向量存起來
def calc_embedings(paragraphs):
    """Embed every paragraph with the gemma2:27b model through the Ollama client.

    Args:
        paragraphs: Iterable of text strings to embed.

    Returns:
        A list holding one embedding vector per input paragraph, in input order.
    """
    vectors = []
    for text in paragraphs:
        # One embeddings request per paragraph; keep only the vector itself.
        response = client.embeddings(model="gemma2:27b", prompt=text)
        vectors.append(response["embedding"])
    return vectors

def cache_embeddings(filename, paragraphs):
    """Return embeddings for ``paragraphs``, backed by a JSON cache on disk.

    The cache file is ``<cwd>/cache/<filename>.json``. When it exists and holds
    exactly one entry per paragraph it is returned as-is; otherwise the
    embeddings are computed with ``calc_embedings`` and written to the cache.

    Note: staleness is only detected through the entry count. If the source
    text was edited without changing its number of paragraphs, delete the
    cache file to force re-embedding.

    Args:
        filename: Cache key, used as the cache file's base name.
        paragraphs: List of text strings the embeddings belong to.

    Returns:
        A list with one embedding vector per paragraph.
    """
    cache_path = f"{os.getcwd()}/cache/{filename}.json"

    if os.path.isfile(cache_path):
        with open(cache_path, encoding="utf-8") as f:
            cached = json.load(f)
        # A different entry count means the cache was built from another
        # version of the text; reusing it would misalign vectors and paragraphs.
        if len(cached) == len(paragraphs):
            return cached

    # Create the full parent directory so filenames containing sub-directories
    # (e.g. "docs/about.txt") can be written too.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)

    embeddings = calc_embedings(paragraphs)

    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(embeddings, f)

    return embeddings

In [17]:
# 計算向量相似度 (計算在向量們的距離)
def calc_similar_vectors(v, vectors):
    """Rank ``vectors`` by cosine similarity to ``v``.

    Args:
        v: Query vector (sequence of numbers).
        vectors: Iterable of candidate vectors with the same length as ``v``.

    Returns:
        A list of ``(index, score)`` tuples sorted by score, highest first,
        where ``index`` is the candidate's position in ``vectors``. Candidates
        with equal scores keep their original relative order.
    """
    v_norm = np.linalg.norm(v)  # length of the query vector, computed once
    scores = []
    for item in vectors:
        denominator = v_norm * np.linalg.norm(item)
        # A zero-length vector has no direction: score it 0.0 rather than
        # dividing by zero, which would give NaN and an unreliable sort order.
        if denominator:
            scores.append(np.dot(v, item) / denominator)
        else:
            scores.append(np.float64(0.0))
    return sorted(enumerate(scores), reverse=True, key=lambda x: x[1])

In [15]:
# Manual cosine-similarity demo: embed one query word and three candidate
# words, then rank the candidates by similarity to the query.
# The query response is deliberately not named `input`: rebinding that name
# would shadow the builtin that the question loop later calls to read the
# user's question.
query = client.embeddings(model="gemma2:27b", prompt="天氣")
file = [client.embeddings(model="gemma2:27b", prompt=word).embedding for word in ["氣象局", "下雨天", "氣溫"]]
v_norm = np.linalg.norm(query.embedding)
scores = [np.dot(query.embedding, item) / (v_norm * np.linalg.norm(item)) for item in file]
sorted(enumerate(scores), reverse=True, key=lambda x: x[1])

[(1, 0.16556577858726831),
 (0, 0.040011433594213955),
 (2, 0.018359225964453264)]

In [14]:
# Show the dimensionality of each embedding; gemma2:27b vectors are 4608-dimensional.
for vector in file:
    dimension = len(vector)
    print(dimension)

4608
4608
4608


In [None]:
# 實作如下
def parse_paragraph(filename, encoding="utf-8"):
    """Read a text file and return its non-blank lines as paragraphs.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            non-ASCII text is decoded the same way on every platform instead
            of depending on the locale's default encoding.

    Returns:
        A list of strings, one per non-blank line, with leading and trailing
        whitespace removed.
    """
    with open(filename, encoding=encoding) as f:
        stripped_lines = (line.strip() for line in f)
        # Lines that are empty after stripping are dropped.
        return [line for line in stripped_lines if line]

def cache_embeddings(filename, paragraphs):
    """Return embeddings for ``paragraphs``, backed by a JSON cache on disk.

    The cache file is ``<cwd>/cache/<filename>.json``. When it exists and holds
    exactly one entry per paragraph it is returned as-is; otherwise the
    embeddings are computed with ``calc_embedings`` and written to the cache.

    Note: staleness is only detected through the entry count. If the source
    text was edited without changing its number of paragraphs, delete the
    cache file to force re-embedding.

    Args:
        filename: Cache key, used as the cache file's base name.
        paragraphs: List of text strings the embeddings belong to.

    Returns:
        A list with one embedding vector per paragraph.
    """
    cache_path = f"{os.getcwd()}/cache/{filename}.json"

    if os.path.isfile(cache_path):
        with open(cache_path, encoding="utf-8") as f:
            cached = json.load(f)
        # A different entry count means the cache was built from another
        # version of the text; reusing it would misalign vectors and paragraphs.
        if len(cached) == len(paragraphs):
            return cached

    # Create the full parent directory so filenames containing sub-directories
    # (e.g. "docs/about.txt") can be written too.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)

    embeddings = calc_embedings(paragraphs)

    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(embeddings, f)

    return embeddings

def calc_similar_vectors(v, vectors):
    """Rank ``vectors`` by cosine similarity to ``v``.

    Args:
        v: Query vector (sequence of numbers).
        vectors: Iterable of candidate vectors with the same length as ``v``.

    Returns:
        A list of ``(index, score)`` tuples sorted by score, highest first,
        where ``index`` is the candidate's position in ``vectors``. Candidates
        with equal scores keep their original relative order.
    """
    v_norm = np.linalg.norm(v)  # length of the query vector, computed once
    scores = []
    for item in vectors:
        denominator = v_norm * np.linalg.norm(item)
        # A zero-length vector has no direction: score it 0.0 rather than
        # dividing by zero, which would give NaN and an unreliable sort order.
        if denominator:
            scores.append(np.dot(v, item) / denominator)
        else:
            scores.append(np.float64(0.0))
    return sorted(enumerate(scores), reverse=True, key=lambda x: x[1])


# Load the document's paragraphs and their embeddings (read from the on-disk cache when present).
paragraphs = parse_paragraph("about.txt")  # if the document was updated, delete the cache file and re-run; an existing cache file is reused instead of re-embedding
embeddings = cache_embeddings("about.txt", paragraphs)

# Read the question; entering 'q' (in either case) skips the loop.
prompt = input("請輸入問題... (輸入'q'離開)")
# NOTE(review): in the lines visible here `prompt` is never reassigned inside
# the loop body, so unless it is re-read further down, the same question is
# answered over and over — confirm, and re-prompt at the end of the body if not.
while prompt.lower() != "q":
    # Embed the question and keep the three paragraphs most similar to it.
    prompt_embedding = client.embeddings(model="gemma2:27b", prompt=prompt)["embedding"]
    similar_vectors = calc_similar_vectors(prompt_embedding, embeddings)[:3]

    # System prompt: an instruction to answer only from the supplied context,
    # in Traditional Chinese, and to say "don't know" when unsure, followed
    # directly by the retrieved paragraphs joined with newlines.
    system_prompt = (
        "現在開始使用我提供的情境來回答，只能使用繁體中文，不要有簡體中文字。如果你不確定答案，就說不知道。情境如下："
        + "\n".join(paragraphs[vector[0]] for vector in similar_vectors)  # vector[0] is the paragraph index
    )

    # Send the context as the system message and the question as the user message.
    response = client.chat(
        model="gemma2:27b",
        messages= [{'role': 'system', 'content': system_prompt},
                   {'role': 'user', 'content': prompt}]
    )

    print(response.message.content)