In [11]:
from pprint import pprint
from langchain.llms import Ollama, OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, field_validator

# Base URL handed to both Ollama-backed models below.
ollama_api = "https://ollama.phate.dev"
starling_llm = Ollama(model="starling", base_url=ollama_api)
taide_llm = Ollama(model="taide-b11", base_url=ollama_api)

# API base handed to the OpenAI client.
# NOTE(review): "sk-template" looks like a placeholder key -- confirm the
# endpoint does not require a real credential.
openai_api = "https://openai.phate.dev/v1"
openai_llm = OpenAI(
    model="starling",
    openai_api_base=openai_api,
    openai_api_key="sk-template",
)

# Parser used as the final stage of the plain-text chain.
str_output_parser = StrOutputParser()

# Load cpu.txt into a list with one entry per line (line endings removed).
cpus = []
with open('cpu.txt', encoding='utf-8') as cpu_file:
    cpus = cpu_file.read().splitlines()

In [37]:
# Smoke-test the taide model with a short greeting. Uses .invoke() rather than
# calling the LLM object directly, consistent with the .invoke() calls on the
# chains in this notebook.
taide_llm.invoke("你好")

謝謝您的讚賞！作為一名人工智慧語言模型，我的目的就是成為用戶的有用助手，協助他們完成各種任務。我很高興聽到您認為我正在達成這項目標，請繼續對我提出要求和任務，我會全力以赴幫助您完成它們！


![cpu](cpu.PNG)

In [3]:
# Plain prompt asking the model to restructure one raw listing line as JSON;
# {content} is the only template variable.
prompt = PromptTemplate.from_template(
    "幫我把以下的資料結構化輸出成 JSON 格式：\n{content}\n"
)
# Preview the rendered prompt using the last entry of cpus.
print(prompt.format(content=cpus[-1]))

幫我把以下的資料結構化輸出成 JSON 格式：
Intel i9-14900K【8+16核/32緒】4.4G(↑6.0G)/36M/UHD770/無風扇【代理盒裝】, $20700 ◆ ★ 熱賣



In [35]:
# Send the rendered prompt for the last cpus entry to the starling model.
# .invoke() replaces the direct call on the LLM object, consistent with the
# .invoke() calls on the chains in this notebook.
result = starling_llm.invoke(prompt.format(content=cpus[-1]))
pprint(result)

(' {\n'
 '  "cpu": {\n'
 '    "brand": "Intel",\n'
 '    "model": "i9-14900K",\n'
 '    "cores": [8, 16],\n'
 '    "threads": 32,\n'
 '    "base_frequency": "4.4GHz",\n'
 '    "max_turbo_frequency": "6.0GHz",\n'
 '    "cache": "36M",\n'
 '    "integrated_graphics": "UHD 770",\n'
 '    "cooling": "無風扇",\n'
 '    "availability": "代理盒裝"\n'
 '  },\n'
 '  "price": {\n'
 '    "usd": "$20700"\n'
 '  },\n'
 '  "popularity": "★ 熱賣"\n'
 '}')


In [9]:
# Compose prompt -> model -> string parser into a single runnable chain.
prompt_chain = prompt | starling_llm | str_output_parser
# Run the chain on the last cpus entry; the dict key fills {content}.
pprint(prompt_chain.invoke({"content": cpus[-1]}))

(' {\n'
 '  "processor": {\n'
 '    "brand": "Intel",\n'
 '    "model": "i9-14900K",\n'
 '    "core_count": 8,\n'
 '    "thread_count": 16,\n'
 '    "base_frequency": 4.4,\n'
 '    "boost_frequency": 6.0,\n'
 '    "cache_size": 36,\n'
 '    "integrated_graphics": "UHD770",\n'
 '    "cooling": "無風扇",\n'
 '    "packaging": "代理盒裝"\n'
 '  },\n'
 '  "price": {\n'
 '    "USD": 20700,\n'
 '    "stars": 5\n'
 '  }\n'
 '}')


In [23]:
# Target schema for one parsed CPU listing.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model's docstring into its JSON schema "description", which would
# change the format instructions generated from this model.
class Cpu(BaseModel):
    full_title: str
    brand: str
    model: str
    core: int
    thread: int
    base_frequency: float
    boost_frequency: float
    # -1 marks a price that could not be converted to an integer.
    price: int

    # mode="before" makes the validator see the raw input. In the default
    # "after" mode it only ever receives an already-validated int, so the
    # fallback below could never trigger.
    @field_validator("price", mode="before")
    @classmethod
    def price_int(cls, v) -> int:
        """Coerce the raw price to int.

        Returns:
            int(v) when the conversion succeeds, otherwise -1.
        """
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are swallowed; anything else propagates.
            return -1

# Parser that validates model output against the Cpu schema.
cpu_parser = PydanticOutputParser(pydantic_object=Cpu)

# Same instruction as the plain prompt, with the parser's format instructions
# pre-filled so that only {content} remains to be supplied at call time.
cpu_prompt = PromptTemplate.from_template(
    template=(
        "幫我把以下的資料結構化輸出成 JSON 格式：\n"
        "{format_instructions}\n"
        "{content}\n"
    ),
    partial_variables={
        "format_instructions": cpu_parser.get_format_instructions(),
    },
)


In [34]:
# Compose prompt -> model -> Cpu parser, then run it on the last cpus entry.
cpu_json_chain = cpu_prompt | starling_llm | cpu_parser
res = cpu_json_chain.invoke({"content": cpus[-1]})
# model_dump_json already returns a str, so no extra str() wrapper is needed.
pprint(res.model_dump_json(indent=2))

('{\n'
 '  "full_title": "Intel '
 'i9-14900K【8+16核/32緒】4.4G(↑6.0G)/36M/UHD770/無風扇【代理盒裝】",\n'
 '  "brand": "Intel",\n'
 '  "model": "i9-14900K",\n'
 '  "core": 8,\n'
 '  "thread": 32,\n'
 '  "base_frequency": 4.4,\n'
 '  "boost_frequency": 6.0,\n'
 '  "price": 20700\n'
 '}')


In [39]:
# Show the format-instructions text produced by cpu_parser; the same call
# supplies the {format_instructions} value of cpu_prompt.
print(cpu_parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"full_title": {"title": "Full Title", "type": "string"}, "brand": {"title": "Brand", "type": "string"}, "model": {"title": "Model", "type": "string"}, "core": {"title": "Core", "type": "integer"}, "thread": {"title": "Thread", "type": "integer"}, "base_frequency": {"title": "Base Frequency", "type": "number"}, "boost_frequency": {"title": "Boost Frequency", "type": "number"}, "price": {"title": "Price", "type": "integer"}}, "required": ["full_title", "brand", "model", "core", "thread", "base_frequency", "boost_frequency", "pric