In [None]:
%pip install pymysql
%pip install zhipuai
%pip install -q -U google-genai

# 智谱文件内容提取

In [None]:
from zhipuai import ZhipuAI
from pathlib import Path
import json

# Upload ./book.md to Zhipu, pull back its extracted text into `file_content`,
# then delete the uploaded copy from the service.
import os

# Read the key from the environment when available so it does not have to be
# written into the notebook; the placeholder keeps the "edit this line" workflow.
client = ZhipuAI(api_key=os.environ.get("ZHIPUAI_API_KEY", "your_api_key"))

# Accepted formats: .PDF .DOCX .DOC .XLS .XLSX .PPT .PPTX .PNG .JPG .JPEG .CSV .PY .TXT .MD .BMP .GIF
# Limits: 50 MB per file, at most 100 files in total.
file_object = client.files.create(file=Path("./book.md"), purpose="file-extract")
print(file_object.id)

try:
    # The content endpoint returns a JSON document; the extracted text is under "content".
    file_content = json.loads(client.files.content(file_id=file_object.id).content)["content"]
finally:
    # Always remove the uploaded file, even if extraction or parsing failed,
    # so failed runs do not pile up against the file quota.
    # (delete supports retrieval, batch, fine-tune and file-extract files)
    result = client.files.delete(
        file_id=file_object.id
    )
    print(result)

# 定义输出格式

In [1]:
# Blank output template that is pasted into the extraction prompt: seven
# string fields for the hexagram itself, plus a "动爻" list holding one
# template entry with the seven per-line fields. Key order is significant
# because the dict is rendered into the prompt text as-is.
json_schema = dict.fromkeys(
    ("卦名", "卦辞", "象曰", "时运", "财运", "家宅", "身体"), ""
)
json_schema["动爻"] = [
    dict.fromkeys(("爻位", "爻辞", "象曰", "时运", "财运", "家宅", "身体"), "")
]

from pydantic import BaseModel
from typing import List

# One moving line (爻) of a hexagram as returned by the model.
# Every field is a string that defaults to "" when absent.
# The insert loop later in this notebook maps these fields onto the zy_gy
# columns noted beside each one.
class Yao(BaseModel):
    # Line position name such as "初九" or "六二"; passed to yao_sort() to build the row id (-> gy_name).
    爻位: str = ""
    # Line statement text (-> gy_content).
    爻辞: str = ""
    # "象曰" commentary text (-> gy_translate).
    象曰: str = ""
    # Fortune reading (-> fate).
    时运: str = ""
    # Wealth reading (-> wealth).
    财运: str = ""
    # Household reading (-> family).
    家宅: str = ""
    # Health reading (-> healty).
    身体: str = ""

# One hexagram (卦) record together with its moving lines.
# Used as the `response_schema` for the Gemini call later in this notebook,
# so the field names and order here define the JSON the model is asked for.
class GuaWen(BaseModel):
    # Hexagram name (-> gy_name).
    卦名: str = ""
    # Hexagram statement text (-> gy_content).
    卦辞: str = ""
    # "象曰" commentary text (-> gy_translate).
    象曰: str = ""
    # Fortune reading (-> fate).
    时运: str = ""
    # Wealth reading (-> wealth).
    财运: str = ""
    # Household reading (-> family).
    家宅: str = ""
    # Health reading (-> healty).
    身体: str = ""
    动爻: List[Yao] = []  # moving lines; defaults to an empty list

    def add_yao(self, yao: Yao) -> None:
        """Append one moving-line entry to this hexagram's 动爻 list."""
        self.动爻.append(yao)

# 爻位转序号

In [None]:
def yao_sort(yaoci):
    """Map a line-position name to its ordinal as a string.

    "初九"/"初六" give "1", "九二"/"六二" give "2", and so on up to
    "上九"/"上六" which give "6". Any other value gives "0".
    """
    # Each entry holds the yang (九) and yin (六) name for one position,
    # listed from the bottom line (1) to the top line (6).
    names_by_rank = (
        ("初九", "初六"),
        ("九二", "六二"),
        ("九三", "六三"),
        ("九四", "六四"),
        ("九五", "六五"),
        ("上九", "上六"),
    )
    for rank, names in enumerate(names_by_rank, start=1):
        # Tuple membership compares by equality, so unhashable input is fine.
        if yaoci in names:
            return str(rank)
    return "0"


# 提取及保存

In [None]:
import pymysql
from google import genai
import os
import time


# Extract every hexagram from `file_content` with Gemini, save the JSON to
# ./gua_json/, and insert one zy_gy row per hexagram plus one per moving line.
# Relies on `json`, `file_content`, `json_schema`, `GuaWen` and `yao_sort`
# defined by earlier cells.

# Open the MySQL connection; host/user/password/database are blank here and
# must be filled in before running.
conn = pymysql.connect(
    host='',
    user='',
    password='',
    database='',
    port=3306
)
cursor = conn.cursor()

# One parameterized INSERT shared by hexagram rows and line rows.
insert_sql = """
INSERT INTO zy_gy (
    id, p_id, gy_name, gy_content, gy_translate, 
    fate, wealth, family, healty, gy_sort
) VALUES (
    %s, %s, %s, %s, %s, 
    %s, %s, %s, %s, %s
)
"""

# Read the key from the environment when available so it does not have to be
# written into the notebook; the placeholder keeps the "edit this line" workflow.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY", "your_api_key"))


gua_list = [
  "乾", "坤", "屯", "蒙", "需", "讼", "师", "比", 
  "小畜", "履", "泰", "否", "同人", "大有", "谦", "豫", 
  "随", "蛊", "临", "观", "噬嗑", "贲", "剥", "复", 
  "无妄", "大畜", "颐", "大过", "习坎", "离", "咸", "恒", 
  "遁", "大壮", "晋", "明夷", "家人", "睽", "蹇", "解", 
  "损", "益", "夬", "姤", "萃", "升", "困", "井", 
  "革", "鼎", "震", "艮", "渐", "归妹", "丰", "旅", 
  "巽", "兑", "涣", "节", "中孚", "小过", "既济", "未济"
]

# Six-character binary id for each hexagram, paired with gua_list by position.
id_list = ["111111","000000","100010","010001","111010",
           "010111","010000","000010","111011","110111",
           "111000","000111","101111","111101","001000",
           "000100","100110","011001","110000","000011",
           "100101","101001","000001","100000","100111",
           "111001","100001","011110","010010","101101",
           "001110","011100","001111","111100","000101",
           "101000","101011","110101","001010","010100",
           "110001","100011","111110","011111","000110",
           "011000","010110","011010","101110","011101",
           "100100","001001","001011","110100","101100",
           "001101","011011","110110","010011","110010",
           "110011","001100","101010","010101"]

assert len(gua_list) == len(id_list), "gua_list and id_list must pair up one-to-one"

# The per-hexagram JSON dump below needs this directory to exist.
os.makedirs('./gua_json', exist_ok=True)


def _build_prompt(gua):
    """Return the extraction prompt for one hexagram name.

    The text (including its leading whitespace) is sent to the model verbatim.
    """
    return f'''
        #  Role: 信息提取专家

        ## Goals
        从文档提取{gua}卦及其六个动爻的卦辞、爻辞、象曰、时运、财运、家宅、身体方面的解释

        ## Workflow:
        1.从下面原文提取内容，仅提取冒号后的内容，不带序号和标题：

        {file_content}

        2.直接转为简体中文

        3.按照以下JSON格式输出：

        {json_schema}

        # 仅输出JSON，不要输出其他内容
        '''


def _build_row(row_id, parent_id, name, content, record, sort_no):
    """Build the insert_sql parameter tuple from one extracted record."""
    return (
        row_id,            # id (varchar 50)
        parent_id,         # p_id (varchar 50)
        name,              # gy_name (varchar 50)
        content,           # gy_content (varchar 255)
        record["象曰"],     # gy_translate (varchar 500)
        record["时运"],     # fate (varchar 500)
        record["财运"],     # wealth (varchar 500)
        record["家宅"],     # family (varchar 500)
        record["身体"],     # healty (varchar 500)
        sort_no            # gy_sort (int)
    )


try:
    # enumerate keeps each hexagram tied to its own id even when an earlier
    # hexagram failed; a counter bumped only on success would drift.
    for index, gua in enumerate(gua_list):
        gua_id = id_list[index]
        try:
            response = client.models.generate_content(
                model='gemini-2.0-flash',
                contents=_build_prompt(gua),
                config={
                    'response_mime_type': 'application/json',
                    'response_schema': GuaWen,
                })
            json_obj = json.loads(response.text)
            with open(f'./gua_json/{gua}卦.json', 'w', encoding='utf-8') as f:
                json.dump(json_obj, f, ensure_ascii=False, indent=2)

            # Hexagram row: no parent, sorted by its position in gua_list.
            cursor.execute(
                insert_sql,
                _build_row(gua_id, "", json_obj["卦名"], json_obj["卦辞"], json_obj, index + 1),
            )
            # Line rows: id is "<hexagram id>_<line ordinal>", parent is the hexagram.
            for jndex, yao in enumerate(json_obj["动爻"]):
                cursor.execute(
                    insert_sql,
                    _build_row(
                        gua_id + "_" + yao_sort(yao["爻位"]),
                        gua_id,
                        yao["爻位"],
                        yao["爻辞"],
                        yao,
                        jndex + 1,
                    ),
                )

            # Commit the hexagram and its lines together.
            conn.commit()
            print("【成功】"+gua+"卦插入完成")

        except Exception as e:
            # Drop any rows already executed for this hexagram so the next
            # successful commit does not persist a half-written record.
            # If the rollback itself fails the connection is unusable, so the
            # error is allowed to end the run.
            conn.rollback()
            print("【失败】"+gua+"卦插入失败: "+repr(e))

        # Wait one minute between requests (free Gemini tier has a TPM limit);
        # nothing follows the last hexagram, so skip the wait there.
        if index < len(gua_list) - 1:
            time.sleep(60)
finally:
    # Release the cursor and connection even if the run is interrupted.
    cursor.close()
    conn.close()