In [2]:
from datetime import datetime
import os
import json
import re

SAVE = "./data/cut"
LOAD = "./data/relationships"
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(SAVE, exist_ok=True)


def _cut_tag(tag: str) -> list[str]:
    """Split one relationship tag into the list of names it mentions."""
    # Remove the parenthesised game name.
    without_parens = re.sub(r"\(.*?\)", "", tag)
    # Names are separated by "/" or "&"; drop whitespace and empty pieces.
    return [part.strip() for part in re.split(r"/|&", without_parens) if part.strip()]


entries = []
# Sorted so the order of entries does not depend on the filesystem listing.
for file_name in sorted(os.listdir(LOAD)):
    stem, ext = os.path.splitext(file_name)
    # Only JSON files can be parsed below; skip anything else in the folder.
    if ext != ".json":
        continue
    with open(os.path.join(LOAD, file_name), encoding="utf-8") as fp:
        obj = json.load(fp)
    result = [
        {"cut_tag": _cut_tag(item["tag"]), "count": item["count"]}
        for item in obj["tags"]
        if item["count"] != 0  # filter out tags that nobody uses
    ]
    entries.append({"name": stem, "cut_tags": result})


# ensure_ascii=False writes non-ASCII characters verbatim, so the encoding is
# fixed explicitly instead of relying on the platform default.
with open(f"{SAVE}/merge.json", "w", encoding="utf-8") as fp:
    json.dump(
        # "%H:%M:%S" is the portable spelling of the platform-specific "%T".
        {"date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "entries": entries},
        fp,
        ensure_ascii=False,
        indent=4,
    )

In [3]:
# Load SR_NAMES: one name per line, surrounding whitespace removed.
with open("SR_NAMES.txt") as fp:
    SR_NAMES: list[str] = [raw_line.strip() for raw_line in fp]

# Initialise the relationship matrix with a zero for every pair of names.
rel_matrix = {row_name: dict.fromkeys(SR_NAMES, 0) for row_name in SR_NAMES}

# Special cases: tag spellings that are replaced by another name before lookup.
SP = {"Welt Yang": "Welt", "Dan Feng | Previous Imbibitor Lunae": "Dan Feng"}


def process_tag(tag: list[str], count: int):
    """
    Add one cut tag's count to the module-level ``rel_matrix``.

    Names are first mapped through ``SP`` and then restricted to those present
    in ``SR_NAMES``. What happens next depends on how many names remain:

    * 0 names: nothing is recorded.
    * 1 name: ``count`` is added to that name's diagonal cell.
    * 2 names: ``count`` is added to ``rel_matrix[second][first]``.
    * 3+ names: ``count / number_of_names`` is added to
      ``rel_matrix[later][earlier]`` for every pair, in tag order.

    Args:
        tag: Names extracted from one relationship tag.
        count: Usage count of that tag.
    """
    # Normalise each name once, then keep only the known ones.
    normalized = (SP.get(raw_name, raw_name) for raw_name in tag)
    members = [name for name in normalized if name in SR_NAMES]
    size = len(members)

    if size == 0:
        return
    if size == 1:
        only = members[0]
        rel_matrix[only][only] += count
        return

    # A pair receives the full count; larger groups share it across members.
    weight = count if size == 2 else count / size
    for i in range(size):
        for j in range(i + 1, size):
            rel_matrix[members[j]][members[i]] += weight


def show_matrix(matrix=None, titles=None):
    """
    Print a nested-dict matrix as fixed-width text, one row per line.

    Args:
        matrix: Mapping of row key -> mapping of column key -> number.
            When omitted, the module-level ``rel_matrix`` is looked up at
            call time, so a rebinding of ``rel_matrix`` after this function
            was defined is still honoured.
        titles: Optional column headers; each is cut to 6 characters,
            right-aligned, and the headers are joined with "|".
    """
    if matrix is None:
        matrix = rel_matrix
    if titles:
        print("|".join(title[:6].rjust(6, " ") for title in titles))
    for row_key in matrix:
        row = matrix[row_key]
        for col_key in row:
            value = row[col_key]
            if value == 0:
                # Zero cells are drawn as a dot to keep the grid readable.
                print(end="    .  ")
            else:
                print(f"{value:6.1f}", end=" ")
        print()


# Fill the relationship matrix from every cut tag of every entry.
for entry in entries:
    for cut in entry["cut_tags"]:
        process_tag(cut["cut_tag"], cut["count"])


# Print the filled matrix with the names as column headers.
show_matrix(titles=SR_NAMES)

Argent| Arlan|  Asta| Bailu| Blade|Bronya|Caelus| Clara|Dan Fe|Dan He|Dr. Ra|Fu Xua|Gepard|Guinai| Hanya| Herta|Himeko|  Hook|Huohuo|Jing Y|Jingli| Kafka|  Luka|Luocha|  Lynx|March |Natash|  Pela|Qingqu|Ruan M| Sampo| Seele|Serval|Silver|Stelle|Sushan|Tingyu| Topaz|Trailb|  Welt| Xueyi|Yanqin|Yukong
   6.0     .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .  
    .    11.0     .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .  
    .    98.0    6.0     .      .      .      .      .      .      .      .      .      .      

# 导出

In [5]:
import openpyxl


def save_excel(file_name, titles, matrix):
    """
    Write the matrix to an xlsx file.

    The first worksheet row holds ``titles``; every following row holds the
    values of one row of ``matrix``, in iteration order.

    Args:
        file_name: Output path; ".xlsx" is appended when it is missing.
        titles: Column headers written as the first row.
        matrix: Mapping of row key -> mapping of column key -> value.
    """
    if not file_name.endswith(".xlsx"):
        file_name += ".xlsx"

    wb = openpyxl.Workbook()
    ws = wb.active
    # append() always writes below the last used row, so the header plus every
    # data row is written. Counting rows before inserting the header used to
    # drop the final data row, and indexing row 0 failed on an empty matrix.
    ws.append(list(titles))
    for row_key in matrix:
        ws.append([matrix[row_key][col_key] for col_key in matrix[row_key]])
    wb.save(filename=file_name)


def save_axes_json(file_name, titles, matrix, 精度=2):
    """
    Write the matrix to a json file as (x, y, z) points.

    The output object has two keys: "titles" and "data". "data" contains one
    list per matrix row; each element is ``[x, y, z]`` where ``x`` is the row
    index, ``y`` the column index and ``z`` the cell value rounded to ``精度``
    decimal places.

    Args:
        file_name: Output path; ".json" is appended when it is missing.
        titles: Stored unchanged under the "titles" key.
        matrix: Mapping of row key -> mapping of column key -> number.
        精度: Number of decimal places passed to ``round``.
    """

    data = [
        # The precision belongs inside round(); it used to sit outside the
        # call, which rounded to an integer and emitted a fourth element.
        [
            (row_idx, col_idx, round(matrix[row_key][col_key], 精度))
            for col_idx, col_key in enumerate(matrix[row_key])
        ]
        for row_idx, row_key in enumerate(matrix)
    ]

    if not file_name.endswith(".json"):
        file_name += ".json"
    with open(file_name, "w") as fp:
        json.dump({"titles": titles, "data": data}, fp, separators=(",", ":"))


def save_matrix_json(file_name, titles, matrix, 稀疏矩阵=False, 精度=2):
    """
    Write the matrix to a json file as a two-dimensional array.

    The output object has two keys: "titles" and "data". By default "data" is
    a list of rows with every value rounded to ``精度`` decimal places. With
    ``稀疏矩阵=True`` it is instead a flat list of ``[row, column, value]``
    triples for the cells whose rounded value is not zero.

    Args:
        file_name: Output path; ".json" is appended when it is missing.
        titles: Stored unchanged under the "titles" key.
        matrix: Mapping of row key -> mapping of column key -> number.
        稀疏矩阵: When true, emit the sparse triple form.
        精度: Number of decimal places passed to ``round``.
    """

    data = [
        [round(matrix[row_key][col_key], 精度) for col_key in matrix[row_key]]
        for row_key in matrix
    ]
    if 稀疏矩阵:
        # Enumerating each row avoids indexing row 0, so an empty matrix
        # yields an empty list and rows need not share one length.
        data = [
            (row_idx, col_idx, value)
            for row_idx, row in enumerate(data)
            for col_idx, value in enumerate(row)
            if value != 0
        ]

    if not file_name.endswith(".json"):
        file_name += ".json"
    with open(file_name, "w") as fp:
        json.dump({"titles": titles, "data": data}, fp, separators=(",", ":"))


# Export the relationship matrix: a full spreadsheet and a sparse json file.
save_excel(file_name="./data/cut/matrix.xlsx", titles=SR_NAMES, matrix=rel_matrix)
save_matrix_json(
    file_name="./data/cut/matrix.json",
    titles=SR_NAMES,
    matrix=rel_matrix,
    稀疏矩阵=True,
)