In [5]:
import os
import openai
import re, json, jsonlines, pickle
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from collections import defaultdict
from constant import openai_key, hf_token
from datasets import load_dataset
from nltk import word_tokenize, sent_tokenize
from collections import Counter, defaultdict
from tqdm.notebook import tqdm
from openai import OpenAI
from copy import deepcopy

# Default model name; bound as the default of generate()'s `engine` parameter.
engine = "gpt-4.1"
# Sets the key on the openai module itself, in addition to the explicit client below.
openai.api_key = openai_key

# Client instance that generate() uses for every request.
client = OpenAI(api_key=openai_key)

In [2]:
def generate(prompt, engine=engine):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the "content" field of the only input message.
        engine: Model name forwarded as `model`; defaults to the module-level
            `engine` as it was when this function was defined.

    Returns:
        The `output_text` attribute of the object returned by
        `client.responses.create`.
    """
    user_message = {"role": "user", "content": prompt}
    # A tiny, non-zero temperature is passed with every request.
    reply = client.responses.create(
        model=engine,
        temperature=1e-8,
        input=[user_message],
    )
    return reply.output_text

In [3]:
class CDT_Node:
    """One node of a gated tree of statements about a character.

    Attributes (set in ``__init__``):
        statements: statements attached directly to this node.
        gates / children: parallel lists; ``children[i]`` is only entered by
            ``traverse`` when ``check_scene(scene, gates[i])`` is truthy.
        depth: depth of this node (the root defaults to 1).

    NOTE(review): building a node from ``pairs`` calls ``select_cluster_centers``,
    ``make_hypothesis``, ``summarize_triggers`` and ``validate_hypothesis`` and
    reads the global ``leave_bar``; ``traverse`` calls ``check_scene``. None of
    these are defined in the visible part of this notebook - confirm they are
    in scope before building or traversing a tree here.
    """

    def __init__(self, character, goal_topic, pairs, built_statements=None, depth=1, established_statements=None, gate_path=None, max_depth=3, threshold_accept=0.8, threshold_reject=0.5, threshold_filter=0.8):
        """Create a node, either from ready-made statements or by growing it from ``pairs``.

        Args:
            character: Forwarded to the clustering / hypothesis / validation helpers.
            goal_topic: Forwarded to ``make_hypothesis``.
            pairs: Data the node is grown from; must support ``len()``. Must be
                ``None`` when ``built_statements`` is given.
            built_statements: If not ``None``, used as ``self.statements`` as-is
                and nothing is grown.
            depth: Depth of this node; children are created with ``depth + 1``.
            established_statements: Statements accepted by ancestors (``None``
                means an empty list). Passed to ``make_hypothesis`` together
                with this node's own statements.
            gate_path: Gates leading to this node (``None`` means an empty list).
            max_depth: Nodes with ``depth > max_depth`` are left empty.
            threshold_accept: Minimum correctness for a statement to be accepted.
            threshold_reject: Gated statements with correctness at or below this
                value are dropped.
            threshold_filter: Gated statements whose broadness is above this
                value are dropped.
        """
        # ``None`` replaces the former mutable ``[]`` defaults so that no list
        # object is shared between independent constructor calls.
        if established_statements is None:
            established_statements = []
        if gate_path is None:
            gate_path = []

        self.statements = []
        self.gates = [] # 1 gate -> 1 child
        self.children = []
        self.depth = depth

        if built_statements is not None:
            assert(pairs is None)
            self.statements = built_statements
            return

        # Too little data, or too deep: keep this node as an empty leaf.
        if len(pairs) <= 8 or self.depth > max_depth:
            return

        clusters = select_cluster_centers(character, pairs, n_in_cluster_case=16, n_in_cluster_sample=8, n_max_cluster=8, bs=8)
        statement_candidates, gates = [], []
        for cluster in tqdm(clusters, desc="Making Hypotheses...", leave=leave_bar):
            statement_candidates_cluster, gates_cluster = make_hypothesis(cluster, character, goal_topic, established_statements+self.statements, gate_path)
            statement_candidates.extend(statement_candidates_cluster)
            gates.extend(gates_cluster)

        gates, statement_candidates = summarize_triggers(character, gates, statement_candidates)

        # Pass 1: validate every candidate without its gate. Candidates that are
        # correct often enough become unconditional statements of this node;
        # the rest are kept, paired with their gate, for pass 2.
        remained_gates, gated_statements = [], []
        for gate, statement_candidate in zip(gates, statement_candidates):
            res, _ = validate_hypothesis(character, pairs, None, statement_candidate)
            correctness = res["True"]/(res["True"]+res["False"]+1e-8)+1e-8
            if correctness >= threshold_accept:
                self.statements.append(statement_candidate)
            else:
                gated_statements.append(statement_candidate)
                remained_gates.append(gate)

        # Pass 2: validate the remaining candidates together with their gate.
        for gate, statement_candidate in zip(remained_gates, gated_statements):
            res, filtered_pairs = validate_hypothesis(character, pairs, gate, statement_candidate)
            n_verdicts = sum(res.values())
            if n_verdicts == 0:
                # Nothing was judged, so broadness is undefined. The unguarded
                # division used to raise ZeroDivisionError here; skip the gate.
                continue
            correctness = res["True"]/(res["True"]+res["False"]+1e-8)+1e-8
            # Share of verdicts that were not "Irrelevant".
            broadness = 1-res["Irrelevant"]/n_verdicts
            if broadness > threshold_filter or correctness <= threshold_reject:
                continue

            if correctness >= threshold_accept:
                # Accepted under the gate: the child is a leaf holding just this statement.
                child_pairs, child_statements = None, [statement_candidate]
            else:
                # In between reject and accept: grow the child from the filtered pairs.
                child_pairs, child_statements = filtered_pairs, None

            self.gates.append(gate)
            self.children.append(CDT_Node(character, goal_topic, child_pairs, built_statements=child_statements,
                                          depth=depth+1, established_statements=established_statements+self.statements,
                                          gate_path=gate_path+[gate], max_depth=max_depth,
                                          threshold_accept=threshold_accept, threshold_reject=threshold_reject, threshold_filter=threshold_filter))

    def traverse(self, scene):
        """Return this node's statements plus those of every child whose gate passes for ``scene``.

        The result is a copy; ``self.statements`` is not modified.
        """
        statements = deepcopy(self.statements)
        for gate, child in zip(self.gates, self.children):
            if check_scene(scene, gate):
                statements.extend(child.traverse(scene))
        return statements

    def verbalize(self):
        """Render the subtree as indented pseudo code.

        Own statements come first, one per line, followed by one
        ``if "<gate>" is "True":`` block per child with the child's rendering
        indented by a tab. A node that renders to only whitespace yields "pass".
        """
        text = "\n".join(self.statements) + "\n"
        for gate, child in zip(self.gates, self.children):
            text += f"if \"{gate}\" is \"True\":\n"
            # The child's text usually ends with "\n", so the split leaves a
            # final empty piece that becomes a lone "\t" line; kept as-is so the
            # rendered text (and any prompt built from it) does not change.
            text += "\n".join([f"\t{line}" for line in child.verbalize().split("\n")])+"\n"
        if text.strip() == "":
            text = "pass"
        return text

In [4]:
def wikify(character, content, cdt_tree_verbalized, attribute, note, lang="English"):
    """Ask the model to write one wiki section from verbalized pseudo code.

    Args:
        character: Character name inserted into the task description and the
            output-format instructions.
        content: Wiki text so far, placed under the "# Wiki:" heading.
        cdt_tree_verbalized: Pseudo code placed under the "# Pseudo code" heading.
        attribute: Section name to summarize; also used as the title template.
        note: Extra text inserted right after the task description.
        lang: Target language named in the output-format instructions.

    Returns:
        Whatever `generate` returns for the assembled prompt.
    """
    prompt_lines = [
        "# Wiki:",
        f"{content}",
        "",
        "# Pseudo code",
        "",
        f"{cdt_tree_verbalized}",
        "",
        "# Task",
        f'The pseudo code above describes the character behavior logic of {character}. Summarize the information from the given code for "{attribute}" section in a wiki style to entail the given character Wiki.',
        f"{note}",
        f"Output in the following format (using {lang} and the name: {character}):",
        f"- {attribute} (The title should also be translated to target language: {lang}) -",
        "",
        # Literal placeholder for the model to fill in; not interpolated.
        "{Content}",
    ]
    return generate("\n".join(prompt_lines))

In [7]:
# --- Settings for the character whose profile is generated ---
character = "戸山香澄"
cdt_id = "Kasumi"
lang = "Chinese"
# The profile starts as just the character name and grows by one section per topic.
content = f"{character}"
note = '''
# Notes
Kasumi -> 戸山香澄
Arisa -> 市谷有咲
Rimi -> 牛込里美
Tae -> 花园多惠
Saaya -> 山吹沙绫
'''

# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load packages that were produced locally or come from a trusted source.
with open(f"packages/{cdt_id}.cdt.v3.1.package.relation.pkl", "rb") as f:
    cdts = pickle.load(f)
topic2cdt = cdts["topic2cdt"]
rel_topic2cdt = cdts["rel_topic2cdt"]

# Attribute topics first, then relation topics. Each call receives the profile
# written so far, and its result is appended as a new section.
wikify_passes = (
    ("Wikifying attribute information...", topic2cdt),
    ("Wikifying interactive information...", rel_topic2cdt),
)
for progress_desc, cdt_by_topic in wikify_passes:
    for topic in tqdm(cdt_by_topic, desc=progress_desc):
        cdt = cdt_by_topic[topic]
        increment = wikify(character, content, cdt.verbalize(), topic, note, lang)
        content = "\n\n".join([content, increment])

# Create the output folder if needed, and write UTF-8 explicitly: the profile
# contains non-ASCII text and the platform default encoding may not be able
# to encode it.
os.makedirs("profiles", exist_ok=True)
with open(f"profiles/{character}.wikified.profile.txt", "w", encoding="utf-8") as writer:
    writer.write(content)

Wikifying attribute information...:   0%|          | 0/4 [00:00<?, ?it/s]

Wikifying interactive information...:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
# Show the assembled profile; print() renders its line breaks instead of an escaped repr.
print(content)

戸山香澄

- 香澄的身份（Kasumi's identity） -

戸山香澄是Poppin'Party乐队的主唱兼吉他手，以其充满活力和感染力的性格著称。她极度重视团队的凝聚力和共同体验，经常在关键时刻强调“大家在一起”的重要性。无论是面对新的挑战、突发状况，还是团队成员之间的讨论，香澄总是以充满情感和表现力的方式回应，经常用夸张或感叹的语气表达自己的惊喜、兴奋或期待。

在团队中，香澄不仅是气氛的带动者，也是情感的纽带。她喜欢用亲昵、热情的语言表达对市谷有咲、牛込里美、花园多惠和山吹沙绫等成员的关心和依赖，尤其在情感高涨或低落的时刻，主动寻求或给予安慰和支持。当团队士气低落或成员对自身价值产生怀疑时，香澄会主动重申每个人在团队中的重要性，并用积极、鼓励的话语强化大家的归属感。

香澄在面对外界反馈或批评时，常常表现出明显的情绪反应，并倾向于寻求队友的肯定和支持。她也会在团队讨论各自角色或贡献时，强调团队的独特性和共同目标，时常用热情洋溢的语言重申Poppin'Party的身份和理想。无论是团队目标受到质疑，还是成员表达不安，香澄都会用坚定和充满活力的态度，带领大家回归初心，强化团队的凝聚力和共同信念。

总的来说，戸山香澄是Poppin'Party不可或缺的核心人物，她以积极、热情和富有感染力的个性，持续影响并维系着团队的团结与共同成长。

- 香澄的性格（Kasumi's Personality） -

戸山香澄以情感外露、积极乐观的性格著称。她在面对各种情绪时，总是毫不掩饰地表达自己的感受，无论是兴奋、惊喜，还是困惑和不安。香澄极度重视团队的凝聚力，喜欢通过主动寻求团队成员的参与和共鸣，营造“大家在一起”的氛围。她在遇到困难或挑战时，常常以坚定的态度重新振作，并积极提出替代方案或新点子，带动团队士气。

香澄在团队互动中，善于用夸张、幽默或戏谑的方式表达自己，尤其在与市谷有咲、牛込里美、花园多惠和山吹沙绫等亲密伙伴相处时，常常展现出俏皮、亲昵的一面。当团队成员感到不安或自我怀疑时，香澄会主动给予鼓励和支持，强调每个人在Poppin'Party中的独特价值，并用热情洋溢的话语强化团队的归属感和共同目标。

面对外界的反馈、批评或玩笑，香澄通常以轻松幽默的态度回应，有时还会用夸张的表情或言语化解尴尬，进一步拉近与队友的距离。她善于通过自发、显著的情感反应影响团