In [2]:
import pandas as pd
import networkx as nx
import re
from itertools import combinations
import os

def _split_sentences(text):
    """Split raw text into stripped, non-empty sentence fragments.

    Splits on the Chinese full stop, question mark, exclamation mark and on
    both the full-width (；) and ASCII (;) semicolon. The full-width form is
    the one that actually appears in Chinese prose, so it must be included.
    """
    return [part.strip() for part in re.split(r'[。？！；;]', text) if part.strip()]


def _collect_cooccurrences(sentences, alias_map):
    """Find characters that are mentioned together in the same sentence.

    Args:
        sentences: iterable of sentence strings.
        alias_map: dict mapping every alias to its canonical character name.

    Returns:
        (edges, chars_found) where ``edges`` is a list of ``(name_a, name_b)``
        tuples with ``name_a < name_b`` (one per co-occurring pair per
        sentence) and ``chars_found`` is the set of canonical names seen at
        least once anywhere in ``sentences``.
    """
    edges = []
    chars_found = set()
    for sentence in sentences:
        # Plain substring matching: an alias also matches inside a longer
        # string (e.g. "索雷尔" matches inside "索雷尔老爹").
        present = {name for alias, name in alias_map.items() if alias in sentence}
        chars_found |= present
        # Sorting first gives each undirected pair a single orientation so
        # (A, B) and (B, A) are counted as the same edge.
        edges.extend(combinations(sorted(present), 2))
    return edges, chars_found


def run_analysis_and_output(input_file='小说novel.txt', output_file='红与黑小说人物关系.txt'):
    """Build a sentence-level co-occurrence network for the novel and write a report.

    Reads ``input_file`` (UTF-8), counts how often each pair of characters is
    mentioned in the same sentence, computes PageRank (weighted by those
    counts) and degree centrality, and writes a four-section plain-text
    report to ``output_file``.

    Args:
        input_file: path of the novel text. Defaults to the original
            hard-coded file name.
        output_file: path of the report to write. Defaults to the original
            hard-coded file name.

    Returns:
        None. Progress and failures are reported with ``print``; a missing
        input file or a text without any co-occurring pair aborts early
        without writing the report.
    """
    # Canonical name -> aliases searched for in the text. The key "玛娣儿特"
    # matches the spelling used in the semantic table below, so one person is
    # not reported under two different names.
    # NOTE(review): "戴薇尔夫人" is listed as an alias of 瑞那夫人 — confirm this is
    # intended and not a separate character.
    roles = {
        "于连": ["于连", "索雷尔", "于连·索雷尔", "德·拉尉耐"],
        "瑞那夫人": ["瑞那夫人", "路易丝", "德·瑞那夫人", "戴薇尔夫人"],
        "瑞那先生": ["瑞那先生", "德·瑞那先生", "维璃叶市长"],
        "玛娣儿特": ["玛娣儿特", "拉穆尔小姐"],
        "拉穆尔侯爵": ["拉穆尔侯爵", "拉穆尔先生"],
        "谢朗神甫": ["谢朗", "谢朗神甫"],
        "彼拉神甫": ["彼拉", "彼拉神甫"],
        "瓦勒诺": ["瓦勒诺", "收容所所长"],
        "傅凯": ["傅凯"],
        "诺尔拜": ["诺尔拜"],
        "菲华格夫人": ["菲华格夫人", "元帅夫人"],
        "阿尔泰米拉": ["阿尔泰米拉"],
        "爱丽莎": ["爱丽莎", "艾莉莎"]
    }

    alias_map = {alias: name for name, aliases in roles.items() for alias in aliases}

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"分析失败：未找到文件 {input_file}")
        return

    edges, chars_found = _collect_cooccurrences(_split_sentences(text), alias_map)

    if not edges:
        print("分析失败：未检测到足够的人物互动，请检查文本内容或人物列表。")
        return

    # Collapse the raw pair list into one row per pair with its frequency.
    df_edges = pd.DataFrame(edges, columns=['Source', 'Target'])
    df_inter = df_edges.groupby(['Source', 'Target']).size().reset_index(name='互动次数')
    df_inter = df_inter.sort_values(by='互动次数', ascending=False)

    G = nx.from_pandas_edgelist(df_inter, 'Source', 'Target', '互动次数')
    # The counts are stored under the '互动次数' edge attribute; pagerank looks
    # for 'weight' by default, so the attribute must be named explicitly or
    # every edge is treated as weight 1.
    pr = nx.pagerank(G, weight='互动次数')
    deg = nx.degree_centrality(G)

    df_cent = pd.DataFrame({
        '人物': list(G.nodes()),
        '核心度(PageRank)': [pr.get(n, 0) for n in G.nodes()],
        '社交度(Degree)': [deg.get(n, 0) for n in G.nodes()]
    }).sort_values(by='核心度(PageRank)', ascending=False)

    # Hand-written background relationships; not derived from the text.
    semantic_relationships = [
        ("于连", "瑞那夫人", "秘密情人"),
        ("瑞那先生", "瑞那夫人", "夫妻"),
        ("于连", "玛娣儿特", "秘密情人 / 未婚夫妇"),
        ("拉穆尔侯爵", "玛娣儿特", "父女"),
        ("于连", "索雷尔老爹", "父子"),
        ("谢朗神甫", "于连", "启蒙导师/师生"),
        ("瑞那先生", "瓦勒诺", "政治对手/地方权贵"),
        ("彼拉神甫", "于连", "推荐人/导师"),
        ("玛娣儿特", "诺尔拜", "朋友/追求者")
    ]
    df_semantic = pd.DataFrame(semantic_relationships, columns=['人物A', '人物B', '关系类型'])

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("《红与黑》小说人物关系分析报告\n\n")

            f.write(f"[1. 书中识别到的主要人物 - 共{len(chars_found)}人]\n")
            f.write(", ".join(sorted(chars_found)) + "\n\n")

            f.write("[2. 句子级互动次数统计]\n")
            f.write(df_inter.to_string(index=False))
            f.write("\n\n")

            f.write("[3. 主要人物中心度分析]\n")
            f.write(df_cent.round(4).to_string(index=False))
            f.write("\n\n")

            f.write("[4. 小说人物关系]\n")
            f.write(df_semantic.to_string(index=False))

        print(f"分析成功。结果已保存至文件：{output_file}")

    except OSError as e:
        # Only file-system failures are expected here; anything else is a
        # programming error and should surface instead of being masked.
        print(f"分析失败，写入文件错误：{str(e)}")

# Run the novel analysis when this cell/script is executed directly.
if __name__ == '__main__':
    run_analysis_and_output()

分析成功。结果已保存至文件：红与黑小说人物关系.txt


In [9]:
import pandas as pd
import networkx as nx
import re

# ===== File names =====
# Input: the cleaned script text; output: the plain-text relationship report.
CLEAN_FILE = '摇滚红与黑.txt'
OUTPUT_FILE = '红与黑歌剧人物关系.txt'

# ===== Character dictionary =====
# Canonical character name -> list of aliases.
# NOTE(review): within the visible code only the KEYS are used (through
# STANDARD_NAMES below); the alias lists are not read by
# analyze_cleaned_script. Presumably an earlier cleaning step normalised the
# script to the canonical names — confirm.
ROLES = {
    "于连": ["于连", "索雷尔", "于连・索雷尔"],
    "瑞那夫人": ["瑞那夫人", "露易丝", "德瑞纳夫人"],
    "德瑞纳先生": ["德瑞纳先生", "市长先生"],
    "玛娣儿特": ["玛蒂尔德", "玛蒂尔德·德拉莫小姐"],
    "拉穆尔侯爵": ["拉莫尔侯爵", "父亲", "侯爵"],
    "杰洛尼莫": ["杰洛尼莫"],
    "瓦勒诺": ["瓦勒诺先生", "瓦勒诺", "瓦勒诺男爵"],
    "瓦勒诺夫人": ["瓦勒诺夫人"],
    "爱丽莎": ["爱丽莎"],
    "律师": ["律师"],
    "法官": ["法官"],
    "别人": ["别人"],
    "元帅夫人": ["元帅夫人"],
    "夸泽诺侯爵": ["夸泽诺侯爵"]
}

# Canonical names ordered longest first, so that in a regex alternation a
# longer name (e.g. "瓦勒诺夫人") is tried before a name that is its prefix
# (e.g. "瓦勒诺").
STANDARD_NAMES = sorted(ROLES.keys(), key=len, reverse=True)

def analyze_cleaned_script(clean_file=None, output_file=None, standard_names=None):
    """Build a dialogue-turn network from the cleaned script and write a report.

    Every line that starts with ``<canonical name>：`` (full-width colon) is
    treated as a speech by that character. Consecutive speeches by the same
    character are merged into one turn, and each pair of adjacent turns
    contributes one undirected edge. PageRank (weighted by the turn counts)
    and degree centrality are computed on the resulting graph and written,
    together with a hand-written relationship table, to the report file.

    Args:
        clean_file: path of the cleaned script (UTF-8). Defaults to the
            module-level ``CLEAN_FILE``.
        output_file: path of the report to write. Defaults to the
            module-level ``OUTPUT_FILE``.
        standard_names: iterable of canonical speaker names to recognise.
            Defaults to the module-level ``STANDARD_NAMES``.

    Returns:
        None. A missing input file, or a script with fewer than two
        alternating speakers, is reported with ``print`` and no report is
        written.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth when no argument is given.
    if clean_file is None:
        clean_file = CLEAN_FILE
    if output_file is None:
        output_file = OUTPUT_FILE
    if standard_names is None:
        standard_names = STANDARD_NAMES

    try:
        with open(clean_file, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"分析失败：未找到文件 {clean_file}")
        return

    # One capture group holding just the name; the trailing full-width colon
    # is required but not captured.
    speaker_pattern = re.compile(
        '(' + '|'.join(re.escape(name) for name in standard_names) + ')：'
    )

    speaker_sequence = []
    chars_found = set()

    for raw_line in text.split('\n'):
        match = speaker_pattern.match(raw_line.strip())
        if not match:
            continue
        speaker = match.group(1)
        chars_found.add(speaker)
        # Merge back-to-back lines by the same speaker into one turn.
        if not speaker_sequence or speaker_sequence[-1] != speaker:
            speaker_sequence.append(speaker)

    # Adjacent turns form an undirected edge; sorting gives each pair a
    # single orientation so (A, B) and (B, A) are counted together.
    edges = [
        tuple(sorted(pair))
        for pair in zip(speaker_sequence, speaker_sequence[1:])
    ]

    if not edges:
        print("分析失败：未检测到足够的人物互动，请检查文本内容或人物列表。")
        return

    df_edges = pd.DataFrame(edges, columns=['Source', 'Target'])
    df_inter = df_edges.groupby(['Source', 'Target']).size().reset_index(name='互动次数')
    df_inter = df_inter.sort_values(by='互动次数', ascending=False)

    G = nx.from_pandas_edgelist(df_inter, 'Source', 'Target', '互动次数')
    # The counts are stored under the '互动次数' edge attribute; pagerank looks
    # for 'weight' by default, so the attribute must be named explicitly or
    # every edge is treated as weight 1.
    pr = nx.pagerank(G, weight='互动次数')
    deg = nx.degree_centrality(G)

    df_cent = pd.DataFrame({
        '人物': list(G.nodes()),
        '核心度(PageRank)': [pr.get(n, 0) for n in G.nodes()],
        '社交度(Degree)': [deg.get(n, 0) for n in G.nodes()]
    }).sort_values(by='核心度(PageRank)', ascending=False)

    # Hand-written background relationships; not derived from the script.
    semantic_relationships = [
        ("于连", "瑞那夫人", "秘密情人"),
        ("德瑞纳先生", "瑞那夫人", "夫妻"),
        ("于连", "玛娣儿特", "秘密情人 / 未婚夫妇"),
        ("拉穆尔侯爵", "玛娣儿特", "父女"),
        ("于连", "索雷尔老爹", "父子"),
        ("德瑞纳先生", "瓦勒诺", "政治对手/地方权贵"),
        ("杰洛尼莫", "于连", "朋友/引导者/歌者"),
        ("爱丽莎", "瑞那夫人", "雇主/女仆"),
        ("律师", "于连", "辩护关系")
    ]

    df_semantic = pd.DataFrame(semantic_relationships, columns=['人物A', '人物B', '关系类型'])

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("=== 《摇滚红与黑》歌剧剧本人物关系分析报告\n\n")

        f.write(f"[1. 剧中识别到的主要人物 - 共{len(chars_found)}人]\n")
        f.write(", ".join(sorted(chars_found)) + "\n\n")

        f.write("[2. 对话轮次互动次数统计 (代表直接对话/舞台交互的频率)]\n")
        f.write(df_inter.to_string(index=False))
        f.write("\n\n")

        f.write("[3. 主要人物中心度分析]\n")
        f.write(df_cent.round(4).to_string(index=False))
        f.write("\n\n")

        f.write("[4. 歌剧关键语义关系 (基于背景知识)]\n")
        f.write(df_semantic.to_string(index=False))

    print(f"分析成功。结果已保存至文件：{output_file}")


# Run the script analysis when this cell/script is executed directly.
if __name__ == '__main__':
    analyze_cleaned_script()
