# CombinedEntropyAndGain 动画说明

## 背景介绍
本动画展示了数据集中“是否流失”标签的熵（Entropy）计算过程，以及基于“性别”和“经济水平”两个属性的信息增益（Information Gain）分析。

数据来自一个简单的数据表，包含性别、经济水平和是否流失三列。

## 代码说明
- 使用 Manim 创建表格和文本，设置字体为支持中文的 `SimHei`，并配置 LaTeX 支持中文。
- 通过 `math.log2` 按 $H(S) = -\sum_i p_i \log_2 p_i$ 计算熵的数值，并据此得到各属性的条件熵与信息增益。
- 利用 Manim 的动画类（`Create`，`Write`，`FadeOut`，`Transform`）完成动态展示。


In [None]:
from manim import *
from math import log2
from manim.utils.tex import TexTemplate

# Override the global Manim frame size (scene units) used by the scene below.
# NOTE(review): 22:12 is not a 16:9 ratio; if the output pixel size is left at a
# 16:9 default the render may be slightly stretched — confirm the intended ratio.
config.frame_width = 22
config.frame_height = 12

# Build a LaTeX template that can typeset Chinese text (used by every MathTex below).
# NOTE(review): ctex is commonly compiled with XeLaTeX; this keeps TexTemplate's
# default compiler — confirm it renders Chinese glyphs on the target machine.
chinese_template = TexTemplate()
chinese_template.add_to_preamble(r"""
\usepackage[UTF8]{ctex}
\usepackage{amsmath}
""")

class CombinedEntropyAndGain(Scene):
    """Animate entropy and information gain for a small churn table.

    The scene draws a 15-row table (gender, economic level, churn label) on
    the left and writes, on the right, the label entropy H(S), the conditional
    entropy for each attribute, and a step-by-step information-gain derivation
    for each attribute.  At the end everything except the two derivations is
    faded out and the derivations are transformed into two one-line results.

    Relies on the module-level ``chinese_template`` so that Chinese text can be
    typeset inside ``MathTex``.
    """

    @staticmethod
    def _label_entropy(rows, label_col=2):
        """Return the base-2 Shannon entropy of column *label_col* of *rows*.

        Returns 0 when *rows* is empty or every row carries the same label, so
        ``log2`` is never evaluated at 0 and no ``-0.0`` is produced.
        """
        counts = {}
        for row in rows:
            counts[row[label_col]] = counts.get(row[label_col], 0) + 1
        if len(counts) < 2:
            return 0
        size = len(rows)
        return -sum((c / size) * log2(c / size) for c in counts.values())

    @classmethod
    def _conditional_entropy(cls, rows, attr_col, label_col=2):
        """Return H(label | attribute) for the attribute in column *attr_col*.

        Rows are grouped by attribute value (in first-appearance order) and the
        entropy of each group is weighted by the group's share of *rows*.
        An empty *rows* yields 0.
        """
        groups = {}
        for row in rows:
            groups.setdefault(row[attr_col], []).append(row)
        total = len(rows)
        return sum(
            (len(group) / total) * cls._label_entropy(group, label_col)
            for group in groups.values()
        )

    def construct(self):
        """Build the table and formulas, then play the animation sequence."""
        # Data preparation: each row is [gender, economic level, churn label].
        data = [
            ["男", "高", "0"], ["女", "中", "0"], ["男", "低", "1"], ["女", "高", "0"], ["男", "高", "0"],
            ["男", "中", "0"], ["男", "中", "1"], ["女", "中", "0"], ["女", "低", "1"], ["女", "中", "0"],
            ["女", "高", "0"], ["男", "低", "1"], ["女", "低", "1"], ["男", "高", "0"], ["男", "高", "0"]
        ]
        headers = ["性别", "经济水平", "是否流失"]
        table_data = [headers] + data

        main_table = Table(
            table_data,
            include_outer_lines=True,
            line_config={"stroke_width": 3, "color": WHITE},
            element_to_mobject_config={"font_size": 36, "weight": BOLD},
        ).scale(0.55).to_edge(LEFT, buff=1.0)

        # Entropy figures: one shared helper for the whole table and for every
        # attribute split, so all numbers come from the same formula.
        entropy = self._label_entropy(data)
        H_gender = self._conditional_entropy(data, 0)
        H_econ = self._conditional_entropy(data, 1)

        gain_gender = entropy - H_gender
        gain_econ = entropy - H_econ

        # Right-hand column: each label is placed below the previous formula
        # and left-aligned with the first label.
        entropy_label = Text("总熵:", font="SimHei", font_size=55, weight=BOLD, color=YELLOW).to_edge(UP).shift(RIGHT * 3 + DOWN * 0.5)
        entropy_formula = MathTex(
            f"H(S) = {entropy:.3f}",
            font_size=60,
            tex_template=chinese_template
        ).set_color(YELLOW).next_to(entropy_label, RIGHT, buff=0.2)

        gender_label = Text("性别条件熵:", font="SimHei", font_size=50, weight=BOLD, color=PINK).next_to(
            entropy_formula, DOWN, buff=0.7).align_to(entropy_label, LEFT)
        gender_formula = MathTex(
            f"H(S|X=\\text{{性别}}) = {H_gender:.3f}",
            font_size=58,
            tex_template=chinese_template
        ).set_color(PINK).next_to(gender_label, RIGHT, buff=0.2)

        econ_label = Text("经济条件熵:", font="SimHei", font_size=50, weight=BOLD, color=BLUE).next_to(
            gender_formula, DOWN, buff=0.7).align_to(entropy_label, LEFT)
        econ_formula = MathTex(
            f"H(S|X=\\text{{经济水平}}) = {H_econ:.3f}",
            font_size=58,
            tex_template=chinese_template
        ).set_color(BLUE).next_to(econ_label, RIGHT, buff=0.2)

        gain_gender_label = Text("性别信息增益:", font="SimHei", font_size=40, weight=BOLD, color=PINK).next_to(
            econ_formula, DOWN, buff=0.7).align_to(entropy_label, LEFT)
        gain_gender_formula = MathTex(
            r"""\begin{aligned}
        IG(gender) &= H(S) - H(S|X=\text{性别}) \\ 
        &= %.3f - %.3f \\
        &= %.3f
        \end{aligned}""" % (entropy, H_gender, gain_gender),
            font_size=48,
            tex_template=chinese_template
        ).set_color(PINK).next_to(gain_gender_label, RIGHT, buff=0.2).align_to(gain_gender_label, UP)

        gain_econ_label = Text("经济信息增益:", font="SimHei", font_size=40, weight=BOLD, color=BLUE).next_to(
            gain_gender_formula, DOWN, buff=0.7).align_to(entropy_label, LEFT)
        gain_econ_formula = MathTex(
            r"""\begin{aligned}
        IG(econ) &= H(S) - H(S|X=\text{经济水平}) \\
        &= %.3f - %.3f \\
        &= %.3f
        \end{aligned}""" % (entropy, H_econ, gain_econ),
            font_size=48,
            tex_template=chinese_template
        ).set_color(BLUE).next_to(gain_econ_label, RIGHT, buff=0.2).align_to(gain_econ_label, UP)

        # Group the whole right-hand column only to move it as one unit; the
        # group itself is not needed afterwards.
        VGroup(
            entropy_label, entropy_formula,
            gender_label, gender_formula,
            econ_label, econ_formula,
            gain_gender_label, gain_gender_formula,
            gain_econ_label, gain_econ_formula,
        ).shift(LEFT * 5.5)

        # Animation order: table first, then each label/formula pair followed
        # by its own pause (in seconds).
        self.play(Create(main_table))
        self.wait(0.5)

        reveal_steps = (
            (entropy_label, entropy_formula, 1),
            (gender_label, gender_formula, 1),
            (econ_label, econ_formula, 1),
            (gain_gender_label, gain_gender_formula, 0.5),
            (gain_econ_label, gain_econ_formula, 2),
        )
        for label, formula, pause in reveal_steps:
            self.play(Write(label))
            self.play(Write(formula))
            self.wait(pause)

        # -------- Final simplified display --------
        # The two detailed information-gain derivations, grouped as the
        # source of the final transform.
        detailed_group = VGroup(gain_gender_formula, gain_econ_formula)

        # One-line summary of each information gain.
        final_gain_gender = MathTex(
            f"IG(\\text{{性别}}) = {gain_gender:.3f}",
            font_size=60,
            tex_template=chinese_template
        ).set_color(PINK)

        final_gain_econ = MathTex(
            f"IG(\\text{{经济水平}}) = {gain_econ:.3f}",
            font_size=60,
            tex_template=chinese_template
        ).set_color(BLUE)

        # Lay the two summaries out side by side near the top edge.
        final_group = VGroup(final_gain_gender, final_gain_econ).arrange(RIGHT, buff=1.5).to_edge(UP, buff=1)

        # Fade out the table and everything on the right except the two
        # detailed derivations, which are kept as the transform source.
        fading = (
            main_table,
            entropy_label,
            entropy_formula,
            gender_label,
            gender_formula,
            econ_label,
            econ_formula,
            gain_gender_label,
            gain_econ_label,
        )
        self.play(*(FadeOut(mob) for mob in fading))
        self.wait(0.2)

        # Morph the two detailed derivations into the one-line summaries.
        self.play(
            Transform(detailed_group, final_group)
        )
        self.wait(3)
