# Manim 数据拟合演示动画
完整的DataFitting场景实现，展示欠拟合、过拟合和最优拟合概念

In [None]:
from manim import *
import numpy as np
import random

class DataFitting(Scene):
    """Animated comparison of underfitting, overfitting and a well-matched fit.

    The scene samples a quadratic, fits it, perturbs the samples with random
    noise, contrasts a linear fit with a high-degree polynomial fit side by
    side, sweeps the polynomial degree upward, and finally returns to the
    quadratic fit as the best match.
    """

    # Degree of the deliberately over-complex fit in the split view. The curve
    # and its on-screen formula are both derived from this single value so
    # they cannot disagree.
    OVERFIT_DEGREE = 9
    # Highest degree reached by the degree sweep.
    MAX_SWEEP_DEGREE = 12
    # Sweep degrees at or above this value flash the overfitting warning.
    OVERFIT_WARNING_DEGREE = 9
    # Degree presented as the best fit (the data are generated by a quadratic).
    BEST_DEGREE = 2
    # (low, high) bounds handed to random.uniform for each displaced sample.
    NOISE_RANGE = (-3.1, 3)

    @staticmethod
    def _make_axes(vertical_shift):
        """Return the shared axes layout, moved down by ``vertical_shift``."""
        return Axes(
            x_range=[0, 10, 2],
            y_range=[0, 30, 5],
            x_length=5,
            y_length=5,
            axis_config={
                "color": BLUE,
                "include_numbers": True,
                "numbers_to_exclude": [0],
                "font_size": 20,
            },
            tips=True,
        ).shift(DOWN * vertical_shift)

    @staticmethod
    def _make_dots(axes, xs, ys, color):
        """Return a VGroup with one dot per (x, y) pair, placed on ``axes``."""
        return VGroup(
            *[
                Dot(axes.c2p(x, y), color=color, radius=0.08)
                for x, y in zip(xs, ys)
            ]
        )

    @staticmethod
    def _plot_polynomial(axes, coeffs, color):
        """Plot the polynomial with ``np.polyfit``-ordered ``coeffs`` on ``axes``.

        The evaluator is defined inside this helper so every curve captures
        its own coefficient array, independent of any loop variable at the
        call site.
        """

        def evaluate(x):
            return np.polyval(coeffs, x)

        return axes.plot(evaluate, color=color)

    @staticmethod
    def _degree_equation(degree, color):
        """Return the formula ``y = sum_{i=0}^{degree} a_i x^i`` as MathTex."""
        return MathTex(f"y = \\sum_{{i=0}}^{{{degree}}} a_i x^i", color=color)

    def construct(self):
        """Build and play the whole animation in order."""
        # 1. Axes for the clean data.
        axes = self._make_axes(0.7)
        self.play(Create(axes, run_time=1.5))
        self.wait(0.5)

        # 2. Noise-free samples of y = 0.8x^2 - 6x + 19.
        x_values = np.linspace(1, 8.5, 9)
        y_values = 0.8 * x_values**2 - 6 * x_values + 19
        original_dots = self._make_dots(axes, x_values, y_values, RED)

        self.play(FadeIn(original_dots))
        self.wait(2)

        # 3. Quadratic fit of the clean samples.
        fit_curve = self._plot_polynomial(
            axes, np.polyfit(x_values, y_values, 2), GREEN
        )
        self.play(Create(fit_curve, run_time=2))
        self.wait(4)

        # 4. Perturb the samples. The sample size follows the number of
        #    points, so changing the data set cannot make random.sample fail;
        #    lower the second argument to disturb only a subset.
        point_count = len(x_values)
        selected_indices = set(random.sample(range(point_count), point_count))
        noise_displacements = [
            random.uniform(*self.NOISE_RANGE) if i in selected_indices else 0.0
            for i in range(point_count)
        ]
        noisy_y = y_values + np.array(noise_displacements)

        animations = [
            dot.animate.move_to(axes.c2p(x, y)).set_color(YELLOW)
            for i, (dot, x, y) in enumerate(
                zip(original_dots, x_values, noisy_y)
            )
            if i in selected_indices
        ]

        self.play(AnimationGroup(*animations, lag_ratio=0.1), run_time=2)
        self.play(FadeOut(fit_curve), run_time=2)
        self.wait(2)

        # 5. Split the view into a left and a right copy of the axes. The
        #    dots are built directly on each shifted copy from the data
        #    values instead of converting screen points back to coordinates.
        axes_left = axes.copy().shift(LEFT * 3.3)
        axes_right = axes.copy().shift(RIGHT * 3.3)
        noisy_dots_left = self._make_dots(axes_left, x_values, noisy_y, YELLOW)
        noisy_dots_right = self._make_dots(axes_right, x_values, noisy_y, YELLOW)

        self.play(
            Transform(axes, axes_left),
            Transform(original_dots, noisy_dots_left),
            FadeIn(axes_right),
            FadeIn(noisy_dots_right),
            run_time=2,
        )
        self.wait(2)

        # 6. Left: linear fit of the noisy samples.
        linear_fit = self._plot_polynomial(
            axes_left, np.polyfit(x_values, noisy_y, 1), BLUE
        )
        linear_label = MathTex("y = ax + b", color=BLUE).next_to(axes_left, UP)

        # 7. Right: high-degree polynomial fit. With 9 samples a degree-9
        #    polynomial has more coefficients than data points, so np.polyfit
        #    may report a RankWarning; the wildly flexible curve is the point.
        overfit_curve = self._plot_polynomial(
            axes_right,
            np.polyfit(x_values, noisy_y, self.OVERFIT_DEGREE),
            RED,
        )
        overfit_label = (
            self._degree_equation(self.OVERFIT_DEGREE, RED)
            .next_to(axes_right, UP)
            .shift(DOWN * 0.5)
        )

        self.play(Create(linear_fit), Write(linear_label), run_time=2)
        self.wait(3)
        self.play(Create(overfit_curve), Write(overfit_label), run_time=2)
        self.wait(3)

        # 8. Replace each formula with the name of the failure mode.
        comparison_text = VGroup(
            Text("Underfitting", color=BLUE).next_to(linear_label, DOWN).shift(UP * 0.8),
            Text("Overfitting", color=RED).next_to(overfit_label, DOWN).shift(UP * 1.3),
        )

        self.play(
            Transform(linear_label, comparison_text[0]),
            Transform(overfit_label, comparison_text[1]),
        )
        self.wait(8)

        # 9. Clear everything currently on screen.
        self.play(FadeOut(Group(*self.mobjects)), run_time=2)
        self.wait(1)

        # 10. Fresh axes showing only the noisy samples.
        new_axes = self._make_axes(0.8)
        new_dots = self._make_dots(new_axes, x_values, noisy_y, YELLOW)

        self.play(Create(new_axes), FadeIn(new_dots), run_time=2)
        self.wait(3)

        # 11. Sweep the polynomial degree from 1 up to MAX_SWEEP_DEGREE.
        equation = self._degree_equation(0, WHITE).to_edge(UP)
        self.play(Write(equation))
        self.wait(0.5)

        # Curves are keyed by degree so no index arithmetic is needed below.
        fits_by_degree = {
            degree: self._plot_polynomial(
                new_axes,
                np.polyfit(x_values, noisy_y, degree),
                self.get_fit_color(degree),
            )
            for degree in range(1, self.MAX_SWEEP_DEGREE + 1)
        }

        # current_fit is the one curve on screen; it is morphed into each
        # later fit while the dictionary entries serve only as targets.
        current_fit = fits_by_degree[1]
        self.play(Create(current_fit))

        new_equation = self._degree_equation(1, self.get_fit_color(1)).to_edge(UP)
        self.play(Transform(equation, new_equation))
        self.wait(1)

        for degree in range(2, self.MAX_SWEEP_DEGREE + 1):
            new_equation = self._degree_equation(
                degree, self.get_fit_color(degree)
            ).to_edge(UP)

            self.play(
                Transform(current_fit, fits_by_degree[degree]),
                Transform(equation, new_equation),
                run_time=1.5,
            )
            self.wait(0.5)

            if degree >= self.OVERFIT_WARNING_DEGREE:
                warning = Text("过拟合!", color=RED).next_to(new_axes, DOWN)
                self.play(Write(warning), run_time=0.5)
                self.wait(0.5)
                self.play(FadeOut(warning))

        self.wait(2)

        # 12. Return to the quadratic fit and label it as the best one.
        best_equation = self._degree_equation(self.BEST_DEGREE, GREEN).to_edge(UP)
        best_label = Text("最优拟合", color=GREEN).next_to(new_axes, RIGHT)

        self.play(
            Transform(current_fit, fits_by_degree[self.BEST_DEGREE]),
            Transform(equation, best_equation),
            Write(best_label),
            run_time=2,
        )
        self.wait(5)

    def get_fit_color(self, degree):
        """Return the curve color used for a polynomial of the given degree.

        Degree 1 is BLUE, other degrees up to 3 are GREEN, up to 5 YELLOW,
        up to 8 ORANGE, and anything higher is RED.
        """
        if degree == 1:
            return BLUE
        elif degree <= 3:
            return GREEN
        elif degree <= 5:
            return YELLOW
        elif degree <= 8:
            return ORANGE
        else:
            return RED

## 动画流程详解

1. **初始设置**
   - 创建坐标轴系统
   - 生成原始数据点(二次函数分布)
   
2. **完美拟合演示**
   - 展示二次多项式对无噪声数据的完美拟合
   
3. **添加噪声**
   - 为数据点添加随机噪声
   - 动画展示数据点移动过程
   
4. **分裂视图对比**
   - 将视图分成左右两部分
   - 左侧展示线性拟合(欠拟合)
   - 右侧展示高次多项式拟合(过拟合)
   
5. **多项式拟合过程**
   - 从1次到12次逐步展示不同次数的多项式拟合效果
   - 使用颜色区分不同复杂度的模型
   - 高次(9次及以上)时显示"过拟合"警告
   
6. **最优拟合展示**
   - 突出显示二次多项式的最优拟合效果

## 使用说明

1. **环境要求**
   - 安装Manim库: `pip install manim`
   - 安装NumPy: `pip install numpy`
   - 安装LaTeX发行版(如TeX Live或MiKTeX): 场景中的`MathTex`公式需要LaTeX才能渲染
   
2. **运行动画**
   ```bash
   manim -pql data_fitting.py DataFitting
   ```
   
3. **参数调整**
   - 修改`x_values`和`y_values`调整数据点分布
   - 调整`random.uniform`的取值范围改变噪声大小(每个点的位移记录在`noise_displacements`中)
   - 修改`get_fit_color`方法调整不同次数拟合曲线的颜色

## 教学要点

1. **欠拟合(Underfitting)**
   - 模型过于简单
   - 无法捕捉数据中的模式
   
2. **过拟合(Overfitting)**
   - 模型过于复杂
   - 过度拟合噪声而非真实模式
   
3. **最优拟合**
   - 模型复杂度与真实数据生成过程匹配
   - 平衡偏差和方差