In [ ]:
# === 3. 多模态治疗场景测试 ===

# Test scenarios: one request/config pair per therapy focus
# (sleep, anxiety, depression).
multimodal_scenarios = [
    dict(
        name='深度睡眠诱导',
        description='重度失眠患者的深度睡眠诱导',
        request=MultimodalTherapyRequest(
            user_id='user_insomnia_001',
            current_emotion=dict(valence=-0.4, arousal=0.6),
            target_emotion=dict(valence=0.2, arousal=-0.7),
            therapy_focus='sleep',
            session_duration=900,
            sensitivity_profile=dict(
                visual_sensitivity=0.3,
                audio_sensitivity=0.8,
                motion_sensitivity=0.2,
            ),
        ),
        config=MultimodalConfiguration(
            sync_mode=SyncMode.TIGHT,
            modality_weights=dict(audio=0.75, visual=0.25),
            cross_modal_coherence=0.9,
        ),
    ),
    dict(
        name='焦虑症状缓解',
        description='广泛性焦虑的多感官缓解治疗',
        request=MultimodalTherapyRequest(
            user_id='user_anxiety_002',
            current_emotion=dict(valence=-0.5, arousal=0.8),
            target_emotion=dict(valence=0.1, arousal=-0.2),
            therapy_focus='anxiety',
            session_duration=720,
            sensitivity_profile=dict(
                visual_sensitivity=0.6,
                audio_sensitivity=0.7,
                motion_sensitivity=0.4,
            ),
        ),
        config=MultimodalConfiguration(
            sync_mode=SyncMode.ADAPTIVE,
            modality_weights=dict(audio=0.6, visual=0.4),
            cross_modal_coherence=0.85,
        ),
    ),
    dict(
        name='抑郁情绪提升',
        description='轻度抑郁的情绪激活治疗',
        request=MultimodalTherapyRequest(
            user_id='user_depression_003',
            current_emotion=dict(valence=-0.7, arousal=-0.3),
            target_emotion=dict(valence=0.3, arousal=0.1),
            therapy_focus='depression',
            session_duration=600,
            sensitivity_profile=dict(
                visual_sensitivity=0.8,
                audio_sensitivity=0.5,
                motion_sensitivity=0.6,
            ),
        ),
        config=MultimodalConfiguration(
            sync_mode=SyncMode.TIGHT,
            modality_weights=dict(audio=0.4, visual=0.6),
            cross_modal_coherence=0.8,
        ),
    ),
]

# Run every scenario through the mock system and collect the results.
multimodal_results = []

print("🎭 开始多模态治疗系统测试...\n")

for i, scenario in enumerate(multimodal_scenarios, 1):
    print(f"📊 场景 {i}: {scenario['name']}")
    print(f"   描述: {scenario['description']}")

    # Generate the multimodal therapy content. perf_counter() is monotonic,
    # so the measured duration cannot be skewed (or go negative) when the
    # system clock is adjusted mid-run, unlike time.time().
    start_time = time.perf_counter()
    result = multimodal_system.generate_multimodal_therapy(
        scenario['request'], scenario['config']
    )
    total_time = time.perf_counter() - start_time

    # Keep the scenario and timing next to the generated result.
    result['scenario'] = scenario
    result['total_generation_time'] = total_time
    multimodal_results.append(result)

    # Report the key metrics for this scenario.
    synergy = result['synergy_metrics']
    prediction = result['therapy_prediction']

    print(f"   ✅ 生成完成 (总耗时: {total_time:.2f}s)")
    print(f"   🎵 音频质量: {result['audio_content']['quality_score']:.3f}")
    print(f"   📹 视频质量: {result['video_content']['quality_score']:.3f}")
    print(f"   ⚡ 同步精度: {result['synchronized_content']['temporal_alignment']:.3f}")
    print(f"   🤝 协同得分: {synergy['synergy_score']:.3f}")
    print(f"   🎯 治疗预测: {prediction['predicted_outcome']}")
    print(f"   📈 多模态增强: +{prediction['enhancement_vs_single_modal']:.1%}")
    print()

# Aggregate performance analysis across all scenarios.
# The synergy scores feed both the mean and the stability check, so they are
# collected once instead of rebuilding the list for each use.
synergy_scores = [r['synergy_metrics']['synergy_score'] for r in multimodal_results]
predictions = [r['therapy_prediction'] for r in multimodal_results]

avg_synergy = np.mean(synergy_scores)
avg_effectiveness = np.mean([p['effectiveness_score'] for p in predictions])
avg_enhancement = np.mean([p['enhancement_vs_single_modal'] for p in predictions])

print("📈 多模态系统综合表现:")
print(f"   • 平均协同得分: {avg_synergy:.3f}")
print(f"   • 平均治疗效果: {avg_effectiveness:.3f}")
print(f"   • 平均多模态增强: +{avg_enhancement:.1%}")
# Stability is rated by the spread of the synergy scores (std below 0.1).
print(f"   • 系统稳定性: {'优秀' if np.std(synergy_scores) < 0.1 else '良好'}")

# Final assessment and summary.
print("\n🏆 《心境流转》多模态治疗系统评估完成!")
print(f"✅ 测试场景: {len(multimodal_scenarios)}个")
print(f"📊 综合评级: {'A 优秀' if avg_synergy > 0.8 and avg_effectiveness > 0.75 else 'B+ 良好'}")
print("🎯 核心优势: 音视频协同治疗，多感官整合，实时同步优化")

# Nothing in this cell writes the results to disk, so the message points at
# the in-memory variable rather than claiming the data was saved.
print("\n💾 多模态治疗测试结果已保留在变量 multimodal_results 中，可用于后续系统优化。")

In [ ]:
# === 完整多模态系统实现 (继续) ===

# 添加同步和协同效果评估方法
def _synchronize_modalities(self, audio_content: Dict, video_content: Dict,
                            config: MultimodalConfiguration) -> Dict:
    """Simulate synchronizing the audio and video streams.

    Written as a free function taking ``self``; it reads
    ``self.audio_generator`` and ``self.video_generator``.

    Args:
        audio_content: Generated audio descriptor (not inspected here).
        video_content: Generated video descriptor (not inspected here).
        config: Multimodal configuration; only ``sync_mode`` is read.

    Returns:
        Dict with ``sync_offset`` (video latency minus audio latency),
        ``sync_precision`` (chosen from the sync mode) and three simulated
        scores -- ``temporal_alignment``, ``content_coherence`` and
        ``buffer_efficiency`` -- each clamped to the range [0, 1].
    """

    def _unit_score(mean: float, std: float) -> float:
        # A normal draw is unbounded; clamp it so a simulated score can never
        # be reported (or propagated into the synergy score) outside [0, 1].
        return float(min(1.0, max(0.0, np.random.normal(mean, std))))

    # Offset between the two generators' latencies.
    audio_latency = self.audio_generator['latency']
    video_latency = self.video_generator['latency']
    sync_offset = video_latency - audio_latency

    # Simulated synchronization precision per mode; every mode other than
    # TIGHT and LOOSE uses the 0.05 default.
    if config.sync_mode == SyncMode.TIGHT:
        sync_precision = 0.02
    elif config.sync_mode == SyncMode.LOOSE:
        sync_precision = 0.1
    else:
        sync_precision = 0.05

    return {
        'sync_offset': sync_offset,
        'sync_precision': sync_precision,
        'temporal_alignment': _unit_score(0.92, 0.05),
        'content_coherence': _unit_score(0.88, 0.08),
        'buffer_efficiency': _unit_score(0.85, 0.06)
    }

def _evaluate_modal_synergy(self, sync_content: Dict, emotion_analysis: Dict) -> Dict:
    """Score how well the synchronized modalities work together.

    Args:
        sync_content: Must provide ``temporal_alignment`` and
            ``content_coherence``.
        emotion_analysis: Must provide ``change_magnitude`` and
            ``required_intensity``.

    Returns:
        Dict with the combined ``synergy_score``, its inputs, the multimodal
        ``enhancement_factor`` (never below 1.0) and ``predicted_effectiveness``.
    """
    alignment = sync_content['temporal_alignment']
    coherence = sync_content['content_coherence']

    # Larger emotional shifts lower the match: 0.2 is subtracted per unit of magnitude.
    emotion_fit = 1.0 - emotion_analysis['change_magnitude'] * 0.2

    # Weighted blend: 60% alignment-times-coherence, 40% emotion match.
    combined = (alignment * coherence) * 0.6 + emotion_fit * 0.4

    metrics = {'synergy_score': combined}
    metrics['temporal_sync'] = alignment
    metrics['content_harmony'] = coherence
    metrics['emotion_alignment'] = emotion_fit
    # Multimodal enhancement coefficient, floored at 1.0.
    metrics['enhancement_factor'] = max(1.0, combined * 1.3)
    metrics['predicted_effectiveness'] = combined * emotion_analysis['required_intensity']
    return metrics

def _predict_therapy_outcome(self, synergy_metrics: Dict, emotion_analysis: Dict) -> Dict:
    """Map the predicted effectiveness onto an outcome tier.

    Args:
        synergy_metrics: Must provide ``predicted_effectiveness`` and
            ``enhancement_factor``.
        emotion_analysis: Accepted but not read.

    Returns:
        Dict with the outcome label, the effectiveness score, a fixed
        confidence per tier, and the gain over single-modal therapy
        (``enhancement_factor`` minus 1.0).
    """
    score = synergy_metrics['predicted_effectiveness']

    # (minimum score, outcome label, confidence), highest tier first.
    tiers = (
        (0.85, "优秀 - 显著改善", 0.92),
        (0.70, "良好 - 有效改善", 0.85),
        (0.55, "一般 - 轻微改善", 0.72),
    )

    # Fallback used when the score reaches none of the tiers.
    label, certainty = "待优化 - 效果有限", 0.60
    for floor, tier_label, tier_confidence in tiers:
        if score >= floor:
            label, certainty = tier_label, tier_confidence
            break

    return {
        'predicted_outcome': label,
        'effectiveness_score': score,
        'confidence_level': certainty,
        'enhancement_vs_single_modal': synergy_metrics['enhancement_factor'] - 1.0,
    }

# Bind the module-level functions onto the class under the same names so
# they become regular methods of MockMultimodalTherapySystem.
setattr(MockMultimodalTherapySystem, '_synchronize_modalities', _synchronize_modalities)
setattr(MockMultimodalTherapySystem, '_evaluate_modal_synergy', _evaluate_modal_synergy)
setattr(MockMultimodalTherapySystem, '_predict_therapy_outcome', _predict_therapy_outcome)

print("✅ 多模态同步和协同评估功能已加载")

In [ ]:
# === 2. 多模态治疗系统架构 ===

class SyncMode(Enum):
    """Synchronization mode between the audio and video modalities.

    ``_synchronize_modalities`` in this file picks its simulated
    ``sync_precision`` from this value: 0.02 for TIGHT, 0.1 for LOOSE and
    0.05 for every other member.
    """
    TIGHT = "tight"  # tight synchronization
    LOOSE = "loose"  # loose synchronization
    ADAPTIVE = "adaptive"  # adaptive synchronization
    INDEPENDENT = "independent"  # modalities run independently

class ModalityWeight(Enum):
    """Modality weighting strategy.

    NOTE(review): no code visible in this file references this enum; the
    actual weights are passed as a plain dict via
    ``MultimodalConfiguration.modality_weights`` -- confirm whether it is
    still needed.
    """
    BALANCED = "balanced"  # balanced weighting
    AUDIO_DOMINANT = "audio_dominant"  # audio-dominant
    VISUAL_DOMINANT = "visual_dominant"  # visual-dominant
    ADAPTIVE = "adaptive"  # adaptively adjusted

@dataclass
class MultimodalConfiguration:
    """Configuration for a multimodal therapy session.

    ``modality_weights`` defaults to ``None`` as a sentinel and is replaced
    in ``__post_init__`` with a fresh ``{'audio': 0.6, 'visual': 0.4}`` dict,
    so instances never share one mutable default.
    """
    sync_mode: SyncMode = SyncMode.TIGHT
    # Optional[...] because None is the declared default (PEP 484 does not
    # allow an implicit Optional); filled in by __post_init__.
    modality_weights: Optional[Dict[str, float]] = None
    cross_modal_coherence: float = 0.8  # cross-modal coherence
    sensory_integration: bool = True  # sensory integration
    adaptive_balancing: bool = True  # adaptive balancing

    def __post_init__(self):
        """Fill in the default modality weights when none were supplied."""
        if self.modality_weights is None:
            self.modality_weights = {'audio': 0.6, 'visual': 0.4}

@dataclass
class MultimodalTherapyRequest:
    """A request for one multimodal therapy session.

    ``preferred_modalities`` and ``sensitivity_profile`` default to ``None``
    as a sentinel; ``__post_init__`` replaces each with a freshly built
    default so instances never share a mutable object.
    """
    user_id: str
    current_emotion: Dict[str, float]  # current valence/arousal state
    target_emotion: Dict[str, float]  # target valence/arousal state
    therapy_focus: str  # therapy focus: sleep/anxiety/depression
    session_duration: int = 600  # session length in seconds
    # Optional[...] because None is the declared default (PEP 484 does not
    # allow an implicit Optional); both are filled in by __post_init__.
    preferred_modalities: Optional[List[str]] = None  # preferred modalities
    sensitivity_profile: Optional[Dict[str, float]] = None  # sensitivity profile

    def __post_init__(self):
        """Fill in per-instance defaults for the optional fields."""
        if self.preferred_modalities is None:
            self.preferred_modalities = ['audio', 'visual']
        if self.sensitivity_profile is None:
            self.sensitivity_profile = {
                'visual_sensitivity': 0.5,
                'audio_sensitivity': 0.5,
                'motion_sensitivity': 0.3
            }

class MockMultimodalTherapySystem:
    """Mock multimodal (audio + video) therapy system.

    No real model is loaded: the generators and the sync controller are
    plain dicts, and generation times and quality scores are random draws.

    ``generate_multimodal_therapy`` calls ``_synchronize_modalities``,
    ``_evaluate_modal_synergy`` and ``_predict_therapy_outcome``, which are
    not defined in this class body; they have to be attached to the class
    before that method is called.
    """

    # (tempo range in BPM, frequency range) per therapy strategy. Strategies
    # not listed here use _DEFAULT_AUDIO_PROFILE.
    _AUDIO_PROFILES = {
        'deep_relaxation': ((40, 60), (60, 200)),  # mostly low frequencies
        'gentle_calming': ((60, 80), (80, 400)),
        'mood_uplift': ((80, 100), (200, 800)),
    }
    _DEFAULT_AUDIO_PROFILE = ((70, 90), (100, 500))

    # Visual style per therapy strategy; unknown strategies fall back to
    # 'abstract_calm'.
    _VISUAL_STYLES = {
        'deep_relaxation': 'flowing_water',
        'gentle_calming': 'soft_nature',
        'mood_uplift': 'warm_light',
        'stabilization': 'abstract_calm',
    }

    def __init__(self):
        self.audio_generator = self._init_audio_generator()
        self.video_generator = self._init_video_generator()
        self.sync_controller = self._init_sync_controller()

    def _init_audio_generator(self):
        """Return the descriptor dict of the mock audio generator."""
        return {
            'model': 'therapeutic_music_generator_v2',
            'capabilities': ['tone_generation', 'rhythm_sync', 'frequency_modulation'],
            'latency': 0.1  # audio generation latency
        }

    def _init_video_generator(self):
        """Return the descriptor dict of the mock video generator."""
        return {
            'model': 'hunyuan_video_therapy_v1',
            'capabilities': ['visual_sync', 'motion_adaptation', 'color_modulation'],
            'latency': 2.5  # video generation latency
        }

    def _init_sync_controller(self):
        """Return the descriptor dict of the mock sync controller."""
        return {
            'precision': 0.05,  # sync precision (seconds)
            'buffer_size': 1.0,  # buffer size (seconds)
            'compensation_enabled': True  # latency compensation
        }

    @staticmethod
    def _unique_id(prefix: str) -> str:
        """Build ``<prefix>_<unix seconds>_<8 hex chars>``.

        The whole-second timestamp alone repeats for every call made within
        the same second (the mock generates instantly), so a random suffix
        keeps IDs distinct while preserving the original prefix.
        """
        import uuid  # local import keeps the class self-contained
        return f"{prefix}_{int(time.time())}_{uuid.uuid4().hex[:8]}"

    @staticmethod
    def _bounded_score(mean: float, std: float) -> float:
        """Draw a simulated score from N(mean, std) and clamp it to [0, 1]."""
        # A normal draw is unbounded; without the clamp a quality score can
        # occasionally come out above 1.0 or below 0.0.
        return float(min(1.0, max(0.0, np.random.normal(mean, std))))

    def generate_multimodal_therapy(self, request: MultimodalTherapyRequest,
                                    config: MultimodalConfiguration) -> Dict[str, Any]:
        """Generate the full set of mock therapy content for one request.

        Args:
            request: Therapy request; its current/target emotion and
                session duration are read.
            config: Multimodal configuration, passed on to the generation
                and synchronization steps.

        Returns:
            Dict with ``session_id``, ``audio_content``, ``video_content``,
            ``synchronized_content``, ``synergy_metrics`` and
            ``therapy_prediction``.
        """
        # Work out what emotional transition is required.
        emotion_analysis = self._analyze_emotion_requirements(
            request.current_emotion, request.target_emotion
        )

        # Generate the audio content.
        audio_content = self._generate_audio_content(
            emotion_analysis, request, config
        )

        # Generate the video content.
        video_content = self._generate_video_content(
            emotion_analysis, request, config
        )

        # Synchronize the two modalities.
        synchronized_content = self._synchronize_modalities(
            audio_content, video_content, config
        )

        # Evaluate how well they work together.
        synergy_metrics = self._evaluate_modal_synergy(
            synchronized_content, emotion_analysis
        )

        return {
            'session_id': self._unique_id("multimodal"),
            'audio_content': audio_content,
            'video_content': video_content,
            'synchronized_content': synchronized_content,
            'synergy_metrics': synergy_metrics,
            'therapy_prediction': self._predict_therapy_outcome(
                synergy_metrics, emotion_analysis
            )
        }

    def _analyze_emotion_requirements(self, current: Dict, target: Dict) -> Dict:
        """Derive the therapy strategy from the current-to-target emotion shift.

        Both dicts must provide ``valence`` and ``arousal``. The arousal
        change is checked first, so a drop in arousal selects a calming
        strategy regardless of the valence change.
        """
        valence_change = target['valence'] - current['valence']
        arousal_change = target['arousal'] - current['arousal']

        # Choose the therapy strategy.
        if arousal_change < -0.3:
            strategy = "deep_relaxation"
        elif arousal_change < 0:
            strategy = "gentle_calming"
        elif valence_change > 0.3:
            strategy = "mood_uplift"
        else:
            strategy = "stabilization"

        return {
            'valence_change': valence_change,
            'arousal_change': arousal_change,
            # Euclidean length of the (valence, arousal) change.
            'change_magnitude': np.sqrt(valence_change**2 + arousal_change**2),
            'therapy_strategy': strategy,
            # Sum of the absolute changes, capped at 1.0.
            'required_intensity': min(1.0, abs(valence_change) + abs(arousal_change)),
            'modality_preference': self._determine_modality_preference(
                valence_change, arousal_change
            )
        }

    def _determine_modality_preference(self, valence_change: float,
                                       arousal_change: float) -> Dict[str, float]:
        """Return audio/visual weights based on the emotion change."""
        if arousal_change < -0.4:  # arousal must drop substantially
            return {'audio': 0.7, 'visual': 0.3}  # audio-dominant
        elif valence_change > 0.4:  # valence must rise substantially
            return {'audio': 0.4, 'visual': 0.6}  # visual-dominant
        else:
            return {'audio': 0.5, 'visual': 0.5}  # balanced

    def _generate_audio_content(self, emotion_analysis: Dict,
                                request: MultimodalTherapyRequest,
                                config: MultimodalConfiguration) -> Dict:
        """Build a mock audio content descriptor.

        Reads ``therapy_strategy`` from ``emotion_analysis`` and
        ``session_duration`` from ``request``; ``config`` is not read.
        """
        # Simulated generation time.
        generation_time = np.random.uniform(0.5, 2.0)

        # Tempo and frequency range depend on the therapy strategy.
        strategy = emotion_analysis['therapy_strategy']
        tempo_bounds, frequency_range = self._AUDIO_PROFILES.get(
            strategy, self._DEFAULT_AUDIO_PROFILE
        )
        tempo = np.random.uniform(*tempo_bounds)

        return {
            'content_id': self._unique_id("audio"),
            'duration': request.session_duration,
            'tempo_bpm': tempo,
            'frequency_range': frequency_range,
            'generation_time': generation_time,
            'quality_score': self._bounded_score(0.82, 0.08),
            'therapeutic_features': {
                'binaural_beats': tempo < 70,
                'nature_sounds': strategy == 'deep_relaxation',
                'harmonic_progression': True,
                'volume_modulation': True
            }
        }

    def _generate_video_content(self, emotion_analysis: Dict,
                                request: MultimodalTherapyRequest,
                                config: MultimodalConfiguration) -> Dict:
        """Build a mock video content descriptor.

        Reads ``therapy_strategy``, ``arousal_change`` and
        ``change_magnitude`` from ``emotion_analysis`` and
        ``session_duration`` from ``request``; ``config`` is not read.
        """
        # Simulated generation time.
        generation_time = np.random.uniform(3.0, 8.0)

        # Visual style follows the therapy strategy.
        visual_style = self._VISUAL_STYLES.get(
            emotion_analysis['therapy_strategy'], 'abstract_calm'
        )
        arousal_change = emotion_analysis['arousal_change']

        return {
            'content_id': self._unique_id("video"),
            'duration': request.session_duration,
            'visual_style': visual_style,
            # Less motion the larger the arousal change, floored at 0.1.
            'motion_intensity': max(0.1, 1.0 - abs(arousal_change)),
            'color_temperature': 3000 - arousal_change * 400,
            'brightness': 0.3 - abs(arousal_change) * 0.15,
            'generation_time': generation_time,
            'quality_score': self._bounded_score(0.79, 0.10),
            'visual_features': {
                'breathing_sync': True,
                'focus_guidance': emotion_analysis['change_magnitude'] > 0.5,
                'color_therapy': True,
                'geometric_patterns': visual_style == 'abstract_calm'
            }
        }

# Instantiate the mock multimodal therapy system.
multimodal_system = MockMultimodalTherapySystem()

# Report the values actually held by the instance rather than repeating them
# as literals, so the status lines cannot drift from the configuration.
print("✅ 多模态治疗系统初始化完成")
print(f"🎵 音频生成器: {multimodal_system.audio_generator['model']}")
print(f"📹 视频生成器: {multimodal_system.video_generator['model']}")
print(f"⚡ 同步控制器: 精度{multimodal_system.sync_controller['precision']}秒")

In [ ]:
# === 1. 系统初始化和多模态架构 ===
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import time
from datetime import datetime
from dataclasses import dataclass, asdict
from typing import Dict, List, Tuple, Optional, Any
from enum import Enum
import warnings
warnings.filterwarnings('ignore')

# Add the project root to the import path.
# A notebook kernel does not define __file__, so the original expression
# raises NameError there. Fall back to the parent of the working directory
# (assumes the kernel starts in the notebook's own folder, one level below
# the project root -- TODO confirm for this deployment).
try:
    project_root = Path(__file__).parent.parent
except NameError:
    project_root = Path.cwd().parent
# Re-running the cell must not keep appending the same entry.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Use a font with Chinese glyphs and keep the minus sign renderable.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

print("🎭 《心境流转》多模态治疗系统 - 初始化完成")
print(f"🕒 启动时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("🎯 目标: 音视频协同治疗效果评估与优化")
print("="*60)

# 《心境流转》多模态治疗系统测试
## 08_multimodal_therapy_test.ipynb

### 实验目标
- 测试音视频协同治疗效果
- 验证多感官刺激对情绪调节的影响
- 评估实时多模态内容同步性
- 优化跨模态治疗参数配置

### 核心技术
- 音视频时间同步
- 多感官体验设计
- 实时情绪反馈融合
- 跨模态参数优化

---

**实验环境**: JupyterHub GPU 环境  
**GPU要求**: 40-80GB显存  
**测试模式**: 模拟多模态融合 (避免大模型加载)