In [None]:
import numpy as np
import pandas as pd
from typing import List, Tuple, Optional
import os
import matplotlib.pyplot as plt
from scipy import signal
import cv2
from scipy.stats import entropy
from scipy.spatial import ConvexHull
import japanize_matplotlib
import seaborn as sns
from typing import List, Tuple, Dict
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import pyVHR as vhr
from pyVHR.extraction.sig_processing import SignalProcessing
from pyVHR.plot.visualize import *
from pyVHR.BVP import *
vhr.plot.VisualizeParams.renderer = 'notebook'
from scipy.stats import skew, kurtosis, entropy
from skimage.metrics import structural_similarity as ssim
from scipy.fft import fft, fftfreq

# 修正: scikit-image 0.18.3用のインポート
from skimage.feature import local_binary_pattern
# graycomatrix / graycoprops are spelled greycomatrix / greycoprops in older
# scikit-image releases. Try the current spelling first so the GLCM features are
# not silently disabled on newer releases, then fall back to the legacy spelling.
try:
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    try:
        from skimage.feature import greycomatrix as graycomatrix, greycoprops as graycoprops
    except ImportError:
        # Neither spelling is importable: keep both names defined so that
        # callers can detect the missing dependency instead of hitting a NameError.
        graycomatrix = None
        graycoprops = None
        print("警告: graycomatrix/graycopropsをインポートできませんでした")
        try:
            # Kept from the original fallback path; not used by the visible cells.
            from skimage.feature import texture
        except ImportError:
            pass

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from scipy.ndimage import laplace
import pywt
from sklearn.decomposition import PCA

In [None]:
# Collect the input videos, their names and the matching ground-truth CSV paths.
root_dir = "experimentData\\"
data_dirs = [os.path.join(root_dir, d) for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
movie_paths = []
movie_names = []
true_value_rri_csv_array = []
print("動画ディレクトリ:", data_dirs)

for data_dir in data_dirs:
    # Video files (.avi) located directly inside this recording directory
    movie_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.avi')]
    movie_paths.extend(movie_files)

    # Later cells pair movie_paths[i] with data_dirs[i] and movie_names[i], so a
    # directory with zero or several videos shifts every following pairing.
    if len(movie_files) != 1:
        print(f"警告: {data_dir} の.aviファイル数が1ではありません ({len(movie_files)}個)")

    # The recording name is the directory name
    movie_name = os.path.basename(data_dir)
    movie_names.append(movie_name)

    # RRI ground-truth file expected next to the video
    true_value_rri_csv_array.append(os.path.join(data_dir, 'RRI_Simple_' + movie_name + '.csv'))

# Ground-truth CSV paths share the video's path with the extension swapped.
# Derived once after the loop instead of being rebuilt on every iteration.
true_value_csv_array = [f.replace('.avi', '.csv') for f in movie_paths]


f_1_ffi = 0.0399  # start of the LF band
f_2 = 0.151  # end of the LF band / start of the HF band
f_3 = 0.401  # end of the HF band

# start_index = 6
# end_index = len(movie_paths)

# data_dirs = data_dirs[start_index:end_index]
# movie_paths = movie_paths[start_index:end_index]
# movie_names = movie_names[start_index:end_index]
# true_value_csv_array = true_value_csv_array[start_index:end_index]
# true_value_rri_csv_array = true_value_rri_csv_array[start_index:end_index]

print(f"data_dirs: {data_dirs}")
print(f"movie_paths: {movie_paths}")
print(f"movie_names: {movie_names}")
print(f"true_value_csv_array: {true_value_csv_array}")
print(f"true_value_rri_csv_array: {true_value_rri_csv_array}")

In [None]:
# Name of the per-recording output subdirectory. It is joined onto each data
# directory and holds the ROI visualization image and the extracted-signals CSV.
SAVE_DIR  = "ExploreParamPCA"

### 肌領域を1フレーム目から検出し、フレームごとにRGBを抽出

抽出指標
- RGB
- HSV
- Lightness
- LBP
  - エントロピー
  - 分散
  - 歪度
  - 尖度
  - χ²距離
  - Uniform比率
- オプティカルフロー(1フレーム目を基準とし、2フレーム目から)
  - 平均動き量
  - 動き量の標準偏差
  - 1ピクセル/5ピクセル/10ピクセルを超えて動いたピクセルの割合
- SSIM(1フレーム目を基準とし、2フレーム目から)
- PSNR(1フレーム目を基準とし、2フレーム目から)
- Canny
- GLCM
  - 等方性テクスチャ

In [None]:
def calculate_lbp_features(gray_roi, radius=1, n_points=8):
    """Compute summary statistics of the uniform LBP map of a grayscale ROI.

    Args:
        gray_roi: 2-D grayscale image handed to ``local_binary_pattern``.
        radius: Radius of the LBP sampling circle.
        n_points: Number of sampling points on the circle.

    Returns:
        dict with the keys ``lbp_entropy``, ``lbp_variance``, ``lbp_skewness``,
        ``lbp_kurtosis``, ``lbp_chi2_distance`` and ``lbp_uniform_ratio``.
    """
    lbp = local_binary_pattern(gray_roi, n_points, radius, method='uniform')
    codes = lbp.ravel()

    # method='uniform' produces n_points + 2 distinct codes: 0..n_points for the
    # uniform patterns and n_points + 1 for every non-uniform pattern. Use that
    # fixed bin count instead of lbp.max() + 1 so entropy and chi-square are
    # computed over the same number of bins for every frame.
    n_bins = n_points + 2
    hist, _ = np.histogram(codes, bins=n_bins, range=(0, n_bins), density=True)

    # Shannon entropy of the code histogram (offset keeps empty bins finite)
    lbp_entropy = entropy(hist + 1e-10)

    # Moments of the raw code values
    lbp_variance = np.var(lbp)
    lbp_skewness = skew(codes)
    lbp_kurtosis_val = kurtosis(codes)

    # Chi-square distance between the histogram and a flat distribution
    uniform_hist = np.ones(n_bins) / n_bins
    chi2_distance = np.sum((hist - uniform_hist) ** 2 / (uniform_hist + 1e-10))

    # Share of pixels whose code is a uniform pattern (code <= n_points)
    uniform_ratio = np.sum(lbp <= n_points) / lbp.size

    return {
        'lbp_entropy': lbp_entropy,
        'lbp_variance': lbp_variance,
        'lbp_skewness': lbp_skewness,
        'lbp_kurtosis': lbp_kurtosis_val,
        'lbp_chi2_distance': chi2_distance,
        'lbp_uniform_ratio': uniform_ratio
    }

In [None]:
def calculate_optical_flow_features(prev_gray, curr_gray, mask):
    """Summarize dense optical flow between two grayscale frames inside a mask.

    Args:
        prev_gray: Reference grayscale frame.
        curr_gray: Grayscale frame compared against ``prev_gray``.
        mask: Array with the same height/width as the frames; pixels > 0 are
            the region of interest.

    Returns:
        dict with ``flow_mean_motion``, ``flow_std_motion`` and the fraction of
        ROI pixels whose displacement exceeds 1, 5 and 10 pixels
        (``flow_ratio_1px`` / ``flow_ratio_5px`` / ``flow_ratio_10px``).
        Every value is NaN when the mask selects no pixel.
    """
    roi = mask > 0

    # An empty ROI would divide by zero below; report "no data" instead and
    # skip the flow computation altogether.
    if not np.any(roi):
        return {
            'flow_mean_motion': np.nan,
            'flow_std_motion': np.nan,
            'flow_ratio_1px': np.nan,
            'flow_ratio_5px': np.nan,
            'flow_ratio_10px': np.nan
        }

    # Dense optical flow (Farneback)
    flow = cv2.calcOpticalFlowFarneback(
        prev_gray, curr_gray, None,
        pyr_scale=0.5, levels=3, winsize=15,
        iterations=3, poly_n=5, poly_sigma=1.2, flags=0
    )

    # Keep only the flow vectors inside the ROI and take their length
    flow_roi = flow[roi]
    magnitude = np.sqrt(flow_roi[:, 0]**2 + flow_roi[:, 1]**2)
    n_pixels = len(magnitude)

    return {
        'flow_mean_motion': np.mean(magnitude),
        'flow_std_motion': np.std(magnitude),
        'flow_ratio_1px': np.sum(magnitude > 1) / n_pixels,
        'flow_ratio_5px': np.sum(magnitude > 5) / n_pixels,
        'flow_ratio_10px': np.sum(magnitude > 10) / n_pixels
    }


In [None]:
def calculate_ssim_psnr(prev_gray, curr_gray, mask):
    """Compute SSIM and PSNR between two grayscale frames inside a mask.

    Both frames are cropped to the bounding rectangle of the mask and pixels
    outside the mask are set to 0 before comparison.

    Args:
        prev_gray: Reference grayscale frame.
        curr_gray: Grayscale frame compared against ``prev_gray``.
        mask: Array with the same height/width as the frames; pixels > 0 are
            the region of interest.

    Returns:
        dict with ``ssim`` and ``psnr``. PSNR is reported as 100 when the two
        crops are identical. Both values are NaN when the mask is empty.
    """
    # An empty mask has no bounding rectangle to crop to.
    if not np.any(mask > 0):
        return {'ssim': np.nan, 'psnr': np.nan}

    # Crop everything to the bounding rectangle of the mask
    x, y, w, h = cv2.boundingRect(mask)
    prev_roi = prev_gray[y:y+h, x:x+w]
    curr_roi = curr_gray[y:y+h, x:x+w]
    mask_roi = mask[y:y+h, x:x+w]

    # Zero out pixels outside the mask
    prev_roi_masked = np.where(mask_roi > 0, prev_roi, 0)
    curr_roi_masked = np.where(mask_roi > 0, curr_roi, 0)

    # Only the mean SSIM is needed, so the full SSIM map is not requested.
    ssim_value = ssim(prev_roi_masked, curr_roi_masked)

    # PSNR for 8-bit data (peak value 255)
    mse = np.mean((prev_roi_masked.astype(float) - curr_roi_masked.astype(float)) ** 2)
    if mse == 0:
        psnr_value = 100  # identical crops
    else:
        psnr_value = 20 * np.log10(255.0 / np.sqrt(mse))

    return {
        'ssim': ssim_value,
        'psnr': psnr_value
    }

In [None]:
def calculate_glcm_features(gray_roi):
    """Compute a GLCM-based isotropy score for a grayscale ROI.

    The co-occurrence matrix is built at distance 1 for four directions
    (0, 45, 90 and 135 degrees) after reducing the image to 64 gray levels.
    The score is the inverse of the standard deviation of the per-direction
    contrast, so a texture whose contrast is similar in all directions scores
    high.

    Args:
        gray_roi: 2-D grayscale image; values are integer-divided by 4 before
            use, i.e. an 8-bit range is assumed.

    Returns:
        dict with the single key ``glcm_isotropy``.

    Raises:
        RuntimeError: If ``graycomatrix`` / ``graycoprops`` could not be
            imported (they are set to ``None`` by the import cell in that case).
    """
    if graycomatrix is None or graycoprops is None:
        raise RuntimeError(
            "graycomatrix/graycopropsが利用できないため、GLCM特徴量を計算できません"
        )

    distances = [1]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    # Reduce 256 gray levels to 64 to keep the matrix small
    gray_roi_reduced = (gray_roi // 4).astype(np.uint8)

    glcm = graycomatrix(
        gray_roi_reduced,
        distances=distances,
        angles=angles,
        levels=64,
        symmetric=True,
        normed=True
    )

    # graycoprops returns one value per (distance, angle); evaluate it once and
    # take the row of the single distance instead of recomputing it per angle.
    contrast_values = graycoprops(glcm, 'contrast')[0, :]

    # Small spread between directions -> isotropic texture -> large score
    isotropy = 1.0 / (np.std(contrast_values) + 1e-10)

    return {
        'glcm_isotropy': isotropy
    }


In [None]:
def calculate_canny_features(gray_roi):
    """Measure the edge density of a grayscale ROI with the Canny detector.

    Args:
        gray_roi: Grayscale image passed to ``cv2.Canny`` with the hysteresis
            thresholds 100 and 200.

    Returns:
        dict with the single key ``canny_edge_ratio``: the fraction of pixels
        marked as edges.
    """
    edge_map = cv2.Canny(gray_roi, 100, 200)
    edge_pixel_count = (edge_map > 0).sum()
    return {'canny_edge_ratio': edge_pixel_count / edge_map.size}

In [None]:
def calculate_skin_angle(r_signal, g_signal, b_signal):
    """Angle between the mean RGB colour and a reference skin-tone direction.

    Args:
        r_signal: (N,) red channel signal.
        g_signal: (N,) green channel signal.
        b_signal: (N,) blue channel signal.

    Returns:
        Angle in degrees between the time-averaged RGB vector and the
        reference direction [0.7682, 0.5121, 0.3841]. ``np.nan`` (after
        printing a warning) when the mean RGB vector is numerically zero.
    """
    eps = 1e-9  # below this norm the direction is undefined

    # Time-averaged colour
    mean_rgb = np.array([np.mean(r_signal), np.mean(g_signal), np.mean(b_signal)])
    norm = np.linalg.norm(mean_rgb)

    if norm < eps:
        print("警告: RGB平均値がほぼゼロです。角度を計算できません。")
        return np.nan

    skin_vector = mean_rgb / norm

    # The reference is quoted to four decimals and is therefore not exactly unit
    # length. Normalize it so the dot product is a true cosine; otherwise a
    # perfectly aligned colour reports roughly 0.54 degrees instead of 0.
    reference_vector = np.array([0.7682, 0.5121, 0.3841])
    reference_vector = reference_vector / np.linalg.norm(reference_vector)

    # Clip to guard arccos against rounding slightly outside [-1, 1]
    cosine_similarity = np.clip(np.dot(skin_vector, reference_vector), -1.0, 1.0)

    return np.degrees(np.arccos(cosine_similarity))

In [None]:
def visualize_roi(frame, roi_info):
    """Draw the ROI on a copy of a frame.

    Parameters:
    -----------
    frame : np.ndarray
        Source image; a 3-channel image is required because the overlay is
        blended with it channel by channel.
    roi_info : dict
        ROI description with the keys ``'mask'`` (single-channel mask with the
        frame's height/width), ``'hull_points'`` (convex-hull vertices as
        (x, y) rows) and ``'landmarks'`` (landmark positions as (x, y) rows).

    Returns:
    --------
    vis_frame : np.ndarray
        Copy of ``frame`` with the mask tinted, the hull outlined and the
        landmarks drawn as dots.
    """
    vis_frame = frame.copy()

    # Semi-transparent green tint over the mask. The overlay starts black and
    # only its green channel carries the mask; converting the mask with
    # GRAY2BGR would fill all three channels and produce a white tint.
    overlay = np.zeros_like(vis_frame)
    overlay[:, :, 1] = roi_info['mask']
    vis_frame = cv2.addWeighted(vis_frame, 0.7, overlay, 0.3, 0)

    # Outline of the convex hull. OpenCV drawing functions need 32-bit integer
    # points, while NumPy's default integer type depends on platform/version.
    hull_points = np.asarray(roi_info['hull_points'], dtype=np.int32)
    cv2.polylines(vis_frame, [hull_points], True, (0, 255, 0), 2)

    # Landmarks as single-pixel dots; plain Python ints for the same reason
    for pt in roi_info['landmarks']:
        cv2.circle(vis_frame, (int(pt[0]), int(pt[1])), 1, (255, 0, 0), -1)

    return vis_frame

In [None]:
# Main processing: for each video, detect the face ROI on the first frame, then
# extract per-frame features inside that fixed ROI and save them as a CSV.
for i, inputMoviePath in enumerate(movie_paths):
    rootDir = data_dirs[i]
    dataName = movie_names[i]
    save_dir = os.path.join(rootDir, SAVE_DIR)
    print(f"\n{'='*60}")
    print(f"処理中: {dataName}")
    print(f"保存先: {save_dir}")
    print(f"{'='*60}")
    os.makedirs(save_dir, exist_ok=True)

    cap = cv2.VideoCapture(inputMoviePath)
    try:
        # Video metadata
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # fps is a divisor for the duration and for every timestamp below, so a
        # capture that did not open or reports no frame rate is skipped.
        if not cap.isOpened() or fps <= 0:
            print(f"エラー: 動画の読み込みに失敗しました: {inputMoviePath}")
            continue

        duration = total_frames / fps
        print(f"動画情報: {total_frames}フレーム, {fps:.2f}fps, {duration:.2f}秒")

        # ROI detection on the first frame
        ret, first_frame = cap.read()
        if not ret:
            print(f"エラー: 動画の読み込みに失敗しました: {inputMoviePath}")
            continue

        # NOTE(review): `mp` (mediapipe) is not imported explicitly in the
        # visible import cell; presumably it arrives through one of the pyVHR
        # star imports - confirm.
        mp_face_mesh = mp.solutions.face_mesh
        with mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5) as face_mesh:

            frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
            results_mp = face_mesh.process(frame_rgb)

            if not results_mp.multi_face_landmarks:
                print(f"エラー: 顔が検出できませんでした: {inputMoviePath}")
                continue

            landmarks = results_mp.multi_face_landmarks[0]
            h, w = first_frame.shape[:2]

            # Landmark coordinates are normalized; scale them to pixels. int32
            # is explicit because OpenCV drawing functions need 32-bit points.
            ldmks = np.array([[int(l.x * w), int(l.y * h)]
                              for l in landmarks.landmark], dtype=np.int32)

            hull = ConvexHull(ldmks)
            hull_points = ldmks[hull.vertices]

            mask = np.zeros((h, w), dtype=np.uint8)
            cv2.fillConvexPoly(mask, hull_points, 255)

            roi_info = {
                'mask': mask,
                'hull_points': hull_points,
                'landmarks': ldmks
            }

            vis_frame = visualize_roi(first_frame, roi_info)
            roi_vis_path = os.path.join(save_dir, "roi_visualization.jpg")
            cv2.imwrite(roi_vis_path, vis_frame)
            print(f"ROI可視化画像を保存: {roi_vis_path}")
            print(f"1フレーム目でROI検出完了: {len(hull_points)}個の凸包頂点")

        # The ROI is fixed for the whole video, so everything derived from the
        # mask alone is computed once here rather than for every frame.
        roi_selector = mask > 0
        if not roi_selector.any():
            raise ValueError("ROI内に有効なピクセルが存在しません")
        x, y, w_roi, h_roi = cv2.boundingRect(mask)
        mask_roi_2d = mask[y:y+h_roi, x:x+w_roi]

        # Feature extraction over all frames
        print("全フレームで特徴量抽出中...")

        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        frame_count = 0

        # One list per output column; the key order is the CSV column order.
        results = {
            'frame_number': [], 'timestamp': [], 'contrast': [],
            'r_mean': [], 'g_mean': [], 'b_mean': [],
            'r_std': [], 'g_std': [], 'b_std': [],
            'h_mean': [], 's_mean': [], 'v_mean': [],
            'h_std': [], 's_std': [], 'v_std': [],
            'l_mean': [],
            # LBP features
            'lbp_entropy': [], 'lbp_variance': [], 'lbp_skewness': [],
            'lbp_kurtosis': [], 'lbp_chi2_distance': [], 'lbp_uniform_ratio': [],
            # Optical flow
            'flow_mean_motion': [], 'flow_std_motion': [],
            'flow_ratio_1px': [], 'flow_ratio_5px': [], 'flow_ratio_10px': [],
            # SSIM & PSNR
            'ssim': [], 'psnr': [],
            # Canny
            'canny_edge_ratio': [],
            # GLCM
            'glcm_isotropy': [],
            # Skin colour angle
            'angle': []
        }

        # Optical flow, SSIM and PSNR are measured against the first frame.
        gray_first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Pixels inside the ROI in each colour space
            valid_pixels = frame_rgb[roi_selector]
            roi_bgr = frame[roi_selector]
            roi_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)[roi_selector]

            # RGB statistics
            r_mean = np.mean(valid_pixels[:, 0])
            g_mean = np.mean(valid_pixels[:, 1])
            b_mean = np.mean(valid_pixels[:, 2])

            # HSV statistics; the V mean doubles as the lightness column
            v_mean = np.mean(roi_hsv[:, 2])

            # Grayscale crop of the ROI with pixels outside the mask zeroed
            gray_roi_2d = gray[y:y+h_roi, x:x+w_roi]
            gray_roi_2d_masked = np.where(mask_roi_2d > 0, gray_roi_2d, 0)

            frame_record = {
                'frame_number': frame_count,
                'timestamp': frame_count / fps,
                'contrast': np.std(roi_bgr),
                'r_mean': r_mean,
                'g_mean': g_mean,
                'b_mean': b_mean,
                'r_std': np.std(valid_pixels[:, 0]),
                'g_std': np.std(valid_pixels[:, 1]),
                'b_std': np.std(valid_pixels[:, 2]),
                'h_mean': np.mean(roi_hsv[:, 0]),
                's_mean': np.mean(roi_hsv[:, 1]),
                'v_mean': v_mean,
                'h_std': np.std(roi_hsv[:, 0]),
                's_std': np.std(roi_hsv[:, 1]),
                'v_std': np.std(roi_hsv[:, 2]),
                'l_mean': v_mean,
                'angle': calculate_skin_angle(
                    r_signal=np.array([r_mean]),
                    g_signal=np.array([g_mean]),
                    b_signal=np.array([b_mean]),
                ),
            }
            # Each helper returns a dict whose keys are column names in `results`.
            frame_record.update(calculate_lbp_features(gray_roi_2d_masked))
            frame_record.update(calculate_canny_features(gray_roi_2d_masked))
            frame_record.update(calculate_glcm_features(gray_roi_2d_masked))
            frame_record.update(calculate_optical_flow_features(gray_first_frame, gray, mask))
            frame_record.update(calculate_ssim_psnr(gray_first_frame, gray, mask))

            for key, value in frame_record.items():
                results[key].append(value)

            frame_count += 1

            if frame_count % 100 == 0:
                # Some containers report no frame count; avoid dividing by it.
                if total_frames > 0:
                    print(f"  処理中: {frame_count}/{total_frames} フレーム ({frame_count/total_frames*100:.1f}%)")
                else:
                    print(f"  処理中: {frame_count} フレーム")
    finally:
        # Runs on normal completion, on `continue` and on exceptions alike.
        cap.release()

    print(f"特徴量抽出完了: {frame_count}フレーム処理")

    # Convert to a DataFrame and save
    signals_df = pd.DataFrame(results)

    save_path = os.path.join(save_dir, "extracted_signals.csv")
    signals_df.to_csv(save_path, index=False)
    print(f"抽出した信号を保存: {save_path}")

### windowごとに抽出信号を分類

抽出指標
- ウェーブレット変換
  - グレースケール画像のウェーブレット変換のHHサブバンドのエネルギー
- FFT
  - グレースケール画像
    - 高周波エネルギー比

In [None]:
def array_to_full_string(arr):
    """Render a value as text without NumPy's "..." summarization.

    Strings are returned unchanged, NumPy arrays and lists are formatted in
    full on a single line with space-separated elements, and anything else
    goes through ``str``.
    """
    if isinstance(arr, str):
        return arr

    if not isinstance(arr, (np.ndarray, list)):
        return str(arr)

    # Infinite threshold / line width disable truncation and line wrapping.
    return np.array2string(
        np.array(arr),
        threshold=np.inf,
        max_line_width=np.inf,
        separator=' ',
    )

In [None]:
class StrideSegmentCalculator:
    """Enumerates sliding analysis windows for every (window size, stride) pair."""

    def __init__(
        self,
        window_sizes: List[float] = [2, 3, 4, 5],
        strides: List[float] = [0.1, 0.5, 1, 1.5, 2],
    ):
        """
        Parameters:
        -----------
        window_sizes : List[float]
            Window lengths in seconds
        strides : List[float]
            Window step sizes in seconds
        """
        # Copy the inputs: the defaults are shared list objects, so storing them
        # directly would let one instance's mutation leak into every other one.
        self.window_sizes = list(window_sizes)
        self.strides = list(strides)

    def calculate_overlap(self, window_size: float, stride: float) -> float:
        """
        Overlap between consecutive windows as a percentage.

        Parameters:
        -----------
        window_size : float
            Window length in seconds
        stride : float
            Window step in seconds

        Returns:
        --------
        float
            Overlap in percent, rounded to two decimals; 0 when the stride is
            at least as long as the window.
        """
        if stride >= window_size:
            return 0
        overlap = (window_size - stride) / window_size * 100
        return round(overlap, 2)

    def calculate_segments(
        self, window_size: float, stride: float, total_frames: int, fps: float
    ) -> List[Tuple[int, int]]:
        """
        Frame ranges of all complete windows that fit into the recording.

        Parameters:
        -----------
        window_size : float
            Window length in seconds
        stride : float
            Window step in seconds
        total_frames : int
            Total number of frames
        fps : float
            Frame rate

        Returns:
        --------
        List[Tuple[int, int]]
            (start frame, end frame) per window; the end frame is
            start + window length in frames.

        Raises:
        -------
        ValueError
            If the stride rounds to less than one frame, which would never
            advance the window.
        """
        frames_per_window = round(window_size * fps)
        frames_per_stride = round(stride * fps)

        # A stride of zero (or less) frames would keep start_frame fixed and
        # make the loop below run forever.
        if frames_per_stride <= 0:
            raise ValueError(
                f"stride={stride}s at fps={fps} is shorter than one frame"
            )

        segments = []
        start_frame = 0

        while start_frame + frames_per_window <= total_frames:
            segments.append((start_frame, start_frame + frames_per_window))
            start_frame += frames_per_stride

        return segments

    def create_analysis_dataframe(self, total_frames: int, fps: float) -> pd.DataFrame:
        """
        Build one row per window for every window size / stride combination.

        Parameters:
        -----------
        total_frames : int
            Total number of frames
        fps : float
            Frame rate

        Returns:
        --------
        pd.DataFrame
            columns: window_size, stride, overlap, segment_number, frame_start, frame_end
        """
        data_dict = {
            "window_size": [],
            "stride": [],
            "overlap": [],
            "segment_number": [],
            "frame_start": [],
            "frame_end": [],
        }

        for window_size in self.window_sizes:
            for stride in self.strides:
                overlap = self.calculate_overlap(window_size, stride)
                segments = self.calculate_segments(
                    window_size, stride, total_frames, fps
                )

                for i, (start_frame, end_frame) in enumerate(segments):
                    data_dict["window_size"].append(window_size)
                    data_dict["stride"].append(stride)
                    data_dict["overlap"].append(overlap)
                    data_dict["segment_number"].append(i)
                    data_dict["frame_start"].append(start_frame)
                    data_dict["frame_end"].append(end_frame)

        return pd.DataFrame(data_dict)


class PulseAnalysisDataStrides:
    """Grid of one value per (window size, stride) pair, with heatmap export."""

    def __init__(self, window_sizes, strides):
        # Axis values of the grid
        self.window_sizes = window_sizes
        self.strides = strides

        # Empty table: rows are window sizes, columns are strides
        row_labels = pd.Index(self.window_sizes, name="window_size")
        column_labels = pd.Index(self.strides, name="strides")
        self.results = pd.DataFrame(index=row_labels, columns=column_labels)

    def add_accuracy(self, window_size: float, strides: int, accuracy: float):
        """
        Store one value in the grid.

        Parameters:
        -----------
        window_size : float
            Window length in seconds (row label)
        strides : int
            Stride in seconds (column label)
        accuracy : float
            Value to store in that cell
        """
        self.results.loc[window_size, strides] = accuracy

    def _create_heatmap_dataframe(self):
        """
        Rebuild the grid in the layout used for plotting.

        Returns:
        --------
        pd.DataFrame
            Rows are window sizes, columns are strides (column axis named
            "stride").
        """
        per_window = {
            size: [self.results.loc[size, step] for step in self.strides]
            for size in self.window_sizes
        }
        by_stride = pd.DataFrame({"stride": self.strides, **per_window})
        return by_stride.set_index("stride").T

    def save_heatmap(
        self,
        title: str,
        save_path: str,
        figsize: tuple = (10, 8),
        cmap: str = "YlGnBu",
        colorbar_label: str = "MAE",
    ):
        """
        Render the grid as an annotated heatmap and write it to disk.

        Parameters:
        -----------
        title : str
            Plot title
        save_path : str
            Output file path
        figsize : tuple, optional
            Figure size (default: (10, 8))
        cmap : str, optional
            Colormap name (default: 'YlGnBu')
        colorbar_label : str, optional
            Colorbar label (default: 'MAE')
        """
        grid = self._create_heatmap_dataframe()
        heatmap_options = {
            "annot": True,
            "fmt": ".4f",
            "cmap": cmap,
            "cbar_kws": {"label": colorbar_label},
        }

        plt.figure(figsize=figsize)
        sns.heatmap(grid, **heatmap_options)
        plt.title(f"{title}")
        plt.xlabel("Stride [s]")
        plt.ylabel("Window Size [s]")
        plt.tight_layout()
        plt.savefig(save_path, dpi=300)
        plt.close()

    def save_heatmap_std(self, title: str, save_path: str, figsize: tuple = (10, 8)):
        """
        Same as ``save_heatmap`` but styled for standard deviations.

        Parameters:
        -----------
        title : str
            Plot title
        save_path : str
            Output file path
        figsize : tuple, optional
            Figure size (default: (10, 8))
        """
        self.save_heatmap(
            title=title,
            save_path=save_path,
            figsize=figsize,
            cmap="Reds",
            colorbar_label="Standard Deviation",
        )

def find_closest_ground_truth_rri_and_time(
    rPPG_peak_time, trueValueRRITimeArray, trueValueRRIArray
):
    """
    Return the ground-truth RRI sample whose time is closest to an rPPG peak.

    Parameters:
    -----------
    rPPG_peak_time : float
        Time of the rPPG peak
    trueValueRRITimeArray : array-like
        Times of the ground-truth RRI samples
    trueValueRRIArray : array-like
        Ground-truth RRI values, aligned with ``trueValueRRITimeArray``

    Returns:
    --------
    tuple
        (time of the closest ground-truth sample, its RRI value). On ties the
        earliest sample wins.

    Raises:
    -------
    ValueError
        If the arrays are empty (raised by ``np.argmin``).
    """
    # Work on plain arrays so that lists are accepted and a pandas Series is
    # indexed by position rather than by its index labels.
    times = np.asarray(trueValueRRITimeArray)
    rri_values = np.asarray(trueValueRRIArray)

    closest_index = np.argmin(np.abs(times - rPPG_peak_time))

    return times[closest_index], rri_values[closest_index]

In [None]:
# Analysis window lengths, in seconds, passed to StrideSegmentCalculator.
WINDOW_SIZES = [2, 4, 6, 8, 10]
# Window step sizes, in seconds, passed to StrideSegmentCalculator.
STRIDES = [2]

In [None]:
def calculate_wavelet_hh_energy(gray_frames):
    """
    Energy of the HH (diagonal detail) sub-band of a single-level 2-D Haar
    wavelet transform, per frame and summarized over the window.

    Parameters:
    -----------
    gray_frames : list of np.ndarray
        Grayscale images

    Returns:
    --------
    dict
        ``hh_energies`` (np.ndarray, one value per frame) and the window
        statistics ``mean_hh_energy``, ``std_hh_energy``, ``max_hh_energy``
        and ``min_hh_energy``. The statistics are NaN for an empty window.
    """
    # np.max / np.min raise on an empty array; report "no data" instead.
    if len(gray_frames) == 0:
        return {
            'hh_energies': np.array([]),
            'mean_hh_energy': np.nan,
            'std_hh_energy': np.nan,
            'max_hh_energy': np.nan,
            'min_hh_energy': np.nan
        }

    hh_energies = []

    for gray_frame in gray_frames:
        # Single-level 2-D wavelet transform (Haar)
        cA, (cH, cV, cD) = pywt.dwt2(gray_frame, 'haar')

        # Energy of the diagonal detail coefficients
        hh_energies.append(np.sum(cD ** 2))

    hh_energies = np.array(hh_energies)

    return {
        'hh_energies': hh_energies,
        'mean_hh_energy': np.mean(hh_energies),
        'std_hh_energy': np.std(hh_energies),
        'max_hh_energy': np.max(hh_energies),
        'min_hh_energy': np.min(hh_energies)
    }

In [None]:
def calculate_fft_high_freq_ratio(gray_frames, threshold_ratio=0.5):
    """
    Share of 2-D FFT power that lies in the high spatial frequencies, per frame
    and summarized over the window.

    Parameters:
    -----------
    gray_frames : list of np.ndarray
        Grayscale images (2-D)
    threshold_ratio : float
        A frequency bin counts as "high" when its distance from the centre of
        the shifted spectrum exceeds ``threshold_ratio`` times the
        centre-to-corner distance.

    Returns:
    --------
    dict
        ``high_freq_ratios`` (np.ndarray, one value per frame) and the window
        statistics ``mean_high_freq_ratio``, ``std_high_freq_ratio``,
        ``max_high_freq_ratio`` and ``min_high_freq_ratio``. The statistics
        are NaN for an empty window.
    """
    # np.max / np.min raise on an empty array; report "no data" instead.
    if len(gray_frames) == 0:
        return {
            'high_freq_ratios': np.array([]),
            'mean_high_freq_ratio': np.nan,
            'std_high_freq_ratio': np.nan,
            'max_high_freq_ratio': np.nan,
            'min_high_freq_ratio': np.nan
        }

    # The high-frequency mask depends only on the image shape, so it is built
    # once per distinct shape instead of once per frame.
    masks_by_shape = {}
    high_freq_ratios = []

    for gray_frame in gray_frames:
        shape = gray_frame.shape
        if shape not in masks_by_shape:
            rows, cols = shape
            crow, ccol = rows // 2, cols // 2

            # Distance of every bin from the centre of the shifted spectrum
            y, x = np.ogrid[:rows, :cols]
            distance_from_center = np.sqrt((x - ccol)**2 + (y - crow)**2)
            max_distance = np.sqrt(crow**2 + ccol**2)

            masks_by_shape[shape] = distance_from_center > (max_distance * threshold_ratio)
        high_freq_mask = masks_by_shape[shape]

        # Power spectrum with the zero frequency moved to the centre
        f_shift = np.fft.fftshift(np.fft.fft2(gray_frame))
        magnitude_spectrum = np.abs(f_shift) ** 2

        high_freq_energy = np.sum(magnitude_spectrum[high_freq_mask])
        total_energy = np.sum(magnitude_spectrum)

        # Offset keeps an all-zero frame from dividing by zero
        high_freq_ratios.append(high_freq_energy / (total_energy + 1e-10))

    high_freq_ratios = np.array(high_freq_ratios)

    return {
        'high_freq_ratios': high_freq_ratios,
        'mean_high_freq_ratio': np.mean(high_freq_ratios),
        'std_high_freq_ratio': np.std(high_freq_ratios),
        'max_high_freq_ratio': np.max(high_freq_ratios),
        'min_high_freq_ratio': np.min(high_freq_ratios)
    }

In [None]:
def calculate_temporal_frequency_features(signal, fps):
    """
    Frequency-domain features of a time series (1-D FFT).

    Parameters:
    -----------
    signal : np.ndarray
        Time series (for example a per-frame mean value)
    fps : float
        Sampling rate

    Returns:
    --------
    dict
        ``dominant_freq`` (frequency of the strongest positive-frequency bin),
        ``spectral_entropy`` (base-2 entropy of the normalized power spectrum)
        and ``high_freq_power_ratio`` (share of power above 0.5 in the units of
        ``fps``). All values are NaN when the signal is too short to have a
        positive-frequency bin.
    """
    def _no_data():
        return {
            'dominant_freq': np.nan,
            'spectral_entropy': np.nan,
            'high_freq_power_ratio': np.nan
        }

    n = len(signal)
    if n < 2:
        return _no_data()

    # FFT and the frequency of each bin
    yf = fft(signal)
    xf = fftfreq(n, 1/fps)

    # Keep strictly positive frequencies only (drops DC and the mirrored half)
    positive_freq_idx = xf > 0
    xf_positive = xf[positive_freq_idx]
    power_spectrum = np.abs(yf[positive_freq_idx]) ** 2

    # A 2-sample signal only has the bins 0 and -fps/2, which leaves nothing
    # here and would make argmax raise.
    if power_spectrum.size == 0:
        return _no_data()

    # Dominant frequency
    dominant_freq = xf_positive[np.argmax(power_spectrum)]

    # Spectral entropy
    total_power = np.sum(power_spectrum)
    normalized_power = power_spectrum / (total_power + 1e-10)
    spectral_entropy = -np.sum(normalized_power * np.log2(normalized_power + 1e-10))

    # Share of power strictly above the 0.5 threshold
    high_freq_threshold = 0.5
    high_freq_power = np.sum(power_spectrum[xf_positive > high_freq_threshold])
    high_freq_power_ratio = high_freq_power / (total_power + 1e-10)

    return {
        'dominant_freq': dominant_freq,
        'spectral_entropy': spectral_entropy,
        'high_freq_power_ratio': high_freq_power_ratio
    }

In [None]:
def calculate_motion_energy(flow_magnitudes):
    """
    Summarize a time series of optical-flow magnitudes.

    Parameters:
    -----------
    flow_magnitudes : np.ndarray
        Flow magnitude per time step

    Returns:
    --------
    dict
        ``motion_energy`` (sum of squares), ``motion_variance`` and
        ``motion_smoothness`` (inverse of the mean absolute step-to-step
        change; NaN for a single sample). All values are NaN for empty input.
    """
    sample_count = len(flow_magnitudes)

    if sample_count == 0:
        return {
            'motion_energy': np.nan,
            'motion_variance': np.nan,
            'motion_smoothness': np.nan
        }

    energy = np.sum(flow_magnitudes ** 2)
    variance = np.var(flow_magnitudes)

    # Smoothness needs at least one step between consecutive samples
    if sample_count > 1:
        mean_abs_step = np.mean(np.abs(np.diff(flow_magnitudes)))
        smoothness = 1.0 / (mean_abs_step + 1e-10)
    else:
        smoothness = np.nan

    return {
        'motion_energy': energy,
        'motion_variance': variance,
        'motion_smoothness': smoothness
    }

In [None]:
def calculate_texture_dynamics(lbp_entropy_series):
    """
    Measure how much a texture descriptor changes over time.

    Parameters
    ----------
    lbp_entropy_series : np.ndarray
        Time series of LBP entropy values.

    Returns
    -------
    texture_dynamics : dict
        'texture_change_rate' : mean absolute first difference of the series.
        'texture_stability'   : reciprocal of the change rate (plus a small
                                epsilon to avoid division by zero).
        Both values are NaN when fewer than two samples are supplied.
    """
    # A first difference needs at least two samples.
    if len(lbp_entropy_series) < 2:
        return dict(texture_change_rate=np.nan, texture_stability=np.nan)

    step_sizes = np.abs(np.diff(lbp_entropy_series))
    change_rate = np.mean(step_sizes)
    stability = 1.0 / (change_rate + 1e-10)

    return dict(texture_change_rate=change_rate, texture_stability=stability)

In [None]:
# ===== Main processing: per-window signal and feature extraction =====
# For each movie: load the per-frame table (extracted_signals.csv), cut it into
# the analysis windows produced by StrideSegmentCalculator, compute the
# window-level features and write window_signals_<dataName>.csv.
# NOTE(review): movie_paths, data_dirs and movie_names are all indexed with the
# same `i`; this presumably relies on exactly one .avi per data directory — confirm.
all_results_list = []
for i in range(len(movie_paths)):
    inputMoviePath = movie_paths[i]
    rootDir = data_dirs[i]
    dataName = movie_names[i]

    print(f'Processing movie: {inputMoviePath}')

    cap = cv2.VideoCapture(inputMoviePath)
    # try/finally guarantees that the capture handle is released even when a CSV
    # is missing or a feature function raises part-way through the movie.
    try:
        # Fail early with a clear message instead of continuing with fps == 0.
        if not cap.isOpened():
            raise IOError(f"動画を開けませんでした: {inputMoviePath}")

        # Frame rate and frame count of the movie.
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        samplingRate = fps

        # Location of the ROI outputs for this recording.
        save_roi_dir = os.path.join(rootDir, SAVE_DIR)
        roi_vis_path = os.path.join(save_roi_dir, "roi_visualization.jpg")

        # Ground-truth CSV files (raw ECG and the RRI/BPM table).
        ecg_csv_path = os.path.join(rootDir, dataName + '.csv')
        ecg_df = pd.read_csv(ecg_csv_path)

        ecg_RRI_csv_path = os.path.join(rootDir, f'RRI_Simple_{movie_names[i]}.csv')
        ecg_RRI_df = pd.read_csv(ecg_RRI_csv_path)

        stride_segment_calculator = StrideSegmentCalculator(window_sizes=WINDOW_SIZES, strides=STRIDES)
        analysis_df = stride_segment_calculator.create_analysis_dataframe(total_frames, fps)

        # Per-frame signals extracted earlier in the notebook.
        os.makedirs(save_roi_dir, exist_ok=True)
        signals_csv_path = os.path.join(save_roi_dir, f'extracted_signals.csv')
        signals_df = pd.read_csv(signals_csv_path)

        # One dict per processed window.
        all_window_results = []

        for idx, row in analysis_df.iterrows():
            window_size = row['window_size']
            frame_start = row['frame_start']
            frame_end = row['frame_end']
            stride = row['stride']

            # Time range covered by the window, in seconds.
            window_start_time = frame_start / fps
            window_end_time = frame_end / fps

            # Skip windows that extend past the end of the movie.
            if window_end_time > total_frames / fps:
                continue

            print(f"\nウィンドウ {idx}: 窓サイズ {window_size}s, ストライド {stride}s, フレーム {frame_start}-{frame_end}, 時間 {window_start_time:.2f}-{window_end_time:.2f}s")

            # Ground-truth BPM values whose timestamps fall inside the window.
            ecg_RRI_mask = (ecg_RRI_df['time'] >= window_start_time) & \
                    (ecg_RRI_df['time'] < window_end_time)
            ecg_bpm_in_window = ecg_RRI_df[ecg_RRI_mask]['BPM'].values
            ecg_bpm_in_window_mean = np.mean(ecg_bpm_in_window) if len(ecg_bpm_in_window) > 0 else np.nan

            # Slice the per-frame table once; every column below reads from this
            # slice instead of re-applying the boolean mask for each column.
            bvp_mask = (signals_df['timestamp'] >= window_start_time) & (signals_df['timestamp'] < window_end_time)
            window_df = signals_df[bvp_mask]

            r_signal_in_window = window_df['r_mean'].values
            g_signal_in_window = window_df['g_mean'].values
            b_signal_in_window = window_df['b_mean'].values

            r_std_signal_in_window = window_df['r_std'].values
            g_std_signal_in_window = window_df['g_std'].values
            b_std_signal_in_window = window_df['b_std'].values

            h_signal_in_window = window_df['h_mean'].values
            s_signal_in_window = window_df['s_mean'].values
            v_signal_in_window = window_df['v_mean'].values

            h_std_signal_in_window = window_df['h_std'].values
            s_std_signal_in_window = window_df['s_std'].values
            v_std_signal_in_window = window_df['v_std'].values

            lbp_entropy = window_df['lbp_entropy'].values
            lbp_variance = window_df['lbp_variance'].values
            lbp_skewness = window_df['lbp_skewness'].values
            lbp_kurtosis = window_df['lbp_kurtosis'].values
            lbp_chi2_distance = window_df['lbp_chi2_distance'].values
            lbp_uniform_ratio = window_df['lbp_uniform_ratio'].values

            canny_edge_ratio = window_df['canny_edge_ratio'].values
            glcm_isotropy = window_df['glcm_isotropy'].values

            flow_mean_motion = window_df['flow_mean_motion'].values
            flow_std_motion = window_df['flow_std_motion'].values
            flow_ratio_1px = window_df['flow_ratio_1px'].values
            flow_ratio_5px = window_df['flow_ratio_5px'].values
            flow_ratio_10px = window_df['flow_ratio_10px'].values

            # NOTE(review): ssim/psnr are read but not written to window_info.
            ssim_values = window_df['ssim'].values
            psnr_values = window_df['psnr'].values

            lightness_signal_in_window = window_df['l_mean'].values
            min_saturation_in_window = np.min(s_signal_in_window) if len(s_signal_in_window) > 0 else np.nan

            angles_in_window = window_df['angle'].values

            # ===== Read the window's frames for the wavelet / FFT analysis =====
            # All grayscale frames of the window are held in memory at once.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_start)
            gray_frames = []

            for frame_idx in range(int(frame_start), int(frame_end)):
                ret, frame = cap.read()
                if not ret:
                    break
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                gray_frames.append(gray_frame)

            # Wavelet HH-band energy.
            wavelet_features = calculate_wavelet_hh_energy(gray_frames)

            # FFT high-frequency energy ratio.
            fft_features = calculate_fft_high_freq_ratio(gray_frames, threshold_ratio=0.5)

            # Temporal frequency features, computed on the G signal.
            temporal_freq_features_g = calculate_temporal_frequency_features(g_signal_in_window, fps)

            # Motion features from the mean optical-flow magnitude.
            motion_features = calculate_motion_energy(flow_mean_motion)

            # Texture dynamics from the LBP entropy series.
            texture_dynamics = calculate_texture_dynamics(lbp_entropy)

            # Assemble the row for this window. Key order defines the CSV column
            # order, so it is kept stable.
            window_info = {
                'window_index': idx,
                'window_size': window_size,
                'stride': stride,
                'frame_start': frame_start,
                'frame_end': frame_end,
                'window_start_time': window_start_time,
                'window_end_time': window_end_time,

                # Colour signals and ground truth
                'r_signal_in_window': array_to_full_string(r_signal_in_window),
                'g_signal_in_window': array_to_full_string(g_signal_in_window),
                'b_signal_in_window': array_to_full_string(b_signal_in_window),
                'r_std_signal_in_window': array_to_full_string(r_std_signal_in_window),
                'g_std_signal_in_window': array_to_full_string(g_std_signal_in_window),
                'b_std_signal_in_window': array_to_full_string(b_std_signal_in_window),
                'h_signal_in_window': array_to_full_string(h_signal_in_window),
                's_signal_in_window': array_to_full_string(s_signal_in_window),
                'v_signal_in_window': array_to_full_string(v_signal_in_window),
                'h_std_signal_in_window': array_to_full_string(h_std_signal_in_window),
                's_std_signal_in_window': array_to_full_string(s_std_signal_in_window),
                'v_std_signal_in_window': array_to_full_string(v_std_signal_in_window),
                'lightness_signal_in_window': array_to_full_string(lightness_signal_in_window),
                'ecg_bpm_in_window': array_to_full_string(ecg_bpm_in_window),
                'ecg_bpm_in_window_mean': ecg_bpm_in_window_mean,
                'min_saturation_in_window': min_saturation_in_window,

                # LBP features
                'lbp_entropy': array_to_full_string(lbp_entropy),
                'lbp_variance': array_to_full_string(lbp_variance),
                'lbp_skewness': array_to_full_string(lbp_skewness),
                'lbp_kurtosis': array_to_full_string(lbp_kurtosis),
                'lbp_chi2_distance': array_to_full_string(lbp_chi2_distance),
                'lbp_uniform_ratio': array_to_full_string(lbp_uniform_ratio),

                # Canny & GLCM
                'canny_edge_ratio': array_to_full_string(canny_edge_ratio),
                'glcm_isotropy': array_to_full_string(glcm_isotropy),

                # Optical flow
                'flow_mean_motion': array_to_full_string(flow_mean_motion),
                'flow_std_motion': array_to_full_string(flow_std_motion),
                'flow_ratio_1px': array_to_full_string(flow_ratio_1px),
                'flow_ratio_5px': array_to_full_string(flow_ratio_5px),
                'flow_ratio_10px': array_to_full_string(flow_ratio_10px),

                # Wavelet features
                'wavelet_hh_energies': array_to_full_string(wavelet_features['hh_energies']),
                'wavelet_mean_hh_energy': wavelet_features['mean_hh_energy'],
                'wavelet_std_hh_energy': wavelet_features['std_hh_energy'],
                'wavelet_max_hh_energy': wavelet_features['max_hh_energy'],
                'wavelet_min_hh_energy': wavelet_features['min_hh_energy'],

                # FFT high-frequency energy ratio
                'fft_high_freq_ratios': array_to_full_string(fft_features['high_freq_ratios']),
                'fft_mean_high_freq_ratio': fft_features['mean_high_freq_ratio'],
                'fft_std_high_freq_ratio': fft_features['std_high_freq_ratio'],
                'fft_max_high_freq_ratio': fft_features['max_high_freq_ratio'],
                'fft_min_high_freq_ratio': fft_features['min_high_freq_ratio'],

                # Temporal frequency features
                'temporal_dominant_freq_g': temporal_freq_features_g['dominant_freq'],
                'temporal_spectral_entropy_g': temporal_freq_features_g['spectral_entropy'],
                'temporal_high_freq_power_ratio_g': temporal_freq_features_g['high_freq_power_ratio'],

                # Motion features
                'motion_energy': motion_features['motion_energy'],
                'motion_variance': motion_features['motion_variance'],
                'motion_smoothness': motion_features['motion_smoothness'],

                # Texture dynamics
                'texture_change_rate': texture_dynamics['texture_change_rate'],
                'texture_stability': texture_dynamics['texture_stability'],

                'angles_in_window': array_to_full_string(angles_in_window),
            }
            all_window_results.append(window_info)
    finally:
        cap.release()

    # Convert all window rows to a DataFrame and save them.
    results_df = pd.DataFrame(all_window_results)
    results_csv_path = os.path.join(save_roi_dir, f'window_signals_{dataName}.csv')
    results_df.to_csv(results_csv_path, index=False, encoding='utf-8-sig')
    print(f"\n結果をCSVに保存: {results_csv_path}")

    all_results_list.append(results_df)

print("\n" + "="*60)
print("全ての動画の解析が完了しました!")
print(f"処理した動画数: {len(movie_paths)}")
print("="*60)

成果物:window_signals

## 窓ごとにbpmMAE・尖度・歪度を算出

In [None]:
def analyze_window_fft(values, fps):
    """
    Locate the spectral peak of a time series and summarise its spectrum.

    The series is zero-padded to eight times its length before the FFT, and
    the peak position is refined with a three-point parabolic fit.

    Parameters
    ----------
    values : array-like
        Time series to analyse.
    fps : int or float
        Sampling frequency (frames per second).

    Returns
    -------
    dict
        - 'max_freq': frequency of the largest-amplitude bin (interpolated
          when the peak is not at either end of the spectrum)
        - 'max_amplitude': amplitude at that peak (interpolated likewise)
        - 'frequencies': positive-frequency axis
        - 'amplitudes': FFT magnitudes on that axis
        - 'power_spectrum': squared magnitudes
        - 'dominant_freqs': the five strongest bins as (frequency, amplitude)
          tuples, strongest first
        - 'spectral_centroid': amplitude-weighted mean frequency
        - 'spectral_bandwidth': amplitude-weighted standard deviation around
          the centroid
        - 'total_power': sum of the power spectrum
        - 'freq_resolution': bin spacing of the padded FFT (fps / n_pad)
        - 'nyquist_freq': fps / 2
    """
    samples = np.array(values)

    # Zero-pad to 8x the window length for a finer frequency grid.
    # (No taper such as a Hamming window is applied.)
    padded_len = 8 * len(samples)

    spectrum = np.fft.fft(samples, n=padded_len)
    freq_axis = np.fft.fftfreq(padded_len, 1 / fps)
    bin_width = fps / padded_len

    # Keep the strictly positive half of the spectrum.
    keep = freq_axis > 0
    freqs = freq_axis[keep]
    amps = np.abs(spectrum[keep])
    power = amps ** 2

    # Coarse peak: the strongest bin.
    peak = np.argmax(amps)
    peak_freq = freqs[peak]
    peak_amp = amps[peak]

    # Refine with a parabola through the peak and its two neighbours, which is
    # only possible when both neighbours exist.
    if 0 < peak < len(amps) - 1:
        left = amps[peak - 1]
        centre = amps[peak]
        right = amps[peak + 1]
        offset = 0.5 * (left - right) / (left - 2 * centre + right)

        peak_freq = freqs[peak] + offset * bin_width
        # Height of the fitted parabola at its vertex.
        peak_amp = centre - 0.25 * (left - right) * offset

    # Five strongest bins, in descending order of amplitude.
    strongest = np.argsort(amps)[-5:][::-1]
    dominant = [(freqs[k], amps[k]) for k in strongest]

    # Amplitude-weighted first and second moments of the spectrum.
    amp_total = np.sum(amps)
    centroid = np.sum(freqs * amps) / amp_total
    bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * amps) / amp_total)

    return {
        'max_freq': peak_freq,
        'max_amplitude': peak_amp,
        'frequencies': freqs,
        'amplitudes': amps,
        'power_spectrum': power,
        'dominant_freqs': dominant,
        'spectral_centroid': centroid,
        'spectral_bandwidth': bandwidth,
        'total_power': np.sum(power),
        'freq_resolution': bin_width,
        'nyquist_freq': fps / 2
    }

In [None]:
def extract_BVPsignal(r_signals, g_signals, b_signals, s_signal, fps, deviceType, bvpMethod, bvpMethodName, method_params=None):
    """
    Extract a BVP signal from RGB traces with pyVHR (extended-POS aware).

    Parameters
    ----------
    r_signals, g_signals, b_signals : array-like
        Per-frame R, G and B traces; they are stacked into an array of shape
        (1, 3, n_frames).
    s_signal : array-like
        Saturation trace; only forwarded (as 'saturation_signal') when
        bvpMethodName is "cupy_POS_extended".
    fps : float
        Frame rate, passed to pyVHR and to the band-pass filter.
    deviceType : str
        Passed to pyVHR as `device_type`.
    bvpMethod : callable
        Passed to pyVHR as `method`.
    bvpMethodName : str
        Selects which default parameters are filled in.
    method_params : dict, optional
        Extra parameters for the method. The dict is copied, so the caller's
        object is never modified. Extended-POS keys:
        - use_extended: bool, whether to use extended POS
        - u_v: array, steam vector
        - lambda1: float, specular-reflection suppression weight
        - lambda2: float, steam suppression weight
        - check_stationarity: bool, whether to check stationarity
        - stationarity_threshold: float, stationarity decision threshold

    Returns
    -------
    (raw_bvp_signal, filtered_bvp_signal)
        The BVP signal before and after post-filtering (band-pass of order 6
        between 0.5 and 2.0 Hz, then zero-mean). Returns (None, None) when the
        input has fewer than 50 samples; each element is None when pyVHR
        returns an empty result.
    """
    rgb_signal = np.array([[r_signals, g_signals, b_signals]], dtype=np.float32)
    print(f"\nRGB信号の形状: {rgb_signal.shape}")

    signal_length = rgb_signal.shape[2]
    min_required_length = 50

    # Too-short windows are rejected before pyVHR is called.
    if signal_length < min_required_length:
        print(f"警告: 信号長が短すぎます ({signal_length} < {min_required_length})。処理をスキップします。")
        return None, None

    filtered_signal = [rgb_signal]

    # Work on a private copy so the defaults added below never leak into the
    # caller's dict (which may be reused for the next window or method).
    method_params = dict(method_params) if method_params is not None else {}

    # Method-specific parameters.
    if bvpMethodName in ["cupy_POS", "cpu_POS"]:
        method_params['fps'] = fps
    elif bvpMethodName == "cupy_POS_extended":
        # Defaults for extended POS; explicit caller values win.
        if 'fps' not in method_params:
            method_params['fps'] = fps
        if 'use_extended' not in method_params:
            method_params['use_extended'] = True
        method_params['saturation_signal'] = s_signal
    elif bvpMethodName in ["cpu_ICA", "cpu_PCA"]:
        method_params['component'] = 'all_comp'

    print(f"\nBVP抽出開始 (メソッド: {bvpMethodName})")
    print(f"パラメータ: {method_params}")

    # BVP extraction; `params` is only passed when there is something to pass.
    if method_params:
        bvp_signal = vhr.BVP.RGB_sig_to_BVP(
            filtered_signal,
            fps,
            device_type=deviceType,
            method=bvpMethod,
            params=method_params
        )
    else:
        bvp_signal = vhr.BVP.RGB_sig_to_BVP(
            filtered_signal,
            fps,
            device_type=deviceType,
            method=bvpMethod
        )

    # Keep a copy of the unfiltered BVP before the filters run.
    raw_bvp_signal = bvp_signal[0].copy() if len(bvp_signal) > 0 else None

    # Post-processing: band-pass filter, then remove the mean.
    bvp_signal = vhr.BVP.apply_filter(
        bvp_signal,
        vhr.BVP.BPfilter,
        params={'order': 6, 'minHz': 0.5, 'maxHz': 2.0, 'fps': fps}
    )

    bvp_signal = vhr.BVP.apply_filter(bvp_signal, vhr.BVP.zeromean)

    filtered_bvp_signal = bvp_signal[0] if len(bvp_signal) > 0 else None

    print(f"\nBVP信号抽出完了")
    return raw_bvp_signal, filtered_bvp_signal

In [None]:
# BVP method combinations to evaluate. Each entry is
# [device type, pyVHR method, method name] with an optional fourth element
# holding a dict of extra method parameters.
methodCombinations = [
    ['cuda', cupy_POS, "cupy_POS"]
]

for i in range(len(movie_paths)):
    print(f'Processing movie: {movie_paths[i]}')
    inputMoviePath = movie_paths[i]
    rootDir = data_dirs[i]
    dataName = movie_names[i]

    # The capture is only needed for metadata here, so release it right away
    # instead of leaving one open handle per movie.
    cap = cv2.VideoCapture(inputMoviePath)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    # Ground-truth CSV files (raw ECG and the RRI/BPM table).
    ecg_csv_path = os.path.join(rootDir, dataName + '.csv')
    ecg_df = pd.read_csv(ecg_csv_path)

    ecg_RRI_csv_path = os.path.join(rootDir, f'RRI_Simple_{dataName}.csv')
    ecg_RRI_df = pd.read_csv(ecg_RRI_csv_path)

    # Per-window signals written by the window-extraction cell.
    os.makedirs(os.path.join(rootDir, SAVE_DIR), exist_ok=True)
    window_signals_data_path = os.path.join(rootDir, SAVE_DIR, f'{"window_signals_" + dataName}.csv')
    window_signals_data_df = pd.read_csv(window_signals_data_path)
    print(f'Loaded window signals data from {window_signals_data_path}, {len(window_signals_data_df)} rows')

    stride_segment_calculator = StrideSegmentCalculator(window_sizes=WINDOW_SIZES, strides=STRIDES)
    analysis_df = stride_segment_calculator.create_analysis_dataframe(total_frames, fps)

    # One dict per (window, method) pair.
    all_window_results = []

    for idx, row in analysis_df.iterrows():
        window_size = row['window_size']
        frame_start = row['frame_start']
        frame_end = row['frame_end']
        stride = row['stride']

        window_start_time = frame_start / fps
        window_end_time = frame_end / fps

        # Skip windows that extend past the end of the movie.
        if window_end_time > total_frames / fps:
            continue

        print(f"\nウィンドウ {idx}: 窓サイズ {window_size}s, ストライド {stride}s, フレーム {frame_start}-{frame_end}, 時間 {window_start_time:.2f}-{window_end_time:.2f}s")

        # Ground-truth BPM values whose timestamps fall inside the window.
        ecg_RRI_mask = (ecg_RRI_df['time'] >= window_start_time) & \
                       (ecg_RRI_df['time'] < window_end_time)
        ecg_bpm_in_window = ecg_RRI_df[ecg_RRI_mask]['BPM'].values
        ecg_bpm_in_window_mean = np.mean(ecg_bpm_in_window) if len(ecg_bpm_in_window) > 0 else np.nan
        print(f'    True Value RRI count in window: {len(ecg_bpm_in_window)}, Mean BPM: {ecg_bpm_in_window_mean:.2f}')

        # Row of the window_signals table that belongs to this window.
        bvp_mask = window_signals_data_df['window_index'] == idx
        print(f'    BVP mask has {bvp_mask.sum()} matching rows')

        filtered_rows = window_signals_data_df[bvp_mask]

        r_signal_in_window = filtered_rows['r_signal_in_window'].values[0]
        g_signal_in_window = filtered_rows['g_signal_in_window'].values[0]
        b_signal_in_window = filtered_rows['b_signal_in_window'].values[0]
        saturation_signal_in_window = filtered_rows['s_signal_in_window'].values[0]
        lightness_signal_in_window = filtered_rows['lightness_signal_in_window'].values[0]

        # The CSV stores arrays as "[v1 v2 ...]"; strip the brackets and parse.
        r_signal_in_window = np.fromstring(r_signal_in_window[1:-1], sep=' ')
        print(f'    R signal in window length: {len(r_signal_in_window)}')
        g_signal_in_window = np.fromstring(g_signal_in_window[1:-1], sep=' ')
        b_signal_in_window = np.fromstring(b_signal_in_window[1:-1], sep=' ')
        saturation_signal_in_window = np.fromstring(saturation_signal_in_window[1:-1], sep=' ')
        lightness_signal_in_window = np.fromstring(lightness_signal_in_window[1:-1], sep=' ')

        if len(r_signal_in_window) == 0 or len(g_signal_in_window) == 0 or len(b_signal_in_window) == 0:
            raise ValueError("RGB信号が窓内に存在しません")

        # Run every configured BVP method on this window.
        for methodCombination in methodCombinations:
            deviceType = methodCombination[0]
            bvpMethod = methodCombination[1]
            bvpMethodName = methodCombination[2]
            method_params = methodCombination[3].copy() if len(methodCombination) > 3 else {}

            print(f"\n{'='*60}")
            print(f"実行メソッド: {bvpMethodName}")
            print(f"{'='*60}")

            # RGB -> BVP
            raw_bvp_signal_in_window, filtered_bvp_signal_in_window = extract_BVPsignal(
                r_signal_in_window,
                g_signal_in_window,
                b_signal_in_window,
                saturation_signal_in_window,
                fps,
                deviceType,
                bvpMethod,
                bvpMethodName,
                method_params=method_params
            )

            # Skip this method when no BVP signal could be extracted.
            if filtered_bvp_signal_in_window is None:
                print(f"ウィンドウ {idx} をスキップ: BVP信号の抽出に失敗")
                continue

            # FFT analysis of the filtered BVP.
            raw_bvp_signal_in_window = raw_bvp_signal_in_window.flatten() if raw_bvp_signal_in_window is not None else None
            filtered_bvp_signal_in_window = filtered_bvp_signal_in_window.flatten()
            fft_result_dic = analyze_window_fft(filtered_bvp_signal_in_window, fps)

            # Estimated heart rate and its absolute error against the ECG.
            rppg_bpm = fft_result_dic['max_freq'] * 60
            rppg_freq = fft_result_dic['frequencies']
            rppg_amplitude = fft_result_dic['amplitudes']
            rppg_pwd = fft_result_dic['power_spectrum']

            bpm_MAE = np.abs(ecg_bpm_in_window_mean - rppg_bpm) if not np.isnan(ecg_bpm_in_window_mean) else np.nan

            print(f"\n結果サマリー:")
            print(f"  ECG BPM: {ecg_bpm_in_window_mean:.2f}")
            print(f"  rPPG BPM: {rppg_bpm:.2f}")
            print(f"  MAE: {bpm_MAE:.2f}")

            # Kurtosis / skewness of the raw and filtered BVP.
            # `kurtosis` and `skew` come from `from scipy.stats import ...` in the
            # import cell; the bare name `scipy` is not bound there.
            # The raw statistics default to NaN so a missing raw signal can never
            # reuse values left over from a previous window.
            raw_bvp_kurtosis = np.nan
            raw_bvp_skewness = np.nan
            if raw_bvp_signal_in_window is not None:
                raw_bvp_kurtosis = kurtosis(raw_bvp_signal_in_window)
                raw_bvp_skewness = skew(raw_bvp_signal_in_window)
            filtered_bvp_kurtosis = kurtosis(filtered_bvp_signal_in_window)
            filtered_bvp_skewness = skew(filtered_bvp_signal_in_window)

            # Assemble the row. Arrays are serialised with array_to_full_string
            # because a raw ndarray is written to CSV via its repr, which NumPy
            # abbreviates with "..." for long arrays.
            window_info = {
                'window_index': idx,
                'bvp_method': bvpMethodName,
                'window_size': window_size,
                'stride': stride,
                'frame_start': frame_start,
                'frame_end': frame_end,
                'window_start_time': window_start_time,
                'window_end_time': window_end_time,
                'r_signal_in_window': array_to_full_string(r_signal_in_window),
                'g_signal_in_window': array_to_full_string(g_signal_in_window),
                'b_signal_in_window': array_to_full_string(b_signal_in_window),
                'saturation_signal_in_window': array_to_full_string(saturation_signal_in_window),
                'lightness_signal_in_window': array_to_full_string(lightness_signal_in_window),
                'raw_bvp_in_window': array_to_full_string(raw_bvp_signal_in_window) if raw_bvp_signal_in_window is not None else None,
                'filtered_bvp_in_window': array_to_full_string(filtered_bvp_signal_in_window),
                'ecg_bpm_in_window': array_to_full_string(ecg_bpm_in_window),
                'ecg_bpm_mean': ecg_bpm_in_window_mean,
                'rppg_bpm': rppg_bpm,
                'bpm_MAE': bpm_MAE,
                'max_freq': fft_result_dic['max_freq'],
                'max_amplitude': fft_result_dic['max_amplitude'],
                'spectral_centroid': fft_result_dic['spectral_centroid'],
                'spectral_bandwidth': fft_result_dic['spectral_bandwidth'],
                'total_power': fft_result_dic['total_power'],
                'raw_bvp_kurtosis': raw_bvp_kurtosis,
                'raw_bvp_skewness': raw_bvp_skewness,
                'filtered_bvp_kurtosis': filtered_bvp_kurtosis,
                'filtered_bvp_skewness': filtered_bvp_skewness
            }
            all_window_results.append(window_info)

    # Convert all rows to a DataFrame and save them.
    results_df = pd.DataFrame(all_window_results)
    results_save_dir = os.path.join(rootDir, SAVE_DIR)
    os.makedirs(results_save_dir, exist_ok=True)
    results_csv_path = os.path.join(results_save_dir, f'window_analysis_{dataName}.csv')
    results_df.to_csv(results_csv_path, index=False, encoding='utf-8-sig')
    print(f"\nFFT結果をCSVに保存: {results_csv_path}")

### 現段階の成果物
- extracted_signals
- window_signals
- window_analysis

### 相関解析

In [None]:
def calculate_stats(value_str):
    """
    Parse an array stored as a "[v1 v2 ...]" string and summarise it.

    Parameters
    ----------
    value_str : str
        Bracketed, whitespace-separated numbers. Anything that is not a
        string (NaN, None, numbers, lists, ...) is treated as missing.

    Returns
    -------
    pd.Series
        Five values in this order: mean, standard deviation, maximum,
        minimum and inter-quartile range (75th minus 25th percentile).
        All five are NaN when the input is missing, empty or unparsable.
    """
    nan_stats = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])

    # Missing cells arrive as float NaN / None; array-likes would make a plain
    # `if pd.isnull(...)` ambiguous, so everything non-string is rejected here.
    if not isinstance(value_str, str):
        return nan_stats

    try:
        # Drop the surrounding brackets before parsing.
        values = np.fromstring(value_str[1:-1], sep=' ')
    except ValueError:
        return nan_stats

    # Nothing to summarise; avoids "mean of empty slice" warnings.
    if values.size == 0:
        return nan_stats

    q75, q25 = np.percentile(values, [75, 25])
    return pd.Series([values.mean(), values.std(), values.max(), values.min(), q75 - q25])

In [None]:
def calculate_feature_mae_correlations(df, feature_names, save_path=None, title_prefix="", top_n=20):
    """
    Correlate each feature with bpm_MAE and draw the strongest ones.

    Parameters
    ----------
    df : pd.DataFrame
        Table holding the feature columns and a 'bpm_MAE' column.
    feature_names : list
        Column names to evaluate; names missing from `df` are ignored.
    save_path : str or None
        When given, the bar chart is also written to this path.
    title_prefix : str
        Text prepended to the printed heading and the figure title.
    top_n : int
        Maximum number of features to print and plot.

    Returns
    -------
    corr_df : pd.DataFrame or None
        Columns 'feature', 'correlation', 'abs_correlation' and 'n_samples',
        sorted by descending absolute correlation. None when 'bpm_MAE' is
        missing or no feature yields a usable correlation.
    """
    if 'bpm_MAE' not in df.columns:
        print("警告: bpm_MAE列が見つかりません。")
        return None

    records = []
    for name in feature_names:
        if name not in df.columns:
            continue

        # Rows where both the feature and the MAE are present.
        usable = df[name].notna() & df['bpm_MAE'].notna()
        if usable.sum() <= 10:
            # Only features with more than 10 paired samples are evaluated.
            continue

        try:
            r_value = df.loc[usable, name].corr(df.loc[usable, 'bpm_MAE'])
            # Undefined correlations (e.g. constant columns) are dropped.
            if np.isnan(r_value):
                continue
            records.append({
                'feature': name,
                'correlation': r_value,
                'abs_correlation': abs(r_value),
                'n_samples': usable.sum()
            })
            print(f"{name} と MAE の相関: {r_value:.4f} (サンプル数: {usable.sum()})")
        except Exception as e:
            print(f"警告: {name} の相関計算でエラー: {e}")

    if not records:
        print(f"警告: {title_prefix}有効な相関が計算できませんでした。")
        return None

    # Strongest absolute correlation first.
    corr_df = pd.DataFrame(records).sort_values('abs_correlation', ascending=False)

    print(f"\n=== {title_prefix}MAEと相関が強い特徴量 Top {min(top_n, len(corr_df))} ===")
    print(corr_df.head(top_n).to_string(index=False))

    # Horizontal bar chart of the leading features: red for negative
    # correlations, blue otherwise.
    top_n_actual = min(top_n, len(corr_df))
    leaders = corr_df.head(top_n_actual)
    leader_values = leaders['correlation'].values
    bar_positions = range(len(leaders))
    bar_colors = []
    for value in leader_values:
        bar_colors.append('red' if value < 0 else 'blue')

    plt.figure(figsize=(12, max(8, top_n_actual * 0.4)))
    plt.barh(bar_positions, leader_values, color=bar_colors, alpha=0.7)
    plt.yticks(bar_positions, leaders['feature'].values, fontsize=10)
    plt.xlabel('Correlation with MAE', fontsize=12)
    plt.ylabel('Feature', fontsize=12)
    plt.title(f'{title_prefix}Top {top_n_actual} Features Correlated with MAE', fontsize=14)
    plt.axvline(x=0, color='k', linestyle='-', linewidth=0.5)
    plt.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"相関グラフを保存: {save_path}")

    plt.show()

    return corr_df


def plot_window_param_mae_time_series(df, feature_names=None, top_n=5, corr_df=None,
                                       save_path=None, title_prefix=""):
    """
    Plot bpm_MAE and the leading features against window start time.

    The first panel shows bpm_MAE; each following panel shows one feature.
    When `df` has a 'data_name' column with more than one distinct value, one
    line is drawn per data_name.

    Parameters
    ----------
    df : pd.DataFrame
        Table holding the features, 'bpm_MAE' and 'window_start_time'.
    feature_names : list or None
        Features to plot when `corr_df` is None or empty (first `top_n` used).
    top_n : int
        Maximum number of feature panels.
    corr_df : pd.DataFrame or None
        Correlation table with 'feature' and 'correlation' columns; when it
        has rows, its first `top_n` features are plotted and r is shown in
        each panel title.
    save_path : str or None
        When given, the figure is also written to this path.
    title_prefix : str
        Text prepended to the figure title.
    """
    if 'bpm_MAE' not in df.columns:
        print("警告: bpm_MAE列が見つかりません。")
        return

    # Choose the features: the correlation table takes priority over the list.
    if corr_df is not None and len(corr_df) > 0:
        n_selected = min(top_n, len(corr_df))
        selected = corr_df.head(n_selected)['feature'].values
    elif feature_names is not None:
        n_selected = min(top_n, len(feature_names))
        selected = feature_names[:n_selected]
    else:
        print("警告: プロットする特徴量が指定されていません。")
        return

    # Without a time axis there is nothing to draw.
    if 'window_start_time' not in df.columns:
        print("警告: window_start_time列が見つかりません。時系列プロットをスキップします。")
        return

    n_panels = n_selected + 1
    fig, axes = plt.subplots(n_panels, 1, figsize=(14, 3 * n_panels))
    # A single panel comes back as a bare Axes; wrap it so indexing works.
    if n_selected == 0 or not isinstance(axes, np.ndarray):
        axes = [axes]

    per_recording = 'data_name' in df.columns and df['data_name'].nunique() > 1

    def _draw_series(ax, column):
        # One line per data_name when several are present, else a single line.
        if per_recording:
            for recording in df['data_name'].unique():
                rows = df[df['data_name'] == recording].sort_values('window_start_time')
                ax.plot(rows['window_start_time'], rows[column],
                        label=recording, alpha=0.7, marker='o', markersize=3)
        else:
            rows = df.sort_values('window_start_time')
            ax.plot(rows['window_start_time'], rows[column],
                    alpha=0.7, marker='o', markersize=3)

    # Panel 0: the MAE itself (the only panel that gets a legend).
    mae_ax = axes[0]
    _draw_series(mae_ax, 'bpm_MAE')
    if per_recording:
        mae_ax.legend(loc='upper right', fontsize=8)
    mae_ax.set_ylabel('BPM MAE', fontsize=12)
    mae_ax.set_xlabel('Window Start Time (s)', fontsize=12)
    mae_ax.grid(True, alpha=0.3)
    mae_ax.set_title('MAE', fontsize=11)

    # Panels 1..n: one feature each; unknown features leave their panel empty.
    for panel_no, column in enumerate(selected, start=1):
        if column not in df.columns:
            continue

        feature_ax = axes[panel_no]
        _draw_series(feature_ax, column)
        feature_ax.set_ylabel(column, fontsize=10)
        feature_ax.set_xlabel('Window Start Time (s)', fontsize=12)
        feature_ax.grid(True, alpha=0.3)

        # Show the correlation coefficient in the title when it is known.
        if corr_df is not None:
            match = corr_df[corr_df['feature'] == column]
            if not match.empty:
                r_value = match['correlation'].values[0]
                feature_ax.set_title(f'{column} (r={r_value:.3f})', fontsize=11)

    plt.suptitle(f'{title_prefix}Top Features and MAE Time Series', fontsize=14, y=0.995)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"時系列グラフを保存: {save_path}")

    plt.show()


def plot_param_vs_mae(df, feature_names=None, top_n=9, corr_df=None, 
                      save_path=None, title_prefix=""):
    """
    Scatter-plot the top-ranked features against bpm_MAE on a 3-column grid.

    Parameters:
    -----------
    df : pd.DataFrame
        Must contain 'bpm_MAE' and the feature columns. When a 'window_size'
        column with more than one distinct value is present, the points are
        drawn as one series per window size with a legend.
    feature_names : list or None
        Candidate features; only used when corr_df is None or empty.
    top_n : int
        Maximum number of features to plot.
    corr_df : pd.DataFrame or None
        Frame with 'feature' and 'correlation' columns. Its first top_n rows
        (in the order given) select the features, and the correlation value
        is written into each panel title.
    save_path : str or None
        If given, the figure is saved there (dpi=300).
    title_prefix : str
        Text prepended to the figure title.

    Returns:
    --------
    None
        Returns early, after printing a warning, when 'bpm_MAE' is missing or
        when there is nothing that can be plotted.
    """
    if 'bpm_MAE' not in df.columns:
        print("警告: bpm_MAE列が見つかりません。")
        return

    # Candidate features: the corr_df ranking wins, feature_names is the fallback.
    if corr_df is not None and len(corr_df) > 0:
        candidates = list(corr_df['feature'].head(top_n))
    elif feature_names is not None:
        candidates = list(feature_names[:top_n])
    else:
        print("警告: プロットする特徴量が指定されていません。")
        return

    # Drop features that df does not have up front, so the grid contains no
    # blank-but-visible panels and an empty selection never reaches
    # plt.subplots (which rejects a grid with zero rows).
    top_features = [feature for feature in candidates if feature in df.columns]
    if not top_features:
        print("警告: プロット可能な特徴量がありません。")
        return

    n_cols = 3
    n_rows = int(np.ceil(len(top_features) / n_cols))

    # squeeze=False always yields a 2-D array, so one ravel() covers every grid shape.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows), squeeze=False)
    axes = axes.ravel()

    # The window-size grouping does not depend on the feature: compute it once.
    color_by_window = 'window_size' in df.columns and df['window_size'].nunique() > 1
    window_sizes = sorted(df['window_size'].unique()) if color_by_window else []

    for ax, feature in zip(axes, top_features):
        if color_by_window:
            for window_size in window_sizes:
                data_subset = df[df['window_size'] == window_size]
                ax.scatter(data_subset[feature], data_subset['bpm_MAE'], 
                           label=f'{window_size}s', alpha=0.6, s=30)
            ax.legend(title='Window', fontsize=8)
        else:
            ax.scatter(df[feature], df['bpm_MAE'], alpha=0.6, s=30)

        ax.set_xlabel(feature, fontsize=10)
        ax.set_ylabel('BPM MAE', fontsize=10)
        ax.grid(True, alpha=0.3)

        # Show the correlation coefficient in the panel title when it is known.
        if corr_df is not None:
            corr_row = corr_df[corr_df['feature'] == feature]
            if not corr_row.empty:
                corr_val = corr_row['correlation'].values[0]
                ax.set_title(f'r={corr_val:.3f}', fontsize=11)

    # Hide the unused panels of the last grid row.
    for ax in axes[len(top_features):]:
        ax.axis('off')

    plt.suptitle(f'{title_prefix}Top Features vs MAE', fontsize=14)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"散布図を保存: {save_path}")

    plt.show()
    # This function is called once per video and window size; release the
    # figure so open figures do not pile up over a long run.
    plt.close(fig)

In [None]:
def load_window_data(dataFrameArrays, data_names, exclude_cols=None, array_cols=None):
    """
    Tag each per-video window DataFrame with its data name, stack them into
    one frame, and list the columns that count as features.

    Parameters:
    -----------
    dataFrameArrays : list of pd.DataFrame
        Window-feature DataFrames, one per data set. They are not modified.
    data_names : list of str
        Data name for each DataFrame, paired by position.
    exclude_cols : list of str or None
        Columns that must not be treated as features. Not modified.
    array_cols : list of str or None
        Additional columns to leave out of the feature list (the callers pass
        the raw per-window signal columns here).

    Returns:
    --------
    combined_df : pd.DataFrame
        All input frames concatenated with a fresh index and a 'data_name' column.
    feature_names : list
        Columns of combined_df that are in neither exclude_cols nor array_cols,
        in column order.

    Raises:
    -------
    ValueError
        If there is no DataFrame to combine.
    """
    # assign() returns a copy, so the caller's frames do not silently gain a
    # 'data_name' column.
    tagged_frames = [
        frame.assign(data_name=name)
        for frame, name in zip(dataFrameArrays, data_names)
    ]
    if not tagged_frames:
        raise ValueError("結合するDataFrameがありません。")

    combined_df = pd.concat(tagged_frames, ignore_index=True)

    # Build a fresh set: both arguments default to None, and extending the
    # caller's exclude_cols list in place would leak state between calls.
    excluded = set(exclude_cols or ()) | set(array_cols or ())

    feature_names = [col for col in combined_df.columns if col not in excluded]

    return combined_df, feature_names

動画別 × 窓長ごとに相関解析 + 全体×窓長ごとに相関解析してTOP10を算出

In [None]:
# Per-video analysis: for every video and every window length, correlate each
# window feature with bpm_MAE, then save the ranking (CSV) and the plots.
# Relies on names defined in other cells: movie_paths, data_dirs, movie_names,
# SAVE_DIR, WINDOW_SIZES, calculate_stats, calculate_feature_mae_correlations
# and plot_window_param_mae_time_series.

# Columns whose per-window values get summarised by calculate_stats.
# The list does not change between videos, so it is built once.
array_cols = [
    'r_signal_in_window', 'g_signal_in_window', 'b_signal_in_window',
    'r_std_signal_in_window', 'g_std_signal_in_window', 'b_std_signal_in_window',
    'h_signal_in_window', 's_signal_in_window', 'v_signal_in_window',
    'h_std_signal_in_window', 's_std_signal_in_window', 'v_std_signal_in_window',
    'lightness_signal_in_window', 
    'lbp_entropy', 'lbp_variance', 'lbp_skewness', 'lbp_kurtosis',
    'lbp_chi2_distance', 'lbp_uniform_ratio',
    'canny_edge_ratio', 'glcm_isotropy',
    'flow_mean_motion', 'flow_std_motion', 'flow_ratio_1px', 'flow_ratio_5px', 'flow_ratio_10px',
    'wavelet_hh_energies', 'fft_high_freq_ratios',
    'angles_in_window'
]

# zip() walks the three lists in step and stops at the shortest one, so a
# length mismatch between them no longer ends in an IndexError.
for inputMoviePath, rootDir, dataName in zip(movie_paths, data_dirs, movie_names):
    print(f"\n{'='*80}")
    print(f"動画: {dataName}")
    print(f"{'='*80}")

    # Both CSVs are required: window_analysis carries the MAE,
    # window_signals carries the features.
    window_analysis_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_analysis_{dataName}.csv')

    if not os.path.exists(window_analysis_csv_path):
        print(f"警告: {window_analysis_csv_path} が見つかりません。スキップします。")
        continue

    print("窓解析データを読み込み中...")
    window_analysis_dataframe = pd.read_csv(window_analysis_csv_path)

    window_signals_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_signals_{dataName}.csv')

    if not os.path.exists(window_signals_csv_path):
        print(f"警告: {window_signals_csv_path} が見つかりません。スキップします。")
        continue

    # ===================== Load the feature data ======================
    window_signals_dataframe = pd.read_csv(window_signals_csv_path)

    # Summarise every array column into mean/std/max/min/iqr columns.
    # The stat frames are collected and attached with a single concat rather
    # than re-concatenating the growing frame on every iteration.
    stats_frames = []
    for col in array_cols:
        if col in window_signals_dataframe.columns:
            print(f"処理中: {col}")
            stats_df = window_signals_dataframe[col].apply(calculate_stats)
            stats_df.columns = [f'{col}_mean', f'{col}_std', f'{col}_max', f'{col}_min', f'{col}_iqr']
            stats_frames.append(stats_df)
    if stats_frames:
        window_signals_dataframe = pd.concat([window_signals_dataframe] + stats_frames, axis=1)

    # Attach the MAE and BPM columns by window_index.
    # NOTE(review): the join key is window_index only — confirm it is unique
    # across window sizes in window_analysis, otherwise rows get duplicated.
    window_signals_dataframe = pd.merge(
        window_signals_dataframe, 
        window_analysis_dataframe[['window_index', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm']],
        on='window_index',
        how='left'
    )
    print(window_analysis_dataframe.head())

    # ===================== Build the feature list ==========================
    df_all, feature_names = load_window_data(
        dataFrameArrays=[window_signals_dataframe],
        data_names=[dataName],
        exclude_cols=[
            'window_index', 'window_size', 'stride', 'frame_start', 'frame_end',
            'window_start_time', 'window_end_time', 'data_name', 'ecg_bpm_in_window', 'ecg_bpm_in_window_mean', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm'
        ],
        array_cols=array_cols
    )

    print(f"特徴量数: {len(feature_names)}")

    # ===================== Feature-vs-MAE correlation per window length =====================
    for window_size in WINDOW_SIZES:
        print(f"\n{'─'*60}")
        print(f"窓長: {window_size}秒")
        print(f"{'─'*60}")

        # Keep only the windows of this length.
        df_window = df_all[df_all['window_size'] == window_size].copy()

        if len(df_window) == 0:
            print(f"警告: 窓長{window_size}秒のデータがありません。スキップします。")
            continue

        print(f"データ数: {len(df_window)} 窓")

        # Output directory for this video / window length.
        save_dir = os.path.join(rootDir, SAVE_DIR, 'corr_window_analysis', 
                                'per_video_per_window', f'{dataName}_window_{window_size}s')
        os.makedirs(save_dir, exist_ok=True)

        # Correlation analysis.
        corr_df = calculate_feature_mae_correlations(
            df_window, 
            feature_names,
            save_path=os.path.join(save_dir, "feature_mae_correlations.png"),
            title_prefix=f"{dataName} - {window_size}s: ",
            top_n=20
        )

        # The result may be None; check before using it so the CSV export
        # cannot fail with AttributeError.
        if corr_df is None:
            print("警告: 相関解析結果が得られませんでした。スキップします。")
            continue

        # Save the ranking; the partial-correlation cell reads this file back.
        corr_csv_path = os.path.join(save_dir, "feature_mae_correlations.csv")
        corr_df.to_csv(corr_csv_path, index=False, encoding='utf-8-sig')
        print(f"相関係数CSVを保存: {corr_csv_path}")

        # Time series of the MAE and the top features.
        plot_window_param_mae_time_series(
            df_window,
            top_n=5,
            corr_df=corr_df,
            save_path=os.path.join(save_dir, "param_mae_time_series.png"),
            title_prefix=f"{dataName} - {window_size}s: "
        )

        # ===== Feature-vs-MAE scatter plots =====
        print("\n[MAE分析]")
        plot_param_vs_mae(
            df_window,
            top_n=9,
            corr_df=corr_df,
            save_path=os.path.join(save_dir, "param_vs_mae_scatter.png"),
            title_prefix=f"{dataName} - {window_size}s: "
        )

パターン2: 全動画を統合 × 窓長ごとに相関解析

このセルを実行する前に、パス関連の変数（`movie_paths`・`data_dirs`・`movie_names`）が全動画を対象とするように変更しておくこと。

In [None]:
# Combined analysis: pool the windows of every video, then correlate each
# feature with bpm_MAE separately for each window length.
# Relies on names defined in other cells: movie_paths, data_dirs, movie_names,
# SAVE_DIR, WINDOW_SIZES, calculate_stats, load_window_data,
# calculate_feature_mae_correlations, plot_window_param_mae_time_series and
# plot_param_vs_mae.
print("="*80)
print("パターン3: 全動画を統合 × 窓長ごとに相関解析")
print("="*80)

all_dataframes = []
all_data_names = []

# Columns whose per-window values get summarised by calculate_stats.
# Defined before the loop: load_window_data below needs it even when every
# video is skipped, and it does not change between videos.
array_cols = [
    'r_signal_in_window', 'g_signal_in_window', 'b_signal_in_window',
    'r_std_signal_in_window', 'g_std_signal_in_window', 'b_std_signal_in_window',
    'h_signal_in_window', 's_signal_in_window', 'v_signal_in_window',
    'h_std_signal_in_window', 's_std_signal_in_window', 'v_std_signal_in_window',
    'lightness_signal_in_window', 
    'lbp_entropy', 'lbp_variance', 'lbp_skewness', 'lbp_kurtosis',
    'lbp_chi2_distance', 'lbp_uniform_ratio',
    'canny_edge_ratio', 'glcm_isotropy',
    'flow_mean_motion', 'flow_std_motion', 'flow_ratio_1px', 'flow_ratio_5px', 'flow_ratio_10px',
    'wavelet_hh_energies', 'fft_high_freq_ratios',
    'angles_in_window'
]

# zip() walks the lists in step and stops at the shortest one, so a length
# mismatch between them no longer ends in an IndexError. The movie path
# itself is not needed here.
for _movie_path, rootDir, dataName in zip(movie_paths, data_dirs, movie_names):
    window_analysis_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_analysis_{dataName}.csv')
    window_signals_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_signals_{dataName}.csv')

    if not os.path.exists(window_analysis_csv_path) or not os.path.exists(window_signals_csv_path):
        print(f"警告: {dataName} のファイルが見つかりません。スキップします。")
        continue

    print(f"読み込み中: {dataName}")

    window_analysis_dataframe = pd.read_csv(window_analysis_csv_path)
    window_signals_dataframe = pd.read_csv(window_signals_csv_path)

    # Summarise every array column into mean/std/max/min/iqr columns and
    # attach them with a single concat.
    stats_frames = []
    for col in array_cols:
        if col in window_signals_dataframe.columns:
            print(f"  処理中: {col}")
            stats_df = window_signals_dataframe[col].apply(calculate_stats)
            stats_df.columns = [f'{col}_mean', f'{col}_std', f'{col}_max', f'{col}_min', f'{col}_iqr']
            stats_frames.append(stats_df)
    if stats_frames:
        window_signals_dataframe = pd.concat([window_signals_dataframe] + stats_frames, axis=1)

    # NOTE(review): this section announces a noise-feature computation, but
    # nothing in this cell computes one and long_noise_threshold_array is not
    # read here — confirm whether it is a leftover.
    print(f"  === {dataName}: ノイズ特徴量の計算 ===")

    long_noise_threshold_array = [15, 30]

    # Attach the MAE and BPM columns by window_index.
    window_signals_dataframe = pd.merge(
        window_signals_dataframe, 
        window_analysis_dataframe[['window_index', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm']],
        on='window_index',
        how='left'
    )

    all_dataframes.append(window_signals_dataframe)
    all_data_names.append(dataName)

print(f"\n読み込んだ動画数: {len(all_dataframes)}")

# Stop with a clear message instead of an opaque concat error further down.
if not all_dataframes:
    raise ValueError("統合できる動画データがありません。CSVのパスを確認してください。")

df_all_videos, feature_names = load_window_data(
    all_dataframes, 
    all_data_names, 
    array_cols=array_cols,
    exclude_cols=[
        'window_index', 'window_size', 'stride', 'frame_start', 'frame_end',
        'window_start_time', 'window_end_time', 'data_name', 'ecg_bpm_in_window', 'ecg_bpm_in_window_mean', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm'
    ])

print(f"\n全動画統合データ数: {len(df_all_videos)} 窓")
print(f"特徴量数: {len(feature_names)}")
print(f"動画別の内訳: {df_all_videos['data_name'].value_counts().to_dict()}")

for window_size in WINDOW_SIZES:
    print(f"\n{'='*80}")
    print(f"窓長: {window_size}秒（全動画統合）")
    print(f"{'='*80}")

    # Keep only the windows of this length.
    df_window = df_all_videos[df_all_videos['window_size'] == window_size].copy()

    if len(df_window) == 0:
        print(f"警告: 窓長{window_size}秒のデータがありません。スキップします。")
        continue

    print(f"データ数: {len(df_window)} 窓")
    print(f"動画別の内訳: {df_window['data_name'].value_counts().to_dict()}")

    # Output goes to a directory relative to the current working directory.
    save_dir = os.path.join('corr_window_analysis_combined',f'window_{window_size}s')
    os.makedirs(save_dir, exist_ok=True)

    # Correlation analysis.
    corr_df = calculate_feature_mae_correlations(
        df_window, 
        feature_names,
        save_path=os.path.join(save_dir, "feature_mae_correlations.png"),
        title_prefix=f"全動画 - {window_size}s: ",
        top_n=20
    )

    # Time series of the MAE and the top features.
    if corr_df is not None:
        plot_window_param_mae_time_series(
            df_window,
            top_n=5,
            corr_df=corr_df,
            save_path=os.path.join(save_dir, "param_mae_time_series.png"),
            title_prefix=f"全動画 - {window_size}s: "
        )

        # ===== Feature-vs-MAE scatter plots =====
        print("\n[MAE分析]")
        plot_param_vs_mae(
            df_window,
            top_n=9,
            corr_df=corr_df,
            save_path=os.path.join(save_dir, "param_vs_mae_scatter.png"),
            title_prefix=f"全動画 - {window_size}s: "
        )

    # Per-video distribution (box plots) of the three top-ranked features.
    if corr_df is not None and len(corr_df) > 0:
        top_n_features = min(3, len(corr_df))

        fig, axes = plt.subplots(1, top_n_features, figsize=(6*top_n_features, 6))
        # A single subplot comes back as a bare Axes; wrap it so indexing works.
        if top_n_features == 1:
            axes = [axes]

        for idx in range(top_n_features):
            top_feature = corr_df.iloc[idx]['feature']
            correlation = corr_df.iloc[idx]['correlation']

            ax = axes[idx]
            df_window.boxplot(column=top_feature, by='data_name', ax=ax)
            ax.set_title(f'{top_feature}\n(相関: {correlation:.3f})')
            ax.set_xlabel('Video')
            ax.set_ylabel(top_feature)
            ax.tick_params(axis='x', rotation=45)
            plt.sca(ax)
            plt.xticks(rotation=45, ha='right')

        plt.suptitle(f'全動画 - {window_size}s: 上位特徴量の動画別分布')
        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, "top_features_by_video.png"), dpi=300, bbox_inches='tight')
        plt.show()
        # Close this figure explicitly rather than whichever one is current.
        plt.close(fig)

print("\n" + "="*80)
print("パターン3完了")
print("="*80)

### 偏相関解析(今回は1つの変数で蒸気の有無を予測するのでしなくてよい)

In [None]:
def calculate_partial_correlations(df, target_col, feature_cols, method='pearson'):
    """
    Compute, for each feature, its simple and its partial correlation with
    the target; the partial correlation controls for all the other features.

    Parameters:
    -----------
    df : pd.DataFrame
        Data containing target_col and every column in feature_cols.
    target_col : str
        Target column name (e.g. 'bpm_MAE').
    feature_cols : list
        Feature column names (the callers pass the top-N features).
        Repeated names are used once.
    method : str
        Correlation type ('pearson' or 'spearman'), passed to both
        Series.corr and pingouin.partial_corr.

    Returns:
    --------
    pd.DataFrame
        Columns: ['feature', 'simple_corr', 'partial_corr', 'p_value', 'diff'],
        sorted by |partial_corr| in descending order.
        - simple_corr: simple correlation coefficient
        - partial_corr: partial correlation coefficient
        - p_value: p-value of the partial correlation
        - diff: simple_corr - partial_corr. This is a signed difference, so
          for negative correlations it is not the drop in magnitude.
        Empty (but with these columns) when feature_cols is empty.

    Raises:
    -------
    KeyError
        If target_col or any feature is not a column of df.
    """
    result_columns = ['feature', 'simple_corr', 'partial_corr', 'p_value', 'diff']

    # De-duplicate while keeping order: a repeated name would select duplicate
    # columns and df_clean[feature] would then no longer be a Series.
    feature_cols = list(dict.fromkeys(feature_cols))

    # Nothing to analyse: hand back an empty frame with the documented columns
    # (sorting a column-less frame by 'partial_corr' would raise KeyError).
    if not feature_cols:
        return pd.DataFrame(columns=result_columns)

    missing = [col for col in [target_col] + feature_cols if col not in df.columns]
    if missing:
        raise KeyError(f"columns not found in df: {missing}")

    # Imported here so the optional dependency is only needed when there is
    # something to compute.
    import pingouin as pg

    # Drop rows with a missing value in the target or in any feature.
    df_clean = df[[target_col] + feature_cols].dropna()

    results = []
    for feature in feature_cols:
        simple_corr = df_clean[feature].corr(df_clean[target_col], method=method)

        # Partial correlation: every other feature acts as a control variable.
        covar = [f for f in feature_cols if f != feature]

        partial_result = pg.partial_corr(
            data=df_clean,
            x=feature,
            y=target_col,
            covar=covar,
            method=method
        )

        partial_corr = partial_result['r'].values[0]
        # NOTE(review): the p-value column is read as 'p-val'; fall back to
        # 'p_val' in case the installed pingouin uses the underscore
        # spelling — confirm against the installed version.
        p_col = 'p-val' if 'p-val' in partial_result.columns else 'p_val'
        p_value = partial_result[p_col].values[0]

        results.append({
            'feature': feature,
            'simple_corr': simple_corr,
            'partial_corr': partial_corr,
            'p_value': p_value,
            'diff': simple_corr - partial_corr
        })

    results_df = pd.DataFrame(results, columns=result_columns)
    results_df = results_df.sort_values('partial_corr', key=abs, ascending=False)

    return results_df

In [None]:
def plot_simple_vs_partial_correlation(results_df, save_path=None, title_prefix=""):
    """
    Draw a two-panel comparison of simple and partial correlations.

    Left panel: paired horizontal bars of |simple_corr| and |partial_corr|
    per feature. Right panel: the 'diff' column per feature, red when
    positive and green otherwise.

    Parameters:
    -----------
    results_df : pd.DataFrame
        Output of calculate_partial_correlations(); the columns 'feature',
        'simple_corr', 'partial_corr' and 'diff' are read.
    save_path : str
        Where to save the figure; nothing is saved when it is falsy.
    title_prefix : str
        Text prepended to both panel titles.

    The figure is closed at the end without being shown.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    fig, (ax_compare, ax_delta) = plt.subplots(1, 2, figsize=(16, 6))

    feature_labels = results_df['feature']
    deltas = results_df['diff']

    # ----- Left panel: |simple| and |partial| side by side -----
    bar_height = 0.35
    half_height = bar_height / 2
    positions = np.arange(len(results_df))

    ax_compare.barh(positions - half_height, results_df['simple_corr'].abs(),
                    bar_height, label='単純相関', alpha=0.8, color='steelblue')
    ax_compare.barh(positions + half_height, results_df['partial_corr'].abs(),
                    bar_height, label='偏相関', alpha=0.8, color='coral')

    ax_compare.set_yticks(positions)
    ax_compare.set_yticklabels(feature_labels, fontsize=9)
    ax_compare.set_xlabel('相関係数の絶対値', fontsize=11)
    ax_compare.set_title(f'{title_prefix}単純相関 vs 偏相関', fontsize=12, fontweight='bold')
    ax_compare.legend()
    ax_compare.grid(axis='x', alpha=0.3)
    # First row of results_df ends up at the top.
    ax_compare.invert_yaxis()

    # ----- Right panel: signed difference per feature -----
    bar_colors = []
    for delta in deltas:
        if delta > 0:
            bar_colors.append('red')
        else:
            bar_colors.append('green')

    ax_delta.barh(feature_labels, deltas, color=bar_colors, alpha=0.7)
    ax_delta.set_xlabel('減少量 (単純相関 - 偏相関)', fontsize=11)
    ax_delta.set_title(f'{title_prefix}相関係数の減少量', fontsize=12, fontweight='bold')
    ax_delta.axvline(x=0, color='black', linestyle='--', linewidth=0.8)
    ax_delta.grid(axis='x', alpha=0.3)
    ax_delta.invert_yaxis()

    # Legend-like annotation in the lower-right corner of the right panel.
    note_box = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax_delta.text(0.98, 0.02, '正: 他の特徴量との相関が高い\n負: 独立性が高い',
                  transform=ax_delta.transAxes, fontsize=9,
                  verticalalignment='bottom', horizontalalignment='right',
                  bbox=note_box)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"保存完了: {save_path}")

    plt.close()

In [None]:
# Per-video partial-correlation analysis: for every video and window length,
# read back the saved feature-vs-MAE ranking, take its first 10 features and
# compute their partial correlations with bpm_MAE.
# Uses movie_paths, data_dirs, movie_names, SAVE_DIR, WINDOW_SIZES,
# calculate_stats and load_window_data, none of which are defined in this cell.
for i in range(len(movie_paths)):
    inputMoviePath = movie_paths[i]
    rootDir = data_dirs[i]
    dataName = movie_names[i]
    
    print(f"\n{'='*80}")
    print(f"動画: {dataName}")
    print(f"{'='*80}")
    
    # Locate the window_analysis CSV (it carries the MAE)
    window_analysis_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_analysis_{dataName}.csv')
    
    if not os.path.exists(window_analysis_csv_path):
        print(f"警告: {window_analysis_csv_path} が見つかりません。スキップします。")
        continue
    
    # Load window_analysis_dataframe (contains the MAE)
    print("窓解析データを読み込み中...")
    window_analysis_dataframe = pd.read_csv(window_analysis_csv_path)
    
    # Locate the window_signals CSV (it carries the features)
    window_signals_csv_path = os.path.join(rootDir, SAVE_DIR, f'window_signals_{dataName}.csv')
    
    if not os.path.exists(window_signals_csv_path):
        print(f"警告: {window_signals_csv_path} が見つかりません。スキップします。")
        continue

    # ===================== Load the feature data ======================
    window_signals_dataframe = pd.read_csv(window_signals_csv_path)
    
    # Columns whose per-window values are summarised into statistics below
    array_cols = [
        'r_signal_in_window', 'g_signal_in_window', 'b_signal_in_window',
        'r_std_signal_in_window', 'g_std_signal_in_window', 'b_std_signal_in_window',
        'h_signal_in_window', 's_signal_in_window', 'v_signal_in_window',
        'h_std_signal_in_window', 's_std_signal_in_window', 'v_std_signal_in_window',
        'lightness_signal_in_window', 
        'lbp_entropy', 'lbp_variance', 'lbp_skewness', 'lbp_kurtosis',
        'lbp_chi2_distance', 'lbp_uniform_ratio',
        'canny_edge_ratio', 'glcm_isotropy',
        'flow_mean_motion', 'flow_std_motion', 'flow_ratio_1px', 'flow_ratio_5px', 'flow_ratio_10px',
        'wavelet_hh_energies', 'fft_high_freq_ratios',
        'angles_in_window'
    ]
    
    # For each array column that exists, append <col>_mean/_std/_max/_min/_iqr columns.
    # NOTE(review): calculate_stats is defined elsewhere; it presumably returns
    # five values per row in that order — confirm.
    for col in array_cols:
        if col in window_signals_dataframe.columns:
            print(f"処理中: {col}")
            stats_df = window_signals_dataframe[col].apply(calculate_stats)
            stats_df.columns = [f'{col}_mean', f'{col}_std', f'{col}_max', f'{col}_min', f'{col}_iqr']
            window_signals_dataframe = pd.concat([window_signals_dataframe, stats_df], axis=1)
    
    # Left-merge on window_index to attach the MAE and BPM columns
    window_signals_dataframe = pd.merge(
        window_signals_dataframe, 
        window_analysis_dataframe[['window_index', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm']],
        on='window_index',
        how='left'
    )

    # ===================== Build the combined frame and feature list ==========================
    df_all, feature_names = load_window_data(
        dataFrameArrays=[window_signals_dataframe],
        data_names=[dataName],
        exclude_cols=[
            'window_index', 'window_size', 'stride', 'frame_start', 'frame_end',
            'window_start_time', 'window_end_time', 'data_name', 'ecg_bpm_in_window', 
            'ecg_bpm_in_window_mean', 'bpm_MAE', 'ecg_bpm_mean', 'rppg_bpm'
        ],
        array_cols=array_cols
    )

    print(f"特徴量数: {len(feature_names)}")
    
    # ===================== Partial-correlation analysis per window length =====================
    for window_size in WINDOW_SIZES:
        print(f"\n{'─'*60}")
        print(f"窓長: {window_size}秒")
        print(f"{'─'*60}")
        
        # Keep only the windows of this length
        df_window = df_all[df_all['window_size'] == window_size].copy()
        
        if len(df_window) == 0:
            print(f"警告: 窓長{window_size}秒のデータがありません。スキップします。")
            continue
        
        print(f"データ数: {len(df_window)} 窓")
        
        # Output directory (same layout the per-video correlation cell writes to)
        save_dir = os.path.join(rootDir, SAVE_DIR, 'corr_window_analysis', 
                               f'per_video_per_window', f'{dataName}_window_{window_size}s')
        os.makedirs(save_dir, exist_ok=True)
        
        # Read back the saved correlation ranking; that CSV is written by the
        # per-video correlation cell, so that cell must have been run first
        corr_csv_path = os.path.join(save_dir, 'feature_mae_correlations.csv')
        
        if not os.path.exists(corr_csv_path):
            print(f"警告: {corr_csv_path} が見つかりません。スキップします。")
            continue
        
        print("相関解析データを読み込み中...")
        corr_df = pd.read_csv(corr_csv_path)
        
        # ===== Partial-correlation analysis =====
        print("\n[偏相関解析]")
        
        # First 10 rows of the CSV, in the order stored in the file
        # (presumably already ranked by correlation strength — confirm)
        top_features = corr_df.head(10)['feature'].tolist()
        print(f"上位10特徴量: {top_features}")
        
        # Compute the partial correlations; any failure is reported and the
        # loop moves on to the next window length
        try:
            partial_corr_df = calculate_partial_correlations(
                df=df_window,
                target_col='bpm_MAE',
                feature_cols=top_features,
                method='pearson'
            )
            
            # Print the result table
            print("\n偏相関解析結果:")
            print(partial_corr_df.to_string(index=False))
            
            # Save the comparison figure (the plot function closes it without showing it)
            plot_simple_vs_partial_correlation(
                results_df=partial_corr_df,
                save_path=os.path.join(save_dir, "simple_vs_partial_correlation.png"),
                title_prefix=f"{dataName} - {window_size}s: "
            )
            
            # Save the result as CSV
            partial_corr_df.to_csv(
                os.path.join(save_dir, "partial_correlations.csv"),
                index=False
            )
            print(f"偏相関結果を保存しました: {save_dir}/partial_correlations.csv")
            
        except Exception as e:
            print(f"エラー: 偏相関計算中にエラーが発生しました: {e}")
            continue