# bf

In [6]:
import os
import glob
import pandas as pd
import numpy as np
import zipfile
from scipy.fft import fft, fftfreq
from scipy.signal import medfilt

# === 基礎設定 ===
base_path = r"E:\EarthScienceFair_Data"
target_folders = ["1", "2", "3", "4"]

# 質量字典 (kg)
mass_dict = {
    "1-G2": 1.9505, "1-G3": 2.0122, "2-G1": 2.2201, "2-G2": 2.6162,
    "2-G3": 2.6162, "2-G4": 2.6162, "2-G5": 1.9495, "3-G1": 1.9650,
    "3-G2": 1.9531, "4-G1": 1.8559,
}
default_mass = 1.9000

# === 核心函數庫 ===

def auto_trim_index(y, threshold_factor=3.0, window_size=20):
    """偵測開始波動的索引，排除前 3s 雜訊"""
    if len(y) < window_size: return 0
    base_std = np.std(y[:min(100, len(y))])
    y_series = pd.Series(y)
    rolling_std = y_series.rolling(window=window_size).std().fillna(0).values
    trigger_threshold = base_std * threshold_factor
    indices = np.where(rolling_std > trigger_threshold)[0]
    return max(0, indices[0] - window_size) if len(indices) > 0 else 0

def filter_outliers(data, threshold=0.1):
    """過濾數據突變點"""
    if len(data) <= 1: return np.ones(len(data), dtype=bool)
    valid_mask = np.ones(len(data), dtype=bool)
    for i in range(1, len(data)):
        if abs(data[i] - data[i-1]) > threshold:
            valid_mask[i] = False
    return valid_mask

def clean_data_robust(data, time_data=False):
    s = pd.to_numeric(pd.Series(data), errors='coerce')
    
    # --- 關鍵修正：不刪除點，改用插值填補 NaN ---
    # 這樣才能確保 start_idx 裁切的位置在時間軸上是準確的
    s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
    s_clean = s.values
    
    if len(s_clean) == 0: return np.array([])
    
    if time_data:
        return s_clean - s_clean[0]
    else:
        # 過濾異常突波
        mask = filter_outliers(s_clean, threshold=0.1)
        s_filtered = s_clean.copy()
        # 將異常點設為前後平均值而非刪除
        s_filtered[~mask] = np.nan
        s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
        
        # 第一次去中心化
        s_filtered = s_filtered - np.mean(s_filtered)
        return s_filtered

# === 開始處理檔案 ===

xlsx_files = []
for folder in target_folders:
    folder_path = os.path.join(base_path, folder)
    if os.path.exists(folder_path):
        xlsx_files.extend(glob.glob(os.path.join(folder_path, "**", "*.xlsx"), recursive=True))

print(f"找到 {len(xlsx_files)} 個 xlsx 檔案\n")

group_data = {}
results = []

for tracker_file in xlsx_files:
    print(f"處理中: {tracker_file}")
    file_name = os.path.basename(tracker_file)
    parts = file_name.split('-')
    if len(parts) < 2: continue
    combined_key = f"{parts[0]}-{parts[1]}"
    mass = mass_dict.get(combined_key, default_mass)
    
    try:
        # 讀取 Excel
        df = pd.read_excel(tracker_file)
        
        # 1. 讀取與清理原始數據
        t_s_raw = df.iloc[:, 0].values
        # 側面數據 (ye 到 ya)
        side_raw = {k: df.iloc[:, i+1].values for i, k in enumerate(["ye", "yd", "yc", "yb", "ya"])}
        # 俯瞰數據 (x, y)
        x_raw = df.iloc[:, -2].values
        y_raw = df.iloc[:, -1].values
        
        t_s = clean_data_robust(t_s_raw, time_data=True)
        x_clean = clean_data_robust(x_raw)
        y_clean = clean_data_robust(y_raw)

        # 2. 處理俯瞰數據 (RMS 與 比例)
        if len(x_clean) > 10 and len(y_clean) > 10:
            # 中位數濾波移除尖峰
            x_med = medfilt(x_clean, kernel_size=5)
            y_med = medfilt(y_clean, kernel_size=5)
            
            # 自動裁切
            start_idx = auto_trim_index(y_med)
            x_final = x_med[start_idx:]
            y_final = y_med[start_idx:]
            
            # 第二次去中心化：確保裁切後的震盪完全繞著 0 旋轉
            x_final = x_final - np.mean(x_final)
            y_final = y_final - np.mean(y_final)
            
            rms_x = np.sqrt(np.mean(x_final**2))
            rms_y = np.sqrt(np.mean(y_final**2))
            ratio = rms_y / rms_x if rms_x > 1e-8 else np.nan
        else:
            rms_x, rms_y, ratio, start_idx = np.nan, np.nan, np.nan, 0

        # 3. 處理側面數據 (FFT 頻率偵測)
        main_freqs = []
        for key, raw_val in side_raw.items():
            s_clean = clean_data_robust(raw_val)
            if len(s_clean) > start_idx + 10:
                # 同步裁切與去中心化
                s_trimmed = s_clean[start_idx:]
                s_centered = s_trimmed - np.mean(s_trimmed)
                
                # 計算 FFT (重心法)
                dt = np.mean(np.diff(t_s[:len(s_centered)]))
                N = len(s_centered)
                if dt <= 0 or N < 10: continue
                
                yf = fft(s_centered)
                xf = fftfreq(N, dt)[:N//2]
                amp = 2.0/N * np.abs(yf[:N//2])
                
                # 頻率遮罩：只看 1.2~5.0 Hz 區間
                mask = (xf >= 1.2) & (xf <= 5.0)
                if np.any(mask):
                    m_xf, m_amp = xf[mask], amp[mask]
                    # 重心法公式：sum(f * amp) / sum(amp)
                    f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)
                    main_freqs.append(f_centroid)
        
        f_n = np.mean(main_freqs) if main_freqs else np.nan
        print(f"  -> 自然頻率: {f_n:.2f} Hz, RMS_x: {rms_x:.4f}")
        
        # 計算剛性 k
        k = mass * (2 * np.pi * f_n)**2 if not np.isnan(f_n) else np.nan

        # 4. 加速度處理 (讀取同名 Zip)
        folder = os.path.dirname(tracker_file)
        base_name = os.path.splitext(os.path.basename(tracker_file))[0]
        zip_path = os.path.join(folder, f"{base_name}.zip")
        
        rms_acc = np.nan
        if os.path.exists(zip_path):
            try:
                with zipfile.ZipFile(zip_path, 'r') as z:
                    csv_files = [f for f in z.namelist() if f.endswith('.csv')]
                    if csv_files:
                        with z.open(csv_files[0]) as f:
                            acc_df = pd.read_csv(f)
                            acc_abs = pd.to_numeric(acc_df.iloc[:, -1], errors='coerce').dropna().values
                            if len(acc_abs) > 0:
                                # 加速度去中心化
                                acc_centered = acc_abs - np.mean(acc_abs)
                                rms_acc = np.sqrt(np.mean(acc_centered**2))
            except Exception as acc_e:
                print(f"  加速度讀取失敗: {acc_e}")

        # 5. 儲存單次實驗結果
        results.append([tracker_file, combined_key, rms_x, rms_y, ratio, f_n, k, rms_acc])
        
        if combined_key not in group_data:
            group_data[combined_key] = {p: [] for p in ['rms_x', 'rms_y', 'ratio', 'f_n', 'k', 'rms_acc']}
        
        for field, val in zip(['rms_x', 'rms_y', 'ratio', 'f_n', 'k', 'rms_acc'], 
                               [rms_x, rms_y, ratio, f_n, k, rms_acc]):
            group_data[combined_key][field].append(val)

    except Exception as e:
        print(f"檔案 {tracker_file} 處理發生錯誤: {e}")

# === 計算不確定度統計 ===

uncertainty_list = []
for group_name, data in group_data.items():
    stats = {'Group': group_name}
    print(f"\n--- {group_name} 統計結果 ---")
    for param, values in data.items():
        v = [val for val in values if not np.isnan(val)]
        if len(v) >= 2:
            m = np.mean(v)
            u = np.std(v, ddof=1) / np.sqrt(len(v))
            rel_u = (u / m * 100) if m != 0 else 0
            stats[f"{param}_mean"] = m
            stats[f"{param}_u"] = u
            print(f"  {param:7}: 平均={m:8.4f}, 相對不確定度={rel_u:6.2f}%")
        else:
            stats[f"{param}_mean"] = v[0] if v else np.nan
            stats[f"{param}_u"] = np.nan
    uncertainty_list.append(stats)

# === 存檔 ===
results_df = pd.DataFrame(results, columns=["File", "Group", "RMS_x", "RMS_y", "Ratio", "f_n", "k", "RMS_acc"])
uncertainty_df = pd.DataFrame(uncertainty_list)

results_df.to_csv("analysis_results.csv", index=False, encoding='utf-8-sig')
uncertainty_df.to_csv("uncertainty_summary.csv", index=False, encoding='utf-8-sig')

print("\n分析完成！結果已存入 analysis_results.csv 與 uncertainty_summary.csv")

找到 13 個 xlsx 檔案

處理中: E:\EarthScienceFair_Data\1\G2\1-G2-1.xlsx
  -> 自然頻率: nan Hz, RMS_x: 0.0249


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)


處理中: E:\EarthScienceFair_Data\1\G2\1-G2-2.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)


  -> 自然頻率: nan Hz, RMS_x: 0.0243
處理中: E:\EarthScienceFair_Data\1\G2\1-G2-3.xlsx
  -> 自然頻率: nan Hz, RMS_x: 0.0246
處理中: E:\EarthScienceFair_Data\1\G2\1-G2-4.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values


  -> 自然頻率: nan Hz, RMS_x: 0.0246
處理中: E:\EarthScienceFair_Data\1\G2\1-G2-5.xlsx
  -> 自然頻率: 3.07 Hz, RMS_x: 0.0241
處理中: E:\EarthScienceFair_Data\1\G3\1-G3-1.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values


  -> 自然頻率: 3.03 Hz, RMS_x: 0.1202
處理中: E:\EarthScienceFair_Data\1\G3\1-G3-2.xlsx
  -> 自然頻率: 3.21 Hz, RMS_x: 0.0242
處理中: E:\EarthScienceFair_Data\1\G3\1-G3-3.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  f_centroid = np.sum(m_xf * m_amp) / np.sum(m_amp)


  -> 自然頻率: nan Hz, RMS_x: 0.0248
處理中: E:\EarthScienceFair_Data\1\G3\1-G3-4.xlsx
  -> 自然頻率: nan Hz, RMS_x: 0.0242
處理中: E:\EarthScienceFair_Data\1\G3\1-G3-5.xlsx
  -> 自然頻率: 3.14 Hz, RMS_x: 0.0241
處理中: E:\EarthScienceFair_Data\2\G1\2-G1-1.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values


  -> 自然頻率: 3.12 Hz, RMS_x: 0.0248
處理中: E:\EarthScienceFair_Data\2\G1\2-G1-2.xlsx
  -> 自然頻率: 3.11 Hz, RMS_x: 0.0252
處理中: E:\EarthScienceFair_Data\2\G1\2-G1-3.xlsx


  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values
  s = s.interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  s_filtered = pd.Series(s_filtered).interpolate().fillna(method='bfill').values


  -> 自然頻率: 3.09 Hz, RMS_x: 0.0247

--- 1-G2 統計結果 ---
  rms_x  : 平均=  0.0245, 相對不確定度=  0.59%
  rms_y  : 平均=  0.0006, 相對不確定度=  3.53%
  ratio  : 平均=  0.0252, 相對不確定度=  3.70%
  rms_acc: 平均=  4.8214, 相對不確定度=  2.06%

--- 1-G3 統計結果 ---
  rms_x  : 平均=  0.0435, 相對不確定度= 44.10%
  rms_y  : 平均=  0.0020, 相對不確定度= 60.31%
  ratio  : 平均=  0.0377, 相對不確定度= 13.78%
  f_n    : 平均=  3.1278, 相對不確定度=  1.65%
  k      : 平均=777.5796, 相對不確定度=  3.29%
  rms_acc: 平均=  5.2637, 相對不確定度=  1.90%

--- 2-G1 統計結果 ---
  rms_x  : 平均=  0.0249, 相對不確定度=  0.67%
  rms_y  : 平均=  0.0021, 相對不確定度= 19.48%
  ratio  : 平均=  0.0838, 相對不確定度= 19.02%
  f_n    : 平均=  3.1051, 相對不確定度=  0.33%
  k      : 平均=845.0890, 相對不確定度=  0.66%
  rms_acc: 平均=  5.2414, 相對不確定度=  3.25%

分析完成！結果已存入 analysis_results.csv 與 uncertainty_summary.csv
