In [None]:
import os
import glob
import pandas as pd
import numpy as np
import zipfile
from scipy.fft import fft, fftfreq

# Location of the recorded experiment data on disk.
base_path = r"E:\EarthScienceFair_Data"
# Only the sub-folders named "1" through "4" are processed.
target_folders = [str(folder_no) for folder_no in range(1, 5)]

# Mass (kg) of each experiment group, keyed by the group name that appears
# in the file names (e.g. "G1", "G2").
# Per the original notes, the G2-G7 values should be adjusted to the actual
# measured masses.
mass_dict = dict(
    G1=1.8559,
    G2=1.8600,
    G3=1.8550,
    G4=1.8580,
    G5=1.8570,
    G6=1.8590,
    G7=1.8560,
)

# Mass (kg) used when a group name is not listed in mass_dict.
default_mass = 1.8559

# Per-group lists of every computed quantity, used later for the uncertainty
# statistics. Layout: {"G1": {"rms_x": [...], "rms_y": [...], ...}, ...}
group_data = dict()

# One row per processed file.
results = list()

def filter_outliers(data, threshold=0.1):
    """
    Build a boolean keep-mask that rejects samples jumping away from the
    sample immediately before them.

    A sample is marked invalid (False) when the absolute difference to its
    direct predecessor is greater than ``threshold``. The comparison always
    uses the raw predecessor, even if that predecessor was itself rejected.
    The first sample has no predecessor and is always kept.

    Parameters
    ----------
    data : sequence of numbers
        Samples in acquisition order.
    threshold : float, optional
        Largest allowed step between consecutive samples (default 0.1).

    Returns
    -------
    numpy.ndarray
        Boolean array of ``len(data)``; True marks samples to keep.
    """
    count = len(data)
    keep = np.ones(count, dtype=bool)

    # Zero or one sample: there is nothing to compare against.
    if count <= 1:
        return keep

    # Step size between each sample and the one before it.
    jumps = np.abs(np.diff(data))
    keep[1:] = ~(jumps > threshold)
    return keep

def clean_data_with_outlier_filter(data, time_data=None, threshold=0.1):
    """
    Convert one raw column into a clean float array.

    Entries that ``pd.to_numeric`` cannot parse (text, None, NaN) are dropped
    first. What happens next depends on ``time_data``:

    * time axis (``time_data`` provided and not False): the values are only
      shifted so that the first sample is 0; no outlier filtering is applied.
    * measurement (``time_data`` omitted / None / False): samples rejected by
      ``filter_outliers`` are removed and the remaining values are centred on
      their mean.

    The previous implementation had these two branches swapped, so time
    columns were outlier-filtered and measurement columns were not.

    Parameters
    ----------
    data : array-like
        Raw column values.
    time_data : optional
        Pass a truthy value (the callers pass True) to mark ``data`` as a
        time axis.
    threshold : float, optional
        Maximum allowed step between consecutive measurement samples,
        forwarded to ``filter_outliers`` (default 0.1).

    Returns
    -------
    numpy.ndarray
        1-D float array; may be empty.
    """
    # Wrap in a Series so to_numeric returns a Series (it returns a plain
    # ndarray for ndarray input), then keep only the parseable entries.
    numeric = pd.to_numeric(pd.Series(data), errors='coerce')
    data_clean = numeric.dropna().to_numpy(dtype=float)

    if len(data_clean) == 0:
        return data_clean

    # Identity checks instead of truthiness so that an array passed as
    # time_data does not raise an "ambiguous truth value" error.
    is_time_axis = time_data is not None and time_data is not False

    if is_time_axis:
        # Time axis: basic cleaning only, re-based to start at zero.
        return data_clean - data_clean[0]

    # Measurement: drop sudden jumps, then remove the mean offset.
    valid_mask = filter_outliers(data_clean, threshold=threshold)
    data_filtered = data_clean[valid_mask]
    if len(data_filtered) > 0:
        data_filtered = data_filtered - np.mean(data_filtered)
    return data_filtered

# Collect every .xlsx file located anywhere below the selected target folders.
xlsx_files = []
for subfolder in target_folders:
    search_root = os.path.join(base_path, subfolder)
    # Target folders that do not exist on disk are skipped silently.
    if not os.path.exists(search_root):
        continue
    pattern = os.path.join(search_root, "**", "*.xlsx")
    xlsx_files += glob.glob(pattern, recursive=True)

print(f"找到 {len(xlsx_files)} 個 xlsx 檔案在 file 1-4 中\n")

# Process every tracker workbook: compute the RMS displacements, the dominant
# oscillation frequency, the equivalent stiffness and the RMS acceleration,
# then store the values per file (results) and per group (group_data).
for tracker_file in xlsx_files:
    print("處理:", tracker_file)

    # Extract the experiment group from the file name,
    # e.g. "1-G1-1.xlsx" -> "G1".
    file_name = os.path.basename(tracker_file)
    parts = file_name.split('-')
    if len(parts) < 2:
        print("檔名格式不符，跳過")
        continue
    group_name = parts[1]

    # Mass of this group; groups missing from mass_dict use default_mass.
    mass = mass_dict.get(group_name, default_mass)
    print(f"  組別: {group_name}, 質量: {mass} kg")

    # Catch Exception rather than using a bare except, so Ctrl-C still
    # interrupts the batch, and report why the file could not be read.
    try:
        df = pd.read_excel(tracker_file)
    except Exception as e:
        print("讀取失敗:", tracker_file, "-", e)
        continue

    # The positional column access below needs at least six columns; skip the
    # file instead of aborting the whole batch with an IndexError.
    if df.shape[1] < 6:
        print("欄位數不足，跳過")
        continue

    # Overhead view: the last three columns (time, x, y).
    t_a_raw = df.iloc[:, -3].values
    x_raw = df.iloc[:, -2].values
    y_raw = df.iloc[:, -1].values

    # Side view: the first six columns (time, then layers e, d, c, b, a).
    t_s_raw = df.iloc[:, 0].values
    ye_raw = df.iloc[:, 1].values
    yd_raw = df.iloc[:, 2].values
    yc_raw = df.iloc[:, 3].values
    yb_raw = df.iloc[:, 4].values
    ya_raw = df.iloc[:, 5].values

    # Time columns are flagged with time_data=True.
    t_a = clean_data_with_outlier_filter(t_a_raw, time_data=True)
    t_s = clean_data_with_outlier_filter(t_s_raw, time_data=True)

    # Overhead-view measurement columns.
    x = clean_data_with_outlier_filter(x_raw)
    y = clean_data_with_outlier_filter(y_raw)

    # Side-view measurement columns.
    ye = clean_data_with_outlier_filter(ye_raw)
    yd = clean_data_with_outlier_filter(yd_raw)
    yc = clean_data_with_outlier_filter(yc_raw)
    yb = clean_data_with_outlier_filter(yb_raw)
    ya = clean_data_with_outlier_filter(ya_raw)

    # RMS of the overhead displacements and the y/x ratio. The ratio is left
    # as NaN when rms_x is NaN or too close to zero to divide by.
    rms_x = np.sqrt(np.mean(x**2)) if len(x) > 0 else np.nan
    rms_y = np.sqrt(np.mean(y**2)) if len(y) > 0 else np.nan
    ratio = rms_y / rms_x if (rms_x > 1e-8 and not np.isnan(rms_x)) else np.nan

    # FFT of each side-view layer to find its dominant frequency.
    layers = {
        "ye": ye, "yd": yd, "yc": yc, "yb": yb, "ya": ya
    }
    main_freqs = []

    for key, y_layer in layers.items():
        if len(y_layer) == 0 or len(t_s) == 0:
            continue

        # NOTE(review): every column is cleaned independently, so the signal
        # and the time axis can end up with different lengths. Truncating
        # both to the shorter one assumes the dropped samples do not shift
        # the remaining ones out of alignment - confirm for the real data.
        min_len = min(len(t_s), len(y_layer))
        t_temp = t_s[:min_len]
        y_temp = y_layer[:min_len]

        if len(t_temp) < 2:
            continue

        # Mean sample spacing; a non-positive value means the time axis is
        # unusable for a frequency estimate.
        dt = np.mean(np.diff(t_temp))
        if dt <= 0:
            continue

        # Single-sided amplitude spectrum (positive frequencies only).
        N = len(y_temp)
        yf = fft(y_temp)
        xf = fftfreq(N, dt)
        xf = xf[:N//2]
        amplitude = 2.0/N * np.abs(yf[:N//2])

        if len(amplitude) > 1:
            # Skip bin 0 (the DC component) when searching for the peak.
            peak_index = np.argmax(amplitude[1:]) + 1
            main_freq = xf[peak_index]
            print(f"  {key} 主頻率: {main_freq:.2f} Hz")
            main_freqs.append(main_freq)

    # The system frequency is the mean of the per-layer dominant frequencies.
    if len(main_freqs) > 0:
        f_n = np.mean(main_freqs)
        print(f"  系統自然頻率: {f_n:.2f} Hz")
    else:
        f_n = np.nan
        print("  無法計算頻率")

    # Equivalent stiffness from k = m * (2*pi*f_n)^2.
    if not np.isnan(f_n):
        omega = 2 * np.pi * f_n
        k = mass * omega**2
    else:
        k = np.nan

    # Acceleration data: a .zip in the same folder whose base name matches
    # the workbook's base name.
    folder = os.path.dirname(tracker_file)
    base_name = os.path.splitext(os.path.basename(tracker_file))[0]

    zip_file = None
    for candidate in glob.glob(os.path.join(folder, "*.zip")):
        if os.path.splitext(os.path.basename(candidate))[0] == base_name:
            zip_file = candidate
            break

    # Stays NaN unless a usable acceleration column is found below.
    rms_acc = np.nan

    if zip_file is not None:
        try:
            with zipfile.ZipFile(zip_file, 'r') as archive:
                csv_files = [name for name in archive.namelist() if name.endswith('.csv')]

                if len(csv_files) > 0:
                    # Only the first CSV in the archive is read.
                    with archive.open(csv_files[0]) as csv_handle:
                        acc_df = pd.read_csv(csv_handle, sep=',')

                    # The last column is used as the absolute acceleration.
                    # Pass the column as a Series: pd.to_numeric returns a
                    # plain ndarray for ndarray input, and an ndarray has no
                    # .notna()/.dropna(). That was the AttributeError which
                    # previously forced rms_acc to NaN for every file.
                    acc_numeric = pd.to_numeric(acc_df.iloc[:, -1], errors='coerce')
                    acc_abs = acc_numeric.dropna().to_numpy(dtype=float)

                    # Drop sudden jumps between consecutive samples.
                    valid_mask = filter_outliers(acc_abs, threshold=0.1)
                    acc_abs_filtered = acc_abs[valid_mask]

                    if len(acc_abs_filtered) > 0:
                        rms_acc = np.sqrt(np.mean(acc_abs_filtered**2))
        except Exception as e:
            # Best effort: a broken archive must not stop the batch.
            print(f"  讀取 zip 檔案錯誤: {e}")
            rms_acc = np.nan

    # Store the per-file result row.
    results.append([tracker_file, group_name, rms_x, rms_y, ratio, f_n, k, rms_acc])

    # Add the values to the per-group lists used for the uncertainty
    # statistics.
    if group_name not in group_data:
        group_data[group_name] = {
            'rms_x': [], 'rms_y': [], 'ratio': [],
            'f_n': [], 'k': [], 'rms_acc': []
        }

    group_data[group_name]['rms_x'].append(rms_x)
    group_data[group_name]['rms_y'].append(rms_y)
    group_data[group_name]['ratio'].append(ratio)
    group_data[group_name]['f_n'].append(f_n)
    group_data[group_name]['k'].append(k)
    group_data[group_name]['rms_acc'].append(rms_acc)

    print()

# Build the per-file results table. The column order matches the order of
# the values in each row appended to `results`.
result_columns = [
    "File",
    "Group",
    "RMS_x",
    "RMS_y",
    "偏心比例",
    "主頻率(Hz)",
    "等效剛性(N/m)",
    "RMS加速度",
]
results_df = pd.DataFrame(results, columns=result_columns)

# Per-group uncertainty statistics: one summary row per experiment group.
uncertainty_results = []

# (output column prefix, key in the per-group data) for the summary row.
summary_columns = (
    ('RMS_x', 'rms_x'),
    ('RMS_y', 'rms_y'),
    ('ratio', 'ratio'),
    ('f_n', 'f_n'),
    ('k', 'k'),
    ('rms_acc', 'rms_acc'),
)

for group_name, data in group_data.items():
    print(f"\n計算 {group_name} 的不確定度:")

    uncertainties = {}

    for param_name, values in data.items():
        # Ignore NaN entries when computing the statistics.
        valid_values = [v for v in values if not np.isnan(v)]
        n = len(valid_values)

        if n < 2:
            # With fewer than two samples only the single value (if any) is
            # reported as the mean; the spread-based fields are NaN.
            uncertainties[param_name] = {
                'mean': valid_values[0] if n == 1 else np.nan,
                'std': np.nan,
                'u': np.nan,
                'relative_u': np.nan,
                'n': n,
            }
            print(f"  {param_name}: 樣本數不足 (n={n})")
            continue

        mean_val = np.mean(valid_values)
        # Sample standard deviation (n - 1 in the denominator).
        std_val = np.std(valid_values, ddof=1)
        # Standard uncertainty of the mean: u = s / sqrt(n).
        u_val = std_val / np.sqrt(n)
        # Relative uncertainty in percent; undefined for a zero mean.
        if mean_val != 0:
            relative_u = u_val / mean_val * 100
        else:
            relative_u = np.nan

        uncertainties[param_name] = {
            'mean': mean_val,
            'std': std_val,
            'u': u_val,
            'relative_u': relative_u,
            'n': n,
        }

        print(f"  {param_name}:")
        print(f"    平均值 = {mean_val:.6f}")
        print(f"    標準差 = {std_val:.6f}")
        print(f"    標準不確定度 u = {u_val:.6f}")
        print(f"    相對不確定度 = {relative_u:.2f}%")
        print(f"    樣本數 n = {n}")

    # Flatten the per-parameter statistics into one row for this group.
    summary_row = {'Group': group_name}
    for prefix, param in summary_columns:
        summary_row[f'{prefix}_mean'] = uncertainties[param]['mean']
        summary_row[f'{prefix}_u'] = uncertainties[param]['u']
    # The sample size reported is the number of valid rms_x values.
    summary_row['sample_size'] = uncertainties['rms_x']['n']
    uncertainty_results.append(summary_row)

uncertainty_df = pd.DataFrame(uncertainty_results)

# Print both tables, each framed by an 80-character separator line.
separator = "=" * 80
report_sections = (
    ("File 1-4 分析結果:", results_df),
    ("各組不確定度統計:", uncertainty_df),
)
for title, table in report_sections:
    print("\n" + separator)
    print(title)
    print(separator)
    print(table)

# Write both tables to CSV in the current working directory. The encoding is
# 'utf-8-sig', i.e. UTF-8 with a byte-order mark.
results_df.to_csv("analysis_results_file1to4.csv", index=False, encoding='utf-8-sig')
uncertainty_df.to_csv("uncertainty_file1to4.csv", index=False, encoding='utf-8-sig')

print("\n結果已儲存至:")
print("  - analysis_results_file1to4.csv (詳細結果)")
print("  - uncertainty_file1to4.csv (不確定度統計)")

找到 5 個 xlsx 檔案在 file 1-4 中

處理: E:\EarthScienceFair_Data\1\G2\1-G2-1.xlsx


AttributeError: 'numpy.ndarray' object has no attribute 'notna'

The above code was provided by Claude and fixed by a human.