In [None]:
import scipy.io as sio
import numpy as np
import os

In [None]:
def hampel_filter(data, window_size=5, threshold=3, verbose=True):
    """Replace local outliers in a 1-D sequence with the median of their window.

    For every index ``i`` a window of up to ``window_size // 2`` samples on each
    side of ``i`` (truncated at the sequence ends) is taken from the *original*
    data. A sample is treated as an outlier when its absolute deviation from
    the window median exceeds ``threshold`` times the window's median absolute
    deviation (MAD). The raw MAD is used; no consistency scale factor is applied.

    Args:
        data: 1-D sequence supporting ``len``, slicing and ``.copy()``
            (a NumPy array in this notebook).
        window_size: Nominal window width in samples.
        threshold: Multiple of the MAD beyond which a sample is replaced.
        verbose: When True (the default), print the data range, each
            replacement and the number of outliers found.

    Returns:
        A copy of ``data`` with outliers replaced by their window median. The
        input itself is not modified. When ``data`` is an integer NumPy array
        the median is cast to that dtype on assignment.
    """
    filtered_data = data.copy()
    n = len(data)
    outliers_found = 0
    half_window = window_size // 2

    # np.min / np.max raise ValueError on empty input, so only report the range
    # when there is something to report; the loop below is a no-op for n == 0.
    if verbose and n > 0:
        print(f"    Data range: {np.min(data):.3f} to {np.max(data):.3f}")

    for i in range(n):
        start = max(0, i - half_window)
        end = min(i + half_window + 1, n)
        # Always read from the unfiltered input so earlier replacements do not
        # influence the statistics of later windows.
        window = data[start:end]

        median_value = np.median(window)
        mad = np.median(np.abs(window - median_value))

        # A zero MAD means at least half of the window equals the median; the
        # test would then flag any deviation at all, so such windows are skipped.
        if mad > 0 and np.abs(data[i] - median_value) > threshold * mad:
            if verbose:
                print(f"    Outlier: {data[i]:.3f} -> {median_value:.3f}")
            filtered_data[i] = median_value
            outliers_found += 1

    if verbose:
        print(f"    Outliers found: {outliers_found}")
    return filtered_data

In [None]:
def preprocess(file_path):
    """Hampel-filter every row of the 'CSIamp' matrix in a .mat file and save it back.

    The file at ``file_path`` is overwritten with a .mat file that contains only
    the filtered 'CSIamp' variable; any other variables in the original file are
    not written back. Running this twice on the same file filters the already
    filtered data again.

    Problems are reported with ``print`` rather than raised, so that one bad
    file does not abort a batch run.

    Args:
        file_path: Path of the .mat file to filter in place.

    Returns:
        True if the file was filtered and rewritten; False if it was skipped
        ('CSIamp' missing or not 2-D) or an error occurred while processing it.
    """
    try:
        data = sio.loadmat(file_path)

        if 'CSIamp' not in data:
            print(f"Warning: 'CSIamp' key not found in {file_path}")
            return False

        # Original author's note: 342 rows are expected here (not enforced).
        csi_amps = data['CSIamp']

        if csi_amps.ndim != 2:
            print(f"Warning: Unexpected data shape {csi_amps.shape} in {file_path}")
            return False

        # Filter each row independently, writing the result back into the matrix.
        for i in range(len(csi_amps)):
            csi_amps[i] = hampel_filter(csi_amps[i])

        sio.savemat(file_path, {'CSIamp': csi_amps})
        return True

    except Exception as e:
        # Deliberately best-effort: report the failure and signal it to the caller.
        print(f"Error processing {file_path}: {str(e)}")
        return False

In [None]:
def list_all_files(root='.'):
    """Walk ``root`` recursively and run ``preprocess`` on every ``.mat`` file.

    Despite the name, nothing is listed or returned as a collection: each file
    whose path ends in '.mat' is passed to ``preprocess``.

    Args:
        root: Directory to start from.

    Returns:
        The number of '.mat' files handed to ``preprocess`` under ``root``,
        counted whether or not preprocessing of a given file succeeded.
    """
    total = 0
    for name in os.listdir(root):
        path = os.path.join(root, name)

        # Sub-directories contribute the count of their own subtree.
        if os.path.isdir(path):
            total += list_all_files(path)
            continue

        # Anything that is not a .mat file is ignored (match is case-sensitive).
        if not path.endswith('.mat'):
            continue

        preprocess(path)
        total += 1
    return total

# Dataset directory one level above the current working directory. Built with
# os.path.join so no backslash escape sequence is involved and the path resolves
# on any OS (on Windows this is still '..\dataset').
root = os.path.join(os.pardir, 'dataset')
processed_count = list_all_files(root)
# The count is the number of .mat files visited, including any that
# preprocess skipped or failed on.
print(f"\nTotal processed: {processed_count} files")