# In [7]:
import os
import pandas as pd
import numpy as np
import scipy
from scipy import signal

def extract_test_id(file_name):
    """
    Builds the test ID from a file name.

    The name is split on underscores and the first two fields are joined back
    together with a single underscore, e.g. "T1_ch2_run.txt" -> "T1_ch2".
    An IndexError is raised if the name contains no underscore.
    """
    fields = file_name.split("_")
    leading, second = fields[0], fields[1]
    return "_".join((leading, second))

def process_file(file_path):
    """
    Processes a single data file and returns its strongest spectral peak frequencies.

    The signal is loaded with ``np.loadtxt``, the bands 190-210 Hz, 380-420 Hz
    and 760-840 Hz (the noise and its harmonics) are zeroed in the STFT domain,
    and the FFT magnitude of the reconstructed signal is searched for peaks
    below 20 kHz.

    Args:
        file_path: Path to a text file readable by ``np.loadtxt``. The file is
            assumed to hold a single column of samples taken every
            0.000019531 s, and its base name must contain at least one
            underscore so that ``extract_test_id`` can derive a test ID.

    Returns:
        A list ``[test_id, f_1, ..., f_17]`` whose frequencies (Hz) are ordered
        by descending peak amplitude. When fewer than 17 peaks are found the
        list is padded with NaN so that every row has the same length.

    Raises:
        ValueError: If the file contains no samples.
    """
    sample_interval = 0.000019531  # seconds between consecutive samples
    sr = 1 / sample_interval  # sampling rate (Hz), roughly 51.2 kHz
    stft_segment = 50000 * 5  # requested STFT segment length (samples)
    notch_bands = ((190, 210), (380, 420), (760, 840))  # exclusive bounds (Hz)
    max_frequency = 20000  # peaks are only searched below this frequency (Hz)
    num_peaks = 17  # number of frequencies reported per file

    # Extract the test ID from the file name
    file_name = os.path.basename(file_path)
    test_id = extract_test_id(file_name)

    # Load the data; ndmin=1 keeps a single-value file from becoming a 0-d array
    data = np.loadtxt(file_path, ndmin=1)
    if data.size == 0:
        raise ValueError(f"No samples found in {file_path!r}")

    # Use Short-Time Fourier Transform (STFT) to extract the frequency components.
    # The segment length is clamped to the signal length and the same value is
    # passed to istft below: left to its defaults, istft infers an even segment
    # length from the shape of Zxx, which is wrong for short odd-length inputs.
    nperseg = min(stft_segment, data.shape[-1])
    f, _, Zxx = signal.stft(data, fs=sr, nperseg=nperseg)

    # Mark the frequency bins that contain the noise and its harmonics
    notch = np.zeros(f.shape, dtype=bool)
    for low, high in notch_bands:
        notch |= (f > low) & (f < high)

    # Zero those bins in every time frame (rows of Zxx are frequency bins)
    Zxx = np.where(notch[:, np.newaxis], 0, Zxx)

    # Inverse STFT to obtain the denoised time-domain signal (real-valued for
    # the one-sided spectrum produced by stft)
    _, denoised_data = signal.istft(Zxx, fs=sr, nperseg=nperseg)

    # FFT of the denoised signal and the frequency (Hz) of every FFT bin
    spectrum = scipy.fft.fft(denoised_data)
    duration = len(spectrum) / sr  # length of the analysed signal (s)
    x_freq = np.arange(len(spectrum)) / duration

    # Find peaks in the magnitude spectrum below max_frequency; `distance` is
    # the minimum separation between neighbouring peaks, measured in FFT bins
    search_limit = int(len(spectrum) * max_frequency // sr)
    peaks, props = signal.find_peaks(
        np.abs(spectrum[:search_limit]), height=5, distance=100000
    )

    # Order the peak frequencies by descending amplitude and keep the strongest
    peak_amplitudes = props['peak_heights']
    peak_frequencies = x_freq[peaks]
    strongest_first = np.argsort(-peak_amplitudes)
    top_frequencies = peak_frequencies[strongest_first[:num_peaks]].tolist()

    # Pad short results so every file contributes exactly num_peaks columns
    top_frequencies += [float("nan")] * (num_peaks - len(top_frequencies))

    # Row for the current data file: the test ID followed by its frequencies
    return [test_id] + top_frequencies

def process_files(file_paths):
    """
    Processes a list of data files and writes their peak frequencies to "peak_auto.csv".

    Each file is passed to ``process_file``, which returns a row of the form
    ``[test_id, frequency, ...]``. Frequencies of files that share a test ID
    are concatenated, in the order the files are given, into a single CSV row.

    Args:
        file_paths: Iterable of paths to the data files. May be empty, in which
            case a CSV containing only the "Test ID" header is written.

    Returns:
        None. The result is written to "peak_auto.csv" in the current working
        directory (columns "Test ID", "Peak 1", "Peak 2", ...).
    """
    # Group the peak frequencies by test ID, preserving first-seen order
    frequencies_by_test = {}
    for file_path in file_paths:
        test_id, *frequencies = process_file(file_path)
        frequencies_by_test.setdefault(test_id, []).extend(frequencies)

    # Size the header from the widest row rather than the first one, so that
    # test IDs with fewer files (or fewer detected peaks) do not break the table
    width = max((len(freqs) for freqs in frequencies_by_test.values()), default=0)
    column_names = ["Test ID"] + [f"Peak {i}" for i in range(1, width + 1)]

    # Pad shorter rows with NaN; these become empty cells in the CSV
    data_rows = [
        [test_id] + freqs + [np.nan] * (width - len(freqs))
        for test_id, freqs in frequencies_by_test.items()
    ]

    # Create a Pandas DataFrame with the processed data and save it as CSV
    df = pd.DataFrame(data_rows, columns=column_names)
    df.to_csv("peak_auto.csv", index=False)
    return None



# Define the directory containing the data files
data_dir = "RAW_DATA"

# Collect the regular files in the data directory. os.listdir returns entries in
# arbitrary order, and process_files concatenates the peaks of files sharing a
# test ID in the order it receives them, so the listing is sorted to keep the
# column layout of the output reproducible between runs and machines.
file_paths = [
    os.path.join(data_dir, f)
    for f in sorted(os.listdir(data_dir))
    if os.path.isfile(os.path.join(data_dir, f))
]

# Process the files and write the results to peak_auto.csv
process_files(file_paths)
