In [7]:
import os
import numpy as np
import pandas as pd

# Directory that is scanned for the input .npy arrays (absolute, machine-specific path).
# NOTE(review): the folder name suggests these arrays are FFT outputs — confirm whether
# they hold time-domain samples or frequency bins before trusting index-based timing.
data_dir = "C:/Users/madhu/Desktop/individual_project/outputs/output_train/fft_output_training_data"
# Read as a module-level global inside calculate_distance().
speed_of_sound = 343  # Speed of sound in air in m/s
# Used to turn a sample index into seconds (index / sampling_rate).
sampling_rate = 16000  # Sampling rate in Hz

def calculate_distance(signal, sampling_rate, *, threshold_ratio=None, speed=None):
    """Estimate a one-way echo distance from the position of a peak sample.

    By default the peak is the sample with the largest absolute value (the
    global maximum, not necessarily the earliest peak). Pass
    ``threshold_ratio`` to use the first sample whose magnitude reaches that
    fraction of the maximum instead.

    NOTE(review): the sample index is interpreted as a time offset
    (index / sampling_rate). That is only meaningful for time-domain input;
    if the arrays passed in are frequency-domain (FFT) data the result is not
    a physical distance — confirm against the caller.

    Args:
        signal: Array-like of real or complex samples. Multi-dimensional
            input is treated as flattened.
        sampling_rate: Samples per second; must be positive.
        threshold_ratio: Optional value in (0, 1]. When given, the peak is the
            first sample with ``abs(sample) >= threshold_ratio * max(abs)``.
            When ``None`` the global maximum is used.
        speed: Propagation speed in m/s. When ``None`` the module-level
            ``speed_of_sound`` is used.

    Returns:
        Tuple ``(distance, first_peak_index)`` where ``distance`` is
        ``speed * (first_peak_index / sampling_rate) / 2``.

    Raises:
        ValueError: If ``signal`` is empty, ``sampling_rate`` is not positive,
            or ``threshold_ratio`` is outside (0, 1].
    """
    if sampling_rate <= 0:
        raise ValueError("sampling_rate must be positive.")

    magnitude = np.abs(np.ravel(signal))
    if magnitude.size == 0:
        raise ValueError("signal must contain at least one sample.")

    if speed is None:
        speed = speed_of_sound  # Fall back to the module-level constant

    if threshold_ratio is None:
        first_peak_index = np.argmax(magnitude)
    else:
        if not 0 < threshold_ratio <= 1:
            raise ValueError("threshold_ratio must be in the interval (0, 1].")
        # argmax on a boolean array returns the position of the first True
        first_peak_index = np.argmax(magnitude >= threshold_ratio * magnitude.max())

    time_of_echo = first_peak_index / sampling_rate
    distance = (speed * time_of_echo) / 2  # Halved: the echo travels out and back
    return distance, first_peak_index

# Collect every .npy array in data_dir together with a presence label
combined_human_data = []
labels = []

# os.listdir() returns entries in arbitrary order; sorting keeps the position of
# each file (and therefore the "Signal Index" reported later) reproducible.
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith('.npy'):
        continue  # Skip anything that is not a saved NumPy array

    file_path = os.path.join(data_dir, filename)
    # ravel() gives a 1D array whatever the stored rank is
    signal = np.ravel(np.load(file_path))
    combined_human_data.append(signal)

    # Label 1 when the file name contains "present" (case-insensitive);
    # every other .npy file is treated as "absent".
    labels.append(1 if 'present' in filename.lower() else 0)

# Fail with an explicit message instead of the opaque error max() gives on an empty list
if not combined_human_data:
    raise ValueError(f"No .npy signal files found in {data_dir!r}.")

# Zero-pad every signal at the end so all rows share the longest length
max_length = max(len(signal) for signal in combined_human_data)
padded_signals = [np.pad(signal, (0, max_length - len(signal)), 'constant') for signal in combined_human_data]

# Stack into a 2D array (one row per file) and convert the labels to an array
combined_human_data = np.array(padded_signals)
labels = np.array(labels)

# Sanity check: one label per signal
assert len(combined_human_data) == len(labels), "Data and labels must have the same length."

# Per-signal results, kept in the same order as the rows of combined_human_data
distances = []
echo_indices = []
presence_status = []

# Walk the signals row by row, measuring each one and translating its label
for row_number, current_signal in enumerate(combined_human_data):
    echo_distance, peak_position = calculate_distance(current_signal, sampling_rate)
    distances.append(echo_distance)
    echo_indices.append(peak_position)

    if labels[row_number] == 1:
        presence_status.append('Human present')
    else:
        presence_status.append('Human absent')

# Assemble the results table; dict insertion order fixes the column order
result_columns = {
    'Signal Index': np.arange(len(distances)),  # Running row number
    'Distance (m)': distances,
    'First Echo Index': echo_indices,  # Sample position of the detected peak
    'Presence Status': presence_status,
}
result_df = pd.DataFrame(result_columns)

# Show the full table
print(result_df)

# Output locations: one folder holding the raw distances (.npy) and the full table (.csv)
save_dir = "C:/Users/madhu/Desktop/individual_project/outputs/output_train/Distance_calculation_training_data"
distances_path = os.path.join(save_dir, 'distance_calculated.npy')
result_csv_path = os.path.join(save_dir, 'distance_calculation_results.csv')

# Create the folder if needed, then write both artifacts
os.makedirs(save_dir, exist_ok=True)
np.save(distances_path, distances)
result_df.to_csv(result_csv_path, index=False)  # Row labels are omitted from the CSV

# Report the measurement for the first signal only
print(f"First Echo Distance for the first signal: {distances[0]:.4f} meters (Presence Status: {presence_status[0]}, Peak Index: {echo_indices[0]})")


   Signal Index  Distance (m)  First Echo Index Presence Status
0             0      192.9375             18000    Human absent
1             1      128.6250             12000   Human present
First Echo Distance for the first signal: 192.9375 meters (Presence Status: Human absent, Peak Index: 18000)
