In [6]:
import pandas as pd
import numpy as np
from scipy import signal

# Section 1: Load Data
file_path = 'Forward_1.xlsx'  # Ensure the file path is correct
df = pd.read_excel(file_path)

# Print the column names for debugging
print("Original Columns:", df.columns)

# Clean column names (remove leading/trailing whitespace)
df.columns = df.columns.str.strip()

# Verify column names again
print("Cleaned Columns:", df.columns)

# Section 2: Preprocess Data
# Coerce every sensor column to a numeric dtype. Cells that cannot be parsed
# become NaN (errors='coerce') so they surface in the missing-value report
# below instead of raising here.
for col in ['ax', 'ay', 'az', 'wx', 'wy', 'wz', 'Bx', 'By', 'Bz']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Check for missing values after conversion
print("Data types:\n", df.dtypes)
print("Missing values:\n", df.isnull().sum())

# Total acceleration: Euclidean norm of the three accelerometer columns
df['total_accel'] = np.sqrt(df['ax']**2 + df['ay']**2 + df['az']**2)

# Section 3: Detect Peaks
# Local maxima of total acceleration whose height is at least 1.0.
# NOTE(review): scipy documents that find_peaks may give unexpected results on
# data containing NaNs; the missing-value report above should be all zeros.
peaks, _ = signal.find_peaks(df['total_accel'], height=1.0)  # Adjust height threshold as needed
print("Detected Peaks at Indices:", peaks)

# Section 4: Label rows at the detected peaks
# One label slot per DataFrame row, defaulting to "no fall" / unknown direction.
# The labels are written at the row position of each peak so that the 'Fall'
# and 'Direction' columns line up with the samples they describe. (Collecting
# one label per peak and padding the list to len(df) would instead place the
# label for the i-th peak on row i.)
fall_event = [0] * len(df)
fall_direction = [np.nan] * len(df)

for peak in peaks:
    # Only label peaks that have 50 samples of context on each side
    if 50 < peak < len(df) - 50:
        fall_event[peak] = 1  # Mark as fall event
        fall_direction[peak] = 'forward'  # Assuming direction is known

# Add new columns to the DataFrame
df['Fall'] = fall_event
df['Direction'] = fall_direction

# Section 5: Feature Extraction
def extract_features(segment):
    """Build a placeholder feature vector from a single row.

    Parameters
    ----------
    segment : mapping-like (e.g. one DataFrame row)
        Must provide a scalar under the key 'total_accel'.

    Returns
    -------
    pd.Series
        Empty when 'total_accel' is missing (NaN); a message is printed in
        that case. Otherwise six entries: 'min', 'max' and 'mean' all equal
        the row's 'total_accel', while 'skewness', 'kurtosis' and
        'autocorrelation' are NaN because a single sample gives nothing to
        compute them from.
    """
    accel = segment['total_accel']

    # Guard: nothing can be derived from a missing reading
    if pd.isna(accel):
        print("Segment contains null values, skipping.")
        return pd.Series()

    # With one sample, min / max / mean all collapse to that sample
    point_stats = dict.fromkeys(('min', 'max', 'mean'), accel)
    # Statistics that need a window of data are left as NaN placeholders
    window_stats = dict.fromkeys(('skewness', 'kurtosis', 'autocorrelation'), np.nan)

    return pd.Series({**point_stats, **window_stats})


# Create a feature DataFrame: one row of features per detected peak row
peak_rows = df.iloc[peaks]
features_df = peak_rows.apply(extract_features, axis=1)

# Section 6: Save Preprocessed Data
# Only the labelled sample table `df` is written out; `features_df` stays in memory.
output_file_path = 'processed_fall_data.xlsx'
df.to_excel(excel_writer=output_file_path, index=False)

print("Preprocessed data saved to:", output_file_path)


Original Columns: Index(['time', 'ax', 'ay', 'az', 'wx', 'wy', 'wz', 'Bx', 'By', 'Bz'], dtype='object')
Cleaned Columns: Index(['time', 'ax', 'ay', 'az', 'wx', 'wy', 'wz', 'Bx', 'By', 'Bz'], dtype='object')
Data types:
 time    float64
ax      float64
ay      float64
az      float64
wx      float64
wy      float64
wz      float64
Bx      float64
By      float64
Bz      float64
dtype: object
Missing values:
 time    0
ax      0
ay      0
az      0
wx      0
wy      0
wz      0
Bx      0
By      0
Bz      0
dtype: int64
Detected Peaks at Indices: [ 29  47  55  72  85  98 116 124 132 149 159 179 192 201 244 253 274 287
 300 309 317 335 343 356 382 391 403 411 420 434 442 464 477 493 502 520
 532 550 558 567]
Preprocessed data saved to: processed_fall_data.xlsx


In [8]:
import pandas as pd
import numpy as np
from scipy import signal, stats
from scipy.fft import fft

# Section 1: Load Data
file_path = 'Forward_1.xlsx'
df = pd.read_excel(file_path)

# Strip stray whitespace from the column headers
df.columns = df.columns.str.strip()

# Coerce each sensor column to numeric; unparseable cells become NaN
for sensor_col in ('ax', 'ay', 'az', 'wx', 'wy', 'wz', 'Bx', 'By', 'Bz'):
    df[sensor_col] = pd.to_numeric(df[sensor_col], errors='coerce')

# Total acceleration: Euclidean norm of the three accelerometer columns
squared_sum = df['ax'].pow(2) + df['ay'].pow(2) + df['az'].pow(2)
df['total_accel'] = np.sqrt(squared_sum)

# Local maxima of total acceleration with height of at least 1.0
peaks, _ = signal.find_peaks(df['total_accel'], height=1.0)

# Feature extraction function (26 features)
def extract_features(segment, axis):
    """Extract 26 features from one sensor column of a windowed segment.

    The features are, in order:
      * 5 summary statistics: min, max, mean, skewness, kurtosis
      * 11 autocorrelation values, lags 1 through 11
      * 5 (bin index, magnitude) pairs for the strongest bins of the
        one-sided DFT magnitude spectrum, strongest first

    Parameters
    ----------
    segment : pandas.DataFrame
        Window of samples; ``segment[axis]`` must be a numeric Series.
    axis : str
        Column to analyse. Also used as the prefix of every feature key.

    Returns
    -------
    pandas.Series
        Feature values keyed ``f'{axis}_<feature>'``. The ``*_freq_*``
        entries are DFT bin indices (not Hz); bin 0 is the DC component.
    """
    values = segment[axis]
    observed = values.dropna()  # scipy's skew/kurtosis are computed on non-missing samples

    # Basic statistical features
    features = {
        f'{axis}_min': values.min(),
        f'{axis}_max': values.max(),
        f'{axis}_mean': values.mean(),
        f'{axis}_skewness': stats.skew(observed),
        f'{axis}_kurtosis': stats.kurtosis(observed),
    }

    # Autocorrelation features (lags 1-11)
    for lag in range(1, 12):
        features[f'{axis}_autocorr_lag_{lag}'] = values.autocorr(lag=lag)

    # Frequency-domain features. Missing samples are zero-filled for the DFT.
    samples = values.fillna(0).to_numpy()
    # The DFT of a real signal is mirror-symmetric: bin k and bin N-k carry the
    # same magnitude. Ranking the full two-sided spectrum therefore fills the
    # top-5 slots with duplicate pairs, so only bins 0..N//2 are kept.
    magnitudes = np.abs(fft(samples))[: len(samples) // 2 + 1]
    # Stable sort on the negated magnitudes: descending order, ties broken by
    # the lower bin index, so the output is deterministic.
    strongest_bins = np.argsort(-magnitudes, kind='stable')[:5]

    for rank, bin_index in enumerate(strongest_bins, start=1):
        features[f'{axis}_freq_{rank}'] = bin_index
        features[f'{axis}_amplitude_{rank}'] = magnitudes[bin_index]

    return pd.Series(features)

# Section 6: Apply Feature Extraction around Peaks
all_features = []
for peak in peaks:
    # Skip peaks that do not have enough surrounding data
    if peak <= 50 or peak >= len(df) - 50:
        continue

    # Window from 50 samples before the peak up to (not including) 50 after it
    segment = df.iloc[peak - 50:peak + 50]

    # Per-axis features for accelerometer, gyroscope and magnetometer columns,
    # concatenated in a fixed axis order into one flat Series
    features = pd.concat([
        extract_features(segment, axis)
        for axis in ('ax', 'ay', 'az', 'wx', 'wy', 'wz', 'Bx', 'By', 'Bz')
    ])

    # Labels are hard-coded here for demonstration; every kept peak is tagged
    # as a forward fall. In a real scenario these would be determined from
    # fall data or user input.
    features['fall'] = 1  # 1 for fall, 0 for non-fall; adjust as needed
    features['fall_direction'] = 'forward'  # Options: 'forward', 'backward', 'lateral'

    all_features.append(features)

# One row per kept peak, one column per feature / label
features_df = pd.DataFrame(all_features)

# Section 7: Save Preprocessed Data
output_file_path = 'processed_fall_data_features.xlsx'
features_df.to_excel(excel_writer=output_file_path, index=False)

print("Preprocessed data with features saved to:", output_file_path)


Preprocessed data with features saved to: processed_fall_data_features.xlsx
