In [1]:
import pandas as pd
import numpy as np
import json

# Load the JSON file
file_path = '../../tests/testData/testDataVisualisation.json'  # Replace with your JSON file path
# Pass the encoding explicitly: without it, open() falls back to the
# platform's locale encoding, which is not guaranteed to be UTF-8.
with open(file_path, 'r', encoding='utf-8') as file:
    raw_json = json.load(file)

# Flatten the JSON and extract relevant fields
def flatten_json_to_df(raw_json):
    """Flatten the nested JSON records into one DataFrame row per datapoint.

    Parameters
    ----------
    raw_json : iterable of dict
        Top-level records. Each record may carry 'userId', 'seizureTimes' and
        'datapoints'; each datapoint may carry 'eventId', 'hr', 'o2Sat',
        'rawData', 'rawData3D', 'alarmState' and 'alarmPhrase'.

    Returns
    -------
    pandas.DataFrame
        One row per datapoint with the columns eventId, userId, hr, o2Sat,
        rawData, rawData3D, seizure_times, alarmState and alarmPhrase.
        The record-level 'userId' and 'seizureTimes' are repeated on every
        row of that record. Missing scalar fields become None and missing
        list fields become []. The columns are present even when there are
        no datapoints at all.
    """
    # Fixing the schema up front keeps column-based steps (drop, indexing)
    # working when the input contains no datapoints.
    columns = [
        'eventId',
        'userId',
        'hr',
        'o2Sat',
        'rawData',
        'rawData3D',
        'seizure_times',
        'alarmState',
        'alarmPhrase',
    ]

    flattened_data = []
    for attribute in raw_json:
        user_id = attribute.get('userId')
        # `or []` also covers an explicit JSON null, not just a missing key.
        seizure_times = attribute.get('seizureTimes') or []
        datapoints = attribute.get('datapoints') or []

        for point in datapoints:
            flattened_data.append({
                'eventId': point.get('eventId'),
                'userId': user_id,
                'hr': point.get('hr', []),
                'o2Sat': point.get('o2Sat', []),
                'rawData': point.get('rawData', []),
                'rawData3D': point.get('rawData3D', []),
                'seizure_times': seizure_times,
                'alarmState': point.get('alarmState'),
                'alarmPhrase': point.get('alarmPhrase'),
            })

    return pd.DataFrame(flattened_data, columns=columns)

# Build the DataFrame (one row per datapoint) from the loaded JSON
df_sensordata = flatten_json_to_df(raw_json)

# Sampling frequency of the rawData samples. calculate_fft reads this
# module-level value when it builds the frequency axis.
sampling_rate = 25  # in Hz

# Define FFT calculation function for each row
def calculate_fft(raw_data, fs=None):
    """Return the non-negative-frequency half of a signal's FFT magnitude.

    Parameters
    ----------
    raw_data : array-like
        1-D sequence of samples.
    fs : float, optional
        Sampling frequency used to scale the frequency axis. When omitted,
        the module-level ``sampling_rate`` is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(frequencies, magnitudes)``, each holding the first ``n // 2`` FFT
        bins, where ``n`` is the number of samples. Both arrays are empty
        when ``raw_data`` is empty.
    """
    samples = np.asarray(raw_data)
    n_samples = len(samples)
    if n_samples == 0:
        # np.fft.fft rejects zero-length input; an empty window has no spectrum.
        return np.array([]), np.array([])

    if fs is None:
        fs = sampling_rate  # fall back to the module-level default

    centered = samples - np.mean(samples)  # Remove the DC component
    magnitudes = np.abs(np.fft.fft(centered))
    frequencies = np.fft.fftfreq(n_samples, d=1 / fs)

    # fftfreq lists the non-negative frequencies first, so the leading
    # n // 2 bins are the one-sided spectrum.
    half = n_samples // 2
    return frequencies[:half], magnitudes[:half]

# Add a new column for FFT data for all rows in the DataFrame
def add_fft_column(df):
    """Attach an 'FFT' column holding each row's FFT magnitudes.

    Each row's 'rawData' entry is converted to a NumPy array and passed to
    calculate_fft; the magnitude part of the result is stored as a plain
    list. The DataFrame is modified in place and also returned.
    """
    magnitude_lists = [
        # calculate_fft returns (frequencies, magnitudes); keep the magnitudes.
        list(calculate_fft(np.array(record['rawData']))[1])
        for _, record in df.iterrows()
    ]
    df['FFT'] = magnitude_lists
    return df

# Add the FFT column to the DataFrame
df_sensordata = add_fft_column(df_sensordata)

# Step 1: Add 'timestep' column in 5-second increments.
# The value comes from the row index alone, so it increases continuously
# across all rows regardless of eventId/userId.
# NOTE(review): assumes each row covers 5 seconds of data - confirm.
df_sensordata['timestep'] = df_sensordata.index * 5

# Step 2: Add 'label' column, initialized to 0 (label_alarm_events sets it to 1)
df_sensordata['label'] = 0

# Step 3: Function to label based on 'seizure_times' and 'alarmPhrase'
def label_alarm_events(df):
    """Set 'label' to 1 for rows that fall inside a window around each alarm.

    For every row whose 'alarmPhrase' is 'ALARM', each offset in that row's
    'seizure_times' is added to the row's 'timestep' to get a window edge:

    - edge before the alarm: rows with edge <= timestep < alarm are labelled;
    - edge at or after the alarm: rows with alarm <= timestep <= edge are
      labelled.

    The DataFrame is modified in place and also returned.
    NOTE(review): offsets are presumably in seconds, like 'timestep' - confirm.
    """
    for _, record in df.iterrows():
        if record['alarmPhrase'] != 'ALARM':
            continue

        alarm_at = record['timestep']
        for offset in record['seizure_times']:
            window_edge = alarm_at + offset

            # Negative offset: window runs from the edge up to (not including) the alarm.
            pre_alarm = (df['timestep'] >= window_edge) & (df['timestep'] < alarm_at)
            df.loc[df[pre_alarm].index, 'label'] = 1

            # Zero or positive offset: window runs from the alarm through the edge.
            post_alarm = (df['timestep'] >= alarm_at) & (df['timestep'] <= window_edge)
            df.loc[df[post_alarm].index, 'label'] = 1

    return df

# Apply the function to label the DataFrame
df_sensordata = label_alarm_events(df_sensordata)

# Step 4: Drop the 'seizure_times' and 'alarmState' columns now that labelling is done
df_sensordata.drop(columns=['seizure_times','alarmState'], inplace=True)

# Step 5: Save the labeled DataFrame to a CSV file (without the index column)
df_sensordata.to_csv('generatedCsvDatasets/sensordata_labeled.csv', index=False)

# Display the first few rows of the updated DataFrame
df_sensordata.head(3)

Unnamed: 0,eventId,userId,hr,o2Sat,rawData,rawData3D,alarmPhrase,FFT,timestep,label
0,407,39,67,-1,"[1496, 1480, 1500, 1492, 1496, 1484, 1500, 149...",[],OK,"[1.2960299500264227e-11, 143.05125737182817, 5...",0,0
1,407,39,67,-1,"[1492, 1508, 1496, 1476, 1484, 1476, 1496, 150...",[],OK,"[9.094947017729282e-13, 75.0235079481899, 31.0...",5,0
2,407,39,68,-1,"[1488, 1496, 1484, 1492, 1492, 1508, 1504, 148...",[],OK,"[2.2737367544323206e-13, 91.25440903139302, 81...",10,0


- load_data: Loads and flattens the JSON data into a DataFrame.
- calculate_fft: Computes the FFT of the raw data.
- add_fft_column: Adds a column of FFT magnitudes.
- add_timestep_and_label: Adds the timestep and label columns.
- label_alarm_events: Labels the rows based on the alarmPhrase and seizure_times.
- process_data: A convenience method that runs the entire pipeline and returns the final processed DataFrame.