In [1]:
# Read pickle file and show the contents

import pickle
import sys
import os
import matplotlib.pyplot as plt

def read_pickle_file(file_path):
    """Read a pickle file and return the contents."""
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist.")
        return None

    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    
    return data

data = read_pickle_file("/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/runs/exp/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/saved_test_outputs/UBFC-rPPG_DeepPhys_PURE_outputs.pickle")
print(data.keys())
print(data['labels'])
# print(data['predictions'])
# # # plt.plot(data['predictions']['1001'][0])
# # # plt.show()
# print(data['labels'].keys())
# print(data['labels']['1001'][0].shape)
# plt.plot(data['labels']['1001'][0].flatten())
# plt.show()

dict_keys(['predictions', 'labels', 'label_type', 'fs', 'spo2_labels'])
{'1001': {0: tensor([[-0.5829],
        [-0.3514],
        [-0.1199],
        [-0.1171],
        [ 1.1229],
        [ 2.3080],
        [ 2.6704],
        [ 1.3616],
        [-0.2810],
        [-1.2428],
        [-1.1781],
        [-0.8295],
        [-0.3997],
        [-0.0179],
        [ 0.2135],
        [ 0.1378],
        [-0.2108],
        [-0.5594],
        [-0.6738],
        [-0.6986],
        [-0.6986],
        [-0.3776],
        [-0.1779],
        [-0.1171],
        [ 0.0826],
        [ 0.9545],
        [ 2.1411],
        [ 2.4085],
        [ 1.2435],
        [-0.3398],
        [-1.1229],
        [-1.2085],
        [-0.7884],
        [-0.3254],
        [ 0.0000],
        [ 0.1845],
        [ 0.1668],
        [-0.2464],
        [-0.5829],
        [-0.6448],
        [-0.8157],
        [-0.5223],
        [-0.4080],
        [-0.1765],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.6565],
   

In [2]:
import os
import numpy as np
import pandas as pd
import antropy as ant
import neurokit2 as nk
import heartpy as hp
import logging
import warnings

logging.basicConfig(level=logging.INFO)

def compute_statistical(signal):
    """Compute 9 statistical features"""
    return {
        'mean': np.nanmean(signal),
        'min': np.nanmin(signal),
        'max': np.nanmax(signal),
        'std': np.nanstd(signal),
        'dynamic_range': np.ptp(signal),
        'p10': np.nanpercentile(signal, 10),
        'p25': np.nanpercentile(signal, 25),
        'p75': np.nanpercentile(signal, 75),
        'p90': np.nanpercentile(signal, 90)
    }

def compute_fractal(signal, sf=30):
    """Compute 6 fractal features"""
    try:
        sub_windows = [signal[i:i+60] for i in range(0, 180, 60)]
        return {
            'katz': ant.katz_fd(signal),
            'higuchi': ant.higuchi_fd(signal),
            'dfa': ant.detrended_fluctuation(signal),
            'mean_katz': np.mean([ant.katz_fd(sw) for sw in sub_windows]),
            'mean_higuchi': np.mean([ant.higuchi_fd(sw) for sw in sub_windows]),
            'mean_dfa': np.mean([ant.detrended_fluctuation(sw) for sw in sub_windows])
        }
    except Exception as e:
        logging.warning(f"Fractal computation error: {str(e)}")
        return {}

def compute_entropy(signal, sf=30):
    """Compute 6 entropy features"""
    try:
        return {
            'perm_entropy': ant.perm_entropy(signal, normalize=True),
            'spectral_entropy': ant.spectral_entropy(signal, sf, method='welch'),
            'approx_entropy': ant.app_entropy(signal), 
            'sample_entropy': ant.sample_entropy(signal),
            'hjorth_mobility': ant.hjorth_params(signal)[0],
            'hjorth_complexity': ant.hjorth_params(signal)[1]
        }
    except Exception as e:
        logging.warning(f"Entropy computation error: {str(e)}")
        return {}
    
def calculate_hrv_time_features(signal, sf=30, selected_feat = None):
    """Compute given time-domain HRV features"""
    try:
        signals, info = nk.ppg_process(signal, sampling_rate=sf)
        nn_intervals = np.diff(info["PPG_Peaks"]) / sf * 1000 
        hrv = nk.hrv_time(info["PPG_Peaks"], sampling_rate=sf)
        return {feature: hrv[feature].values[0] for feature in selected_feat if feature in hrv.columns}
        
    except Exception as e:
        logging.warning(f"HRV time features computation error: {str(e)}")
        return {}
    
# def calculate_hrv_frequency_features(signal, sf=30): Not implemented because of short signal length
# def calculate_hrv_nonlinear_features(signal, sf=30): Not implemented because of some error of short signal length. A part of it can be implemented removing the error
    

def process_file(file_path):
    try:
        signal = np.load(file_path).flatten()
        if len(signal) != 180:
            raise ValueError("Signal length must be 180 samples")
            
        features = {}
        features['filename'] = file_path
        features.update(compute_statistical(signal))
        features.update(compute_fractal(signal))
        features.update(compute_entropy(signal))
        selected_hrv_time_features = ['HRV_RMSSD', 'HRV_SDSD', 'HRV_pNN50', 'HRV_pNN20','HRV_HTI']
        # selected_hrv_freq_features = [f for f in selected_hrv_time_features if f in nk.hrv_time(signal).columns]
        # selected_hrv_nonLin_features = [f for f in selected_hrv_time_features if f in nk.hrv_time(signal).columns]
        features.update(calculate_hrv_time_features(signal, selected_feat = selected_hrv_time_features))
        return features
    except Exception as e:
        logging.error(f"Error processing {file_path}: {str(e)}")
        return {}

### ***Saving predictions from pickle to numpy***

In [3]:
import os
import numpy as np
import pickle

# Path to the pickle file
pickle_file_path = "/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/runs/exp/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/saved_test_outputs/UBFC-rPPG_DeepPhys_PURE_outputs.pickle"

# Folder to save the npy files
save_folder = "/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse"

# Function to load the pickle file
def read_pickle_file(path):
    with open(path, "rb") as f:
        return pickle.load(f)

# Load data
data = read_pickle_file(pickle_file_path)

# Process and save each 180-length prediction vector
for subject_id in data['predictions'].keys():
    for segment_id in data['predictions'][subject_id].keys():
        vector = data['predictions'][subject_id][segment_id]
        
        # Convert to numpy array
        vector = np.array(vector)
        
        # Construct the filename
        filename = f"{subject_id}_predictions{segment_id}.npy"
        save_path = os.path.join(save_folder, filename)
        
        # Save to file
        np.save(save_path, vector)

        print(f"Saved: {save_path}")


Saved: /home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/1001_predictions0.npy
Saved: /home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/1001_predictions1.npy
Saved: /home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/1001_predictions2.npy
Saved: /home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_

In [4]:
output_ppg_files_dataFileList = "/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/DataFileLists/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendY5F_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse_0.0_1.0.csv"
output_ppg_files_data = pd.read_csv(output_ppg_files_dataFileList, index_col=0)
file_paths = output_ppg_files_data['input_files'].values
file_paths = [file_path.replace('input', 'predictions') for file_path in file_paths] # Modify the file_path in file_paths by changing 'input' to 'label'
dataset = []

for file_path in file_paths:
    features = process_file(file_path)
    dataset.append(features)

df = pd.DataFrame(dataset)
df.to_csv('rppg_features_dataset.csv', index=False)
print(f"Dataset created with {len(df)} files and {len(df.columns)-1} features")

  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, no

Dataset created with 647 files and 26 features


In [53]:
# Main processing
folder_path = '/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/dummyFolder'  # Replace with your folder path
dataset = []

for file_name in os.listdir(folder_path):
    if file_name.endswith('.npy'):
        file_path = os.path.join(folder_path, file_name)
        features = process_file(file_path)
        if features:
            features['filename'] = file_name
            dataset.append(features)

df = pd.DataFrame(dataset)
df.to_csv('rppg_features_dataset.csv', index=False)
print(f"Dataset created with {len(df)} files and {len(df.columns)-1} features")

Dataset created with 1 files and 26 features


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,


### ***Trying to calculate hrv features***

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import neurokit2 as nk
import pandas as pd

signal = np.load("/home/abhaygupta/workspaceVM/capstone/rPPG-Toolbox/PreprocessedData/PURE_SizeW72_SizeH72_ClipLength180_DataTypeDiffNormalized_Standardized_DataAugNone_LabelTypeDiffNormalized_Crop_faceTrue_BackendHC_Large_boxTrue_Large_size1.5_Dyamic_DetFalse_det_len30_Median_face_boxFalse/206_label0.npy")
sampling_rate = 30

# # Replace signal values with 0.0 with 0.00000001
# signal = np.where(signal == 0.0, 0.0000001, signal)
# print(signal)

signals, info = nk.ppg_process(signal, sampling_rate=sampling_rate) 
peaks = info["PPG_Peaks"]
nn_intervals = np.diff(peaks) / sampling_rate * 1000 
print(peaks)
print(nn_intervals)
hrv = nk.hrv_time(info["PPG_Peaks"], sampling_rate=30, show=False)
hrv



# print(hrv.columns)
# hrv
# hrv_indices = nk.hrv_frequency(peaks, sampling_rate=sampling_rate, show=False)
# hrv_indices = nk.hrv_nonlinear(peaks, sampling_rate=sampling_rate, show=False)


[-7.30188277e-01 -3.68204933e-01 -3.59909481e-01 -1.80991672e-01
 -8.29545163e-03 -1.83065535e-01  5.20162524e-01  1.94320955e+00
  2.67754555e+00  2.26107617e+00  8.13142793e-01 -4.76611403e-01
 -8.84219731e-01 -1.09217162e+00 -9.40214030e-01 -5.78230686e-01
 -3.31818065e-02  1.45736003e-01 -1.06332607e-01 -3.64057207e-01
 -6.86637156e-01 -5.45048879e-01 -7.28114414e-01 -5.45048879e-01
 -4.13829917e-01 -3.64057207e-01 -1.80991672e-01 -5.59942985e-02
 -1.83065535e-01  4.24764830e-01  1.56934862e+00  2.60081262e+00
  2.86626707e+00  1.55106092e+00  3.05423447e-02 -6.55529212e-01
 -9.11179949e-01 -9.11179949e-01 -9.11179949e-01 -4.44937860e-01
 -1.80991672e-01 -8.50283793e-02 -3.70844395e-01 -5.45048879e-01
 -9.11179949e-01 -5.45048879e-01 -7.28114414e-01 -6.44594299e-01
 -5.47122742e-01 -2.84684817e-01 -1.83065535e-01 -7.52246637e-02
 -1.80991672e-01  1.44227739e-01  1.25826919e+00  2.56819641e+00
  3.14680416e+00  1.93114343e+00  3.60852146e-01 -4.83398591e-01
 -7.84674311e-01 -1.09217

  warn(


ValueError: cannot convert float NaN to integer

### ***Read csv and extract ground truth spo2 values***

In [5]:
import pandas as pd
import numpy as np
import os

# Load the dataset - make sure the path is correct
file_path = 'rppg_features_dataset.csv'  # Adjust path if needed
df = pd.read_csv(file_path)

# Function to extract spo2 values from the modified file path
def extract_spo2_from_path(path):
    # Replace 'label' with 'spo2' in the filename
    spo2_path = path.replace('_predictions', '_spo2')
    try:
        # Load the numpy array from the spo2 file
        spo2_values = np.load(spo2_path)
        # Calculate mean spo2 value
        mean_spo2 = np.mean(spo2_values)
        return mean_spo2
    except Exception as e:
        print(f"Error loading {spo2_path}: {e}")
        return np.nan

# Apply the function to extract ground truth spo2 values
df['ground_truth_spo2'] = df['filename'].apply(extract_spo2_from_path)

# Save the updated dataframe with ground truth values
output_file_path = 'rppg_features_dataset_with_spo2.csv'
df.to_csv(output_file_path, index=False)

# Display basic statistics of the ground truth values
spo2_stats = df['ground_truth_spo2'].describe()
print("SpO2 Ground Truth Statistics:")
print(spo2_stats)

# Check for missing values
missing_spo2 = df['ground_truth_spo2'].isna().sum()
print(f"Number of missing SpO2 values: {missing_spo2}")


SpO2 Ground Truth Statistics:
count    647.000000
mean      97.093968
std        1.666053
min       89.666667
25%       96.000000
50%       97.579262
75%       98.419052
max       99.000000
Name: ground_truth_spo2, dtype: float64
Number of missing SpO2 values: 0
