# Test Your Algorithm

## Instructions
1. From the **Pulse Rate Algorithm** Notebook you can do one of the following:
   - Copy the entire **Code** section into the Code block below.
   - Download the notebook as a Python (`.py`) file and copy its code into the Code block below.
2. In the bottom right, click the <span style="color:blue">Test Run</span> button. 

### Didn't Pass
If your code didn't pass the test, go back to the previous Concept or to your local setup and keep iterating on your algorithm. Try to bring your training error down before testing again.

### Pass
If your code passes the test, complete the following steps. You **must** include a screenshot of your code and of the test being **Passed**. The example below shows what the starter filler code looks like when the test is run; your result should look similar. A passed test shows a green outline in the notebook plus a box with **Test passed:**, and in the Results bar at the bottom the progress bar will be at 100% with a checkmark and **All cells passed**.
![Example](example.png)

1. Take a screenshot of your code passing the test and make sure it is in `.png` format. If it is not a `.png` image, you will have to edit the Markdown to render the image after Step 3. The example image above shows what `passed.png` should look like.
2. Upload the screenshot to the same folder or directory as this jupyter notebook.
3. Rename the screenshot to `passed.png` and it should show up below.
![Passed](passed.png)
4. Download this jupyter notebook as a `.pdf` file. 
5. Continue to Part 2 of the Project. 

In [None]:
import glob
import sys
import numpy as np
import scipy as sp
import scipy.io
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
from matplotlib import mlab
import matplotlib.gridspec as gridspec
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import pandas as pd


def LoadTroikaDataset():
    """
    Collect the paths of the Troika training .mat files.

    Globs the training directory for signal files (``DATA_*.mat``) and
    reference files (``REF_*.mat``) and sorts each list lexicographically.

    Returns:
        data_fls: Sorted paths of the .mat files that contain signal data
        ref_fls: Sorted paths of the .mat files that contain reference data
        The two lists are paired by position (ref_fls[5] goes with data_fls[5])
            only as far as the sorted DATA_/REF_ names line up one-to-one.
        Both lists are empty when the directory is missing or has no matches.
    """
    data_dir = "./datasets/troika/training_data"

    signal_pattern = data_dir + "/DATA_*.mat"
    reference_pattern = data_dir + "/REF_*.mat"

    # Sort in place so both lists follow the same lexicographic order.
    data_fls = glob.glob(signal_pattern)
    data_fls.sort()
    ref_fls = glob.glob(reference_pattern)
    ref_fls.sort()

    return data_fls, ref_fls

def LoadTroikaDataFile(data_fl):
    """
    Read one Troika .mat file and return its signal rows from index 2 onward.

    Usage:
        data_fls, ref_fls = LoadTroikaDataset()
        ppg, accx, accy, accz = LoadTroikaDataFile(data_fls[0])

    Args:
        data_fl: (str) filepath to a troika .mat file.

    Returns:
        The rows of the file's 'sig' matrix starting at row index 2. Callers
        unpack them as ppg, accx, accy, accz, which presumes 'sig' has exactly
        six rows (see the dataset README to confirm the row layout).
    """
    mat_contents = sp.io.loadmat(data_fl)
    signal_matrix = mat_contents['sig']
    # Drop the first two rows; everything from the third row on is returned.
    return signal_matrix[2:]

def AggregateErrorMetric(pr_errors, confidence_est):
    """
    Compute the mean absolute error at 90% availability.

    Estimates whose confidence falls below the 10th percentile of all
    confidences are discarded; the MAE is taken over the remainder.

    Args:
        pr_errors: a numpy array of errors between pulse rate estimates and corresponding 
            reference heart rates.
        confidence_est: a numpy array of confidence estimates for each pulse rate
            error (higher means more trustworthy).

    Returns:
        the MAE at 90% availability
    """
    # The 10th percentile of confidence is the cut-off: everything at or above
    # it belongs to the most trusted 90% of estimates.
    confidence_cutoff = np.percentile(confidence_est, 10)
    keep_mask = confidence_est >= confidence_cutoff

    # Mean absolute error over the retained estimates only.
    retained_abs_errors = np.abs(pr_errors[keep_mask])
    return np.mean(retained_abs_errors)

def _plot_window_spectrum(indice, label, peaks_title, marker, raw_freqs, raw_fft,
                          band_freqs, band_mag, peaks):
    """Draw the three-panel debug figure (raw spectrum, in-band spectrum, peaks) for one signal."""
    gs = gridspec.GridSpec(2, 2)
    plt.figure()

    ax = plt.subplot(gs[0, 0])
    ax.set_title(f'Initial Frequency {label} {indice}')
    # rfft output is complex; plot its magnitude rather than the raw values.
    ax.plot(raw_freqs, np.abs(raw_fft))
    ax.set_xlabel('Frequency (Hz)')

    ax = plt.subplot(gs[0, 1])
    ax.set_title(f'Low Frequency {label} {indice}')
    ax.plot(band_freqs, band_mag)
    ax.set_xlabel('Frequency (Hz)')

    # Peaks are indices into the in-band magnitude array, so this panel is
    # plotted against bin index rather than Hz.
    ax = plt.subplot(gs[1, :])
    ax.set_title(peaks_title)
    ax.plot(band_mag)
    ax.plot(peaks, band_mag[peaks], marker, ms=10)
    ax.set_xlabel('Frequency bin index')

    plt.show()


def create_features(indice, ppg, accx, accy, accz, min_freq_v, max_freq_v, fs=125, verbose=False, plot_frequency_ppg = False, plot_frequency_acc = False):
    """
    Extract the frequency-domain features of one PPG / accelerometer window.

    The four signals are band-pass filtered, the accelerometer axes are merged
    into one magnitude signal, and both PPG and accelerometer magnitude are
    transformed with a zero-padded FFT (4x the window length). The feature
    vector holds the dominant frequency of each signal and the number of
    spectral peaks found inside the pass band.

    :param indice: index of the window within its recording; debug plots are
        only drawn for the first two windows (indice < 2).
    :param ppg: PPG samples of the window.
    :type ppg: ndarray
    :param accx: accelerometer signal on the x-axis
    :type accx: ndarray
    :param accy: accelerometer signal on the y-axis
    :type accy: ndarray
    :param accz: accelerometer signal on the z-axis
    :type accz: ndarray
    :param min_freq_v: lower edge of the pass band, in Hz.
    :type min_freq_v: float
    :param max_freq_v: upper edge of the pass band, in Hz.
    :type max_freq_v: float
    :param fs: sampling rate in Hz; used for the filters and the FFT axes.
    :type  fs: integer
    :param verbose: currently unused; kept so existing callers keep working.
    :type verbose: bool
    :param plot_frequency_ppg: plot the PPG spectrum and its peaks.
    :type  plot_frequency_ppg: bool
    :param plot_frequency_acc: plot the accelerometer spectrum and its peaks.
    :type  plot_frequency_acc: bool
    :return: a 3-tuple ``(features, freqs_initial, fft_ppg)`` where
        ``features`` is ``[ppg dominant freq (Hz), acc dominant freq (Hz),
        number of PPG peaks, number of ACC peaks]``, ``freqs_initial`` is the
        frequency axis of the zero-padded FFT and ``fft_ppg`` is the complex
        zero-padded FFT of the normalised PPG window.
    :rtype: tuple
    """
    # Band-pass every input signal. fs is forwarded so the filter matches the
    # sampling rate used for the FFT axes below.
    ppg = bandpass_filter(ppg, fs=fs, min_freq=min_freq_v, max_freq=max_freq_v)
    accx = bandpass_filter(accx, fs=fs, min_freq=min_freq_v, max_freq=max_freq_v)
    accy = bandpass_filter(accy, fs=fs, min_freq=min_freq_v, max_freq=max_freq_v)
    accz = bandpass_filter(accz, fs=fs, min_freq=min_freq_v, max_freq=max_freq_v)

    # Aggregate the accelerometer axes into a single magnitude signal.
    # NOTE(review): only the Y axis is re-centred before combining; kept as in
    # the original implementation - confirm whether X/Z should be centred too.
    accy_centered = accy - np.mean(accy)
    acc_mag_unfiltered = np.sqrt(accx**2 + accy_centered**2 + accz**2)
    acc_mag = bandpass_filter(acc_mag_unfiltered, fs=fs, min_freq=min_freq_v, max_freq=max_freq_v)

    # Un-padded spectra: used for the dominant-frequency features and plots.
    fft_ppg_initial = np.fft.rfft(ppg)
    freqs_original = np.fft.rfftfreq(len(ppg), 1 / fs)
    fft_acc_original = np.fft.rfft(acc_mag)
    freqs_acc_original = np.fft.rfftfreq(len(acc_mag), 1 / fs)

    # Zero-padded spectra (4x window length) restricted to the pass band.
    fft_len = len(ppg) * 4
    freqs_initial = np.fft.rfftfreq(fft_len, 1 / fs)
    low_freqs = (freqs_initial >= min_freq_v) & (freqs_initial <= max_freq_v)
    band_freqs = freqs_initial[low_freqs]

    mag_freq_ppg, fft_ppg = fourier_transform(ppg, freqs_initial, low_freqs, fft_len)
    mag_freq_acc, _ = fourier_transform(acc_mag, freqs_initial, low_freqs, fft_len)

    # Spectral peaks inside the pass band (indices into the in-band arrays).
    peaks_ppg = find_peaks(mag_freq_ppg, height=30, distance=1)[0]
    peaks_acc = find_peaks(mag_freq_acc, height=30, distance=1)[0]

    if plot_frequency_ppg and indice < 2:
        _plot_window_spectrum(indice, 'PPG', f'Find Peaks {indice}', '+r',
                              freqs_original, fft_ppg_initial,
                              band_freqs, mag_freq_ppg, peaks_ppg)

    if plot_frequency_acc and indice < 2:
        # The in-band magnitude belongs to the zero-padded axis, so it is
        # plotted against band_freqs and shows the ACC (not PPG) magnitude.
        _plot_window_spectrum(indice, 'ACC', f'Find Peaks ACC {indice}', 'xg',
                              freqs_acc_original, fft_acc_original,
                              band_freqs, mag_freq_acc, peaks_acc)

    # Dominant frequency = bin with the largest magnitude. argmax on the raw
    # complex spectrum would compare real parts instead of magnitudes.
    ppg_feature = freqs_original[np.argmax(np.abs(fft_ppg_initial))]
    acc_feature = freqs_acc_original[np.argmax(np.abs(fft_acc_original))]

    features = np.array([ppg_feature, acc_feature, len(peaks_ppg), len(peaks_acc)])
    return features, freqs_initial, fft_ppg
                

        
def Confidence(estimation, freqs_initial, fft_ppg, min_freq_v):        
    """
    Score a heart-rate estimate by how much of the PPG spectrum lies near it.

    The confidence is the summed spectral magnitude inside a window of
    +/- ``min_freq_v`` Hz around the estimated pulse frequency, divided by the
    summed magnitude of the whole spectrum.

    :param estimation: heart rate estimation in beats per minute (it is
        divided by 60 to obtain Hz).
    :type estimation: float

    :param freqs_initial: the Discrete Fourier Transform sample frequencies,
        one per entry of ``fft_ppg``.
    :type freqs_initial: ndarray

    :param fft_ppg: Fourier transform of the PPG window (complex values are
        accepted; only the magnitude is used).
    :type fft_ppg: ndarray

    :param min_freq_v: half-width, in Hz, of the window around the estimate.
    :type min_freq_v: float

    :return: ``(estimation, confidence)``; the estimate is passed through
        unchanged. The confidence is 0.0 when the spectrum carries no
        magnitude at all, instead of the NaN a 0/0 division would produce.
    :rtype: tuple
    """
    chosen_freq = estimation / 60.0

    win_freqs = (freqs_initial >= chosen_freq - min_freq_v) & (freqs_initial <= chosen_freq + min_freq_v)
    abs_fft_ppg = np.abs(fft_ppg)

    total_magnitude = np.sum(abs_fft_ppg)
    if total_magnitude == 0:
        # An all-zero spectrum supports no estimate; a NaN here would later
        # poison the percentile computed over all confidences.
        return (estimation, 0.0)

    # Sum frequency spectrum near pulse rate estimate and divide by sum of entire spectrum
    confidence = np.sum(abs_fft_ppg[win_freqs]) / total_magnitude

    return (estimation, confidence)


def fourier_transform(x, freqs, low_freqs, fft_len):
    ''' Compute the zero-padded FFT of a range-normalised signal and its in-band magnitude.

    The signal is mean-centred and divided by its peak-to-peak range before
    the transform. A constant signal (zero range) is treated as all zeros so
    the result is a zero spectrum rather than NaNs from a 0/0 division.

    :param x: input signal to transform
    :type x: ndarray
    :param freqs: full list of FFT frequency bins. Not used by the
        computation; kept for interface compatibility.
    :type freqs: ndarray
    :param low_freqs: boolean mask over the rfft bins selecting the band of
        interest (the callers in this file build it for 40-240 BPM)
    :type low_freqs: boolean ndarray
    :param fft_len: length of FFT to compute (the signal is zero-padded or
        truncated to this length by ``np.fft.rfft``)
    :type fft_len: integer  
    :return:
    mag_freq_x: magnitude of the FFT bins selected by ``low_freqs``
    fft_x: complex FFT of the normalised input signal
    '''
    x = np.asarray(x)

    # Mean-centre, then scale by the peak-to-peak range (vectorised min/max
    # instead of the Python builtins iterating element by element).
    centered = x - np.mean(x)
    value_range = np.max(x) - np.min(x)
    if value_range == 0:
        norm_x = np.zeros_like(centered, dtype=float)
    else:
        norm_x = centered / value_range

    fft_x = np.fft.rfft(norm_x, fft_len)

    # Calculate magnitude of the selected (in-band) frequencies
    mag_freq_x = np.abs(fft_x)[low_freqs]
    return mag_freq_x, fft_x


def bandpass_filter(signal, fs = 125, min_freq = 40/60.0, max_freq = 240/60.0):
    """
    Band-pass a signal with a 3rd-order Butterworth filter applied forwards
    and backwards (``filtfilt``).


    :param signal: signal from PPG or Accelerometer
    :type signal: ndarray

    :param fs: sampling rate in Hz
    :type fs: integer

    :param min_freq: lower cut-off frequency in Hz (default 40 BPM).
    :type min_freq: float

    :param max_freq: upper cut-off frequency in Hz (default 240 BPM).
    :type max_freq: float

    :return: the filtered signal, same length as the input
    :rtype: ndarray
    """
    # Design the filter; passing fs lets the cut-offs be given directly in Hz.
    numerator, denominator = scipy.signal.butter(
        3, (min_freq, max_freq), btype='bandpass', fs=fs
    )

    # Run it in both directions over the signal.
    filtered = scipy.signal.filtfilt(numerator, denominator, signal)
    return filtered


def TrainModel():
    """
    Train the pulse-rate regressor on the Troika training recordings.

    Every recording is cut into 8-second windows that advance by 2 seconds,
    one window per entry of the reference file's ``BPM0`` array. Each window
    is turned into a feature vector by ``create_features`` and paired with
    its reference BPM. A random forest is then fitted on an 80% split of
    those samples (the remaining 20% is not used here).

    Both the split and the forest are seeded so repeated runs produce the
    same model.

    :return: the fitted regressor
    :rtype: RandomForestRegressor
    """
    Fs = 125 # Troika data has sampling rate of 125 Hz
    min_freq_val = 40/60.0   # 40 BPM in Hz
    max_freq_val = 240/60.0  # 240 BPM in Hz
    window_size = 8*Fs  # Ground truth BPM provided in 8 second windows
    window_shift = 2*Fs # Successive windows start 2 seconds apart

    data_fls, ref_fls = LoadTroikaDataset()

    features_list, target_list = [], []

    for data_fl, ref_fl in zip(data_fls, ref_fls):
        ppg, accx, accy, accz = LoadTroikaDataFile(data_fl)
        reference_bpm = sp.io.loadmat(ref_fl)['BPM0']

        # One training sample per reference value: window i starts at
        # i * window_shift samples.
        for window_idx, bpm_row in enumerate(reference_bpm):
            window_start = window_idx * window_shift
            window_end = window_start + window_size

            feature, _, _ = create_features(
                window_idx,
                ppg[window_start:window_end],
                accx[window_start:window_end],
                accy[window_start:window_end],
                accz[window_start:window_end],
                min_freq_val, max_freq_val, Fs, verbose = False,
                plot_frequency_ppg = False, plot_frequency_acc = False)

            features_list.append(feature)
            target_list.append(bpm_row[0])

    features = np.array(features_list)
    target = np.array(target_list)

    # Seeded so that training is reproducible from run to run.
    model_rf = RandomForestRegressor(n_estimators = 350, max_depth = 15, random_state = 2021)

    # Only the 80% training portion is used; the held-out part is discarded.
    X_train, _, y_train, _ = train_test_split(features, target, test_size = 0.2, random_state = 2021)
    model_rf.fit(X_train, y_train)

    return model_rf    

# Module-level model shared by RunPulseRateAlgorithm. It is trained once, when
# this cell/module is executed. (A `global` statement is a no-op at module
# scope, so a plain assignment is all that is needed.)
model_ml = TrainModel()


def Evaluate():
    """
    Top-level evaluation entry point.

    Runs the pulse rate algorithm on every trial of the Troika dataset,
    pools the per-window errors and confidences of all trials, and reduces
    them to a single aggregate error metric.

    :return: Pulse rate error on the Troika dataset. See AggregateErrorMetric.
    :rtype: float
    """
    # Retrieve dataset files
    signal_files, reference_files = LoadTroikaDataset()

    # Run the pulse rate algorithm on each trial; every result is an
    # (errors, confidence) pair of arrays.
    per_trial = [
        RunPulseRateAlgorithm(signal_file, reference_file)
        for signal_file, reference_file in zip(signal_files, reference_files)
    ]

    # Flatten the per-trial arrays into one array each, then aggregate.
    all_errors = np.hstack([trial_errors for trial_errors, _ in per_trial])
    all_confidences = np.hstack([trial_confidence for _, trial_confidence in per_trial])
    return AggregateErrorMetric(all_errors, all_confidences)

def RunPulseRateAlgorithm(data_fl, ref_fl, verbose=True):
    """
    Estimate the pulse rate of every window of one Troika trial.

    The recording is cut into 8-second windows that advance by 2 seconds,
    one per entry of the reference file's ``BPM0`` array. For each window the
    features from ``create_features`` are fed to the module-level model
    ``model_ml`` to predict the BPM, and ``Confidence`` scores the prediction
    against the window's PPG spectrum.

    :param data_fl: path to the trial's signal .mat file.
    :type data_fl: str
    :param ref_fl: path to the matching reference .mat file (must contain
        ``BPM0``).
    :type ref_fl: str
    :param verbose: print the window bounds, the reference BPM and the
        predicted BPM for every window. Defaults to True, which is what the
        previously hard-coded flag did.
    :type verbose: bool

    :return: ``(errors, confidence)`` - two numpy arrays with one entry per
        window. ``errors`` holds the signed difference
        ``reference BPM - predicted BPM``; the absolute value is taken later
        by AggregateErrorMetric.
    :rtype: tuple
    """
    # Load data using LoadTroikaDataFile
    ppg, accx, accy, accz = LoadTroikaDataFile(data_fl)
    Fs = 125 # Troika data has sampling rate of 125 Hz

    min_freq_val = 40/60.0   # 40 BPM in Hz
    max_freq_val = 240/60.0  # 240 BPM in Hz

    window_size = 8*Fs  # Ground truth BPM provided in 8 second windows
    window_shift = 2*Fs # Successive windows start 2 seconds apart

    reference_bpm = sp.io.loadmat(ref_fl)['BPM0']

    errs = []
    confs = []

    # For each 8 second window, compute a predicted BPM and confidence and compare to ground truth
    for window_idx, bpm_row in enumerate(reference_bpm):
        window_start = window_idx * window_shift
        window_end = window_start + window_size

        if verbose:
            print(f"Win start,end: {window_start}, {window_end}")

        groundTruthBPM = bpm_row[0]

        feature, freqs_initial, fft_ppg = create_features(
            window_idx,
            ppg[window_start:window_end],
            accx[window_start:window_end],
            accy[window_start:window_end],
            accz[window_start:window_end],
            min_freq_val, max_freq_val, Fs, verbose = verbose,
            plot_frequency_ppg = False, plot_frequency_acc = False)

        # The model expects a 2-D (n_samples, n_features) input.
        estimation_hr = model_ml.predict(np.reshape(feature, (1, -1)))[0]

        if verbose:
            print(f'Ground Truth BPM: {groundTruthBPM}')
            print(f'Prediction BPM {estimation_hr}')

        estimation_hr, conf = Confidence(estimation_hr, freqs_initial, fft_ppg, min_freq_val)
        errs.append(groundTruthBPM - estimation_hr)
        confs.append(conf)

    # Return the per-window signed errors and confidences as a 2-tuple of numpy arrays.
    return np.array(errs), np.array(confs)

