In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
from scipy.integrate import solve_ivp
from mpl_toolkits.mplot3d import Axes3D
import warnings
warnings.filterwarnings('ignore')
import matplotlib as mpl
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn import metrics  
# from pyearth import Earth # for building a MARS model
import plotly.graph_objects as go
import plotly

import scipy.io as sio 

import glob,os
import random

from sklearn.ensemble import RandomForestRegressor

## Load training dataset

In [None]:
# Load the training data: every .mat file in `path` contributes one DataFrame
# built from its 'feature_person' matrix, and all frames are stacked into df_sample.
# NOTE(review): absolute, machine-specific path - point it at your local copy of the data.
path = 'C:/Users/MrLiangyiLYU/Desktop/SSR_ECG_niose/train_ecg_noise_ssr'
# glob returns files in arbitrary OS order; sorting keeps the row order of
# df_sample (and therefore the fitted model) reproducible across machines.
file = sorted(glob.glob(os.path.join(path,'*.mat')))
if not file:
    raise FileNotFoundError(f"No .mat files found in {path!r}")

# Use the file stem as the label. The previous fixed offset (f[76:-4]) depended on
# the length of the directory string; with a different directory it cuts into the
# file name and can produce empty/duplicate labels, which silently overwrite each
# other in `d` and duplicate one subject's rows in df_sample.
label_train = [os.path.splitext(os.path.basename(f))[0] for f in file]

print(label_train)

feature_columns = ['PTTp',
                   'CfSlope','CfSlope(t-3)','CfSlope(t-6)',
                   'sys_notch_vol','sys_notch_vol(t-3)','sys_notch_vol(t-6)',
                   'sys_foot_RI','sys_foot_RI(t-3)','sys_foot_RI(t-6)',
                   'HR','HR(t-3)','HR(t-6)',
                   'SBP','DBP','MBP']

# One DataFrame per file, keyed by 'mat_<label>'.
d = {}
for label, f in zip(label_train, file):
    mat = sio.loadmat(f)
    d['mat_'+label] = pd.DataFrame(mat['feature_person'], columns=feature_columns)

# Stack all per-file frames (in label order) under a fresh 0..n-1 index.
df_sample = pd.concat([d['mat_'+label] for label in label_train], ignore_index=True)
print(df_sample.shape)
df_sample.head()

## Load testing dataset

In [None]:
# "Test-train" data: per-subject frames keyed by label. rf_model() appends
# d_test_train[testid] to the pooled training data before fitting.
# NOTE(review): absolute, machine-specific path - point it at your local copy of the data.
path = '/Users/lyuliangyi/Desktop/HKCOCHE/state_space_reconstruction/test_noisessr/train_wnssr'
file = sorted(glob.glob(os.path.join(path,'*.mat')))
if not file:
    raise FileNotFoundError(f"No .mat files found in {path!r}")

# Label = file name without directory or extension. This matches what the old
# fixed offset (f[87:-4]) produced for this exact path, but keeps working when
# the directory changes.
label_ttrain = [os.path.splitext(os.path.basename(f))[0] for f in file]

feature_columns = ['PTTp',
                   'CfSlope','CfSlope(t-3)','CfSlope(t-6)',
                   'sys_notch_vol','sys_notch_vol(t-3)','sys_notch_vol(t-6)',
                   'sys_foot_RI','sys_foot_RI(t-3)','sys_foot_RI(t-6)',
                   'HR','HR(t-3)','HR(t-6)',
                   'SBP','DBP','MBP']

d_test_train = {}
for label, f in zip(label_ttrain, file):
    mat = sio.loadmat(f)
    d_test_train[label] = pd.DataFrame(mat['feature_person'], columns=feature_columns)

In [None]:
# "Test-test" data: per-subject frames keyed by label. rf_model() evaluates its
# predictions against d_test_test[testid].
# NOTE(review): absolute, machine-specific path - point it at your local copy of the data.
path = '/Users/lyuliangyi/Desktop/HKCOCHE/state_space_reconstruction/test_noisessr/test_wnssr'
file = sorted(glob.glob(os.path.join(path,'*.mat')))
if not file:
    raise FileNotFoundError(f"No .mat files found in {path!r}")

# Label = file name without directory or extension. This matches what the old
# fixed offset (f[86:-4]) produced for this exact path, but keeps working when
# the directory changes.
label_ttest = [os.path.splitext(os.path.basename(f))[0] for f in file]

feature_columns = ['PTTp',
                   'CfSlope','CfSlope(t-3)','CfSlope(t-6)',
                   'sys_notch_vol','sys_notch_vol(t-3)','sys_notch_vol(t-6)',
                   'sys_foot_RI','sys_foot_RI(t-3)','sys_foot_RI(t-6)',
                   'HR','HR(t-3)','HR(t-6)',
                   'SBP','DBP','MBP']

d_test_test = {}
for label, f in zip(label_ttest, file):
    mat = sio.loadmat(f)
    d_test_test[label] = pd.DataFrame(mat['feature_person'], columns=feature_columns)

In [None]:
def rf_model(df_sample,testid):
    """Fit a random forest for one subject and score it on that subject's test data.

    The model is trained on ``df_sample`` stacked with ``d_test_train[testid]``
    and evaluated on ``d_test_test[testid]``. Both dicts are read from module
    scope, so the data-loading cells must have been run first.

    Parameters
    ----------
    df_sample : pandas.DataFrame
        Pooled training rows; must contain the feature and target columns
        selected below.
    testid : hashable
        Key present in both ``d_test_train`` and ``d_test_test``.

    Returns
    -------
    y_pred : array
        Predictions for the test rows; columns 0, 1, 2 are SBP, DBP, MBP.
    df_result : pandas.DataFrame
        Single-row frame with ``<target>_RMSE`` and ``<target>_MAD`` columns.

    Raises
    ------
    KeyError
        If ``testid`` is missing from either dict or a required column is absent.
    """
    feature_cols = ['PTTp','CfSlope','sys_notch_vol','sys_foot_RI','HR']
    target_cols = ['SBP','DBP','MBP']

    # DataFrame.append was removed in pandas 2.0; pd.concat with
    # ignore_index=True is the supported equivalent of append + reset_index(drop=True).
    df_total = pd.concat([df_sample, d_test_train[testid]], ignore_index=True)
    df_test = d_test_test[testid]

    X_train = df_total[feature_cols]
    y_train = df_total[target_cols]
    X_test = df_test[feature_cols]
    y_test = df_test[target_cols]

    # Fixed seed so repeated runs on the same data give the same forest.
    model = RandomForestRegressor(n_estimators=100, random_state= 2023)
    model.fit(X_train, y_train) # fit the RF model

    y_pred = model.predict(X_test) # predict on testing data

    # Column k of y_pred corresponds to target_cols[k]. RMSE columns come first,
    # then MAD columns, to keep the result layout stable.
    scores = {}
    for k, target in enumerate(target_cols):
        scores[target + '_RMSE'] = np.sqrt(metrics.mean_squared_error(y_test[target], y_pred[:, k]))
    for k, target in enumerate(target_cols):
        # "MAD" here is sklearn's mean absolute error.
        scores[target + '_MAD'] = metrics.mean_absolute_error(y_test[target], y_pred[:, k])

    df_result = pd.DataFrame(scores, index=[0])

    return y_pred,df_result

In [None]:
# Fit and score one model per test subject, in sorted label order.
testid = sorted(label_ttest)
bp_result = []   # one single-row metrics frame per subject
dict_pred = {}   # subject label -> prediction array returned by rf_model
# The loop variable is deliberately not called `id`: at notebook top level that
# would shadow the builtin id() for every later cell in the kernel session.
for subject_id in testid:
    y_pred,result = rf_model(df_sample, subject_id)
    result['ID'] = subject_id
    dict_pred[subject_id] = y_pred
    bp_result.append(result)
# Stack the per-subject rows under a fresh 0..n-1 index.
BP_result = pd.concat(bp_result, ignore_index=True)
BP_result

In [None]:
# Summary statistics of the numeric columns of BP_result across all test subjects.
BP_result.describe()

In [None]:
# Write the per-subject metrics table to a CSV in the current working directory,
# without the row index.
BP_result.to_csv('rf_5f_wnssr.csv',index=False)