In [1]:
#Load Dataset

In [2]:
import pandas as pd

heart_rate_non_linear_train = pd.read_csv('heart_rate_non_linear_features_train.csv')
time_domain_train = pd.read_csv('time_domain_features_train.csv')
frequency_domain_train = pd.read_csv('frequency_domain_features_train.csv')

In [3]:
# Convert the 'datasetId' column in heart_rate_non_linear_train to string
heart_rate_non_linear_train['datasetId'] = heart_rate_non_linear_train['datasetId'].astype(str)

# First merge using 'datasetId' from heart_rate_non_linear_train and 'uuid' from time_domain_train
train_data = pd.merge(heart_rate_non_linear_train, time_domain_train, left_on='datasetId', right_on='uuid', how='inner')

In [4]:
# Check merged DataFrame
print("Merged DataFrame shape after first merge:", train_data.shape)
print("Columns in merged DataFrame after first merge:", train_data.columns)
print(train_data.head())  # Print first few rows 

Merged DataFrame shape after first merge: (0, 27)
Columns in merged DataFrame after first merge: Index(['uuid_x', 'SD1', 'SD2', 'sampen', 'higuci', 'datasetId', 'condition',
       'MEAN_RR', 'MEDIAN_RR', 'SDRR', 'RMSSD', 'SDSD', 'SDRR_RMSSD', 'HR',
       'pNN25', 'pNN50', 'KURT', 'SKEW', 'MEAN_REL_RR', 'MEDIAN_REL_RR',
       'SDRR_REL_RR', 'RMSSD_REL_RR', 'SDSD_REL_RR', 'SDRR_RMSSD_REL_RR',
       'KURT_REL_RR', 'SKEW_REL_RR', 'uuid_y'],
      dtype='object')
Empty DataFrame
Columns: [uuid_x, SD1, SD2, sampen, higuci, datasetId, condition, MEAN_RR, MEDIAN_RR, SDRR, RMSSD, SDSD, SDRR_RMSSD, HR, pNN25, pNN50, KURT, SKEW, MEAN_REL_RR, MEDIAN_REL_RR, SDRR_REL_RR, RMSSD_REL_RR, SDSD_REL_RR, SDRR_RMSSD_REL_RR, KURT_REL_RR, SKEW_REL_RR, uuid_y]
Index: []

[0 rows x 27 columns]


In [5]:
# Now check if 'uuid' exists in the merged DataFrame
if 'uuid' not in train_data.columns:
    print("Error: 'uuid' column is missing in train_data. Check merge keys or column names.")
else:
    # Now merge with the third dataset on 'uuid'
    train_data = pd.merge(train_data, frequency_domain_train, on='uuid', how='inner')

    # Check final merged DataFrame
    print("Final merged DataFrame shape:", train_data.shape)
    print("Columns in final merged DataFrame:", train_data.columns)


Error: 'uuid' column is missing in train_data. Check merge keys or column names.


In [6]:
# Explore data
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   uuid_x             0 non-null      object 
 1   SD1                0 non-null      float64
 2   SD2                0 non-null      float64
 3   sampen             0 non-null      float64
 4   higuci             0 non-null      float64
 5   datasetId          0 non-null      object 
 6   condition          0 non-null      object 
 7   MEAN_RR            0 non-null      float64
 8   MEDIAN_RR          0 non-null      float64
 9   SDRR               0 non-null      float64
 10  RMSSD              0 non-null      float64
 11  SDSD               0 non-null      float64
 12  SDRR_RMSSD         0 non-null      float64
 13  HR                 0 non-null      float64
 14  pNN25              0 non-null      float64
 15  pNN50              0 non-null      float64
 16  KURT               0 non-null      flo

In [7]:
print(train_data.head())

Empty DataFrame
Columns: [uuid_x, SD1, SD2, sampen, higuci, datasetId, condition, MEAN_RR, MEDIAN_RR, SDRR, RMSSD, SDSD, SDRR_RMSSD, HR, pNN25, pNN50, KURT, SKEW, MEAN_REL_RR, MEDIAN_REL_RR, SDRR_REL_RR, RMSSD_REL_RR, SDSD_REL_RR, SDRR_RMSSD_REL_RR, KURT_REL_RR, SKEW_REL_RR, uuid_y]
Index: []

[0 rows x 27 columns]


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Merge the dataframes if necessary to have a single DataFrame with all features
train_data = pd.merge(heart_rate_non_linear_train, time_domain_train, on='uuid')

In [9]:
# Features and target variable

In [10]:
X = train_data.drop(columns=['HR'])  # Assuming 'HR' is the target variable for heart rate
y = train_data['HR']

In [11]:
# Identify non-numeric columns

In [12]:
non_numeric_cols = X.select_dtypes(include=['object']).columns.tolist()
print("Non-numeric columns:", non_numeric_cols)

Non-numeric columns: ['uuid', 'datasetId', 'condition']


In [13]:
# Option 1: Drop non-numeric columns

In [14]:
X = X.drop(columns=non_numeric_cols)

In [15]:
# Standardize features

In [16]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [17]:
# Split data into train and test sets

In [18]:
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [19]:
# Reshape for LSTM input [samples, timesteps, features]

In [20]:
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_val_reshaped = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))