#Data Preprocessing


In [18]:
import pandas as pd

In [19]:
# Load the datasets
heart_rate_non_linear_train = pd.read_csv('/content/drive/MyDrive/Updated_Dataset/heart_rate_non_linear_features_train.csv')
time_domain_train = pd.read_csv('/content/drive/MyDrive/Updated_Dataset/time_domain_features_train.csv')
frequency_domain_train = pd.read_csv('/content/drive/MyDrive/Updated_Dataset/frequency_domain_features_train.csv')


In [20]:
# Convert 'datasetId' in heart_rate_non_linear_train and 'uuid' in other datasets to the same type
# heart_rate_non_linear_train['datasetId'] = heart_rate_non_linear_train['datasetId'].astype(str)
# time_domain_train['uuid'] = time_domain_train['uuid'].astype(str)
# frequency_domain_train['uuid'] = frequency_domain_train['uuid'].astype(str)

In [21]:
# Merge datasets on 'uuid'
train_data = pd.merge(heart_rate_non_linear_train, time_domain_train, on='uuid')
train_data = pd.merge(train_data, frequency_domain_train, on='uuid')

In [22]:
# Drop unnecessary columns like 'uuid' and 'datasetId'
train_data.drop(columns=['uuid', 'datasetId'], inplace=True)

In [23]:
# Define mapping for conditions
condition_mapping = {
    'no stress': 0,
    'interruption': 1,
    'time pressure': 2
}

In [24]:
# Create a new column 'condition_numeric' based on the mapping
train_data['condition_numeric'] = train_data['condition'].map(condition_mapping)

# Check the updated DataFrame
print(train_data[['condition', 'condition_numeric']].head())


      condition  condition_numeric
0     no stress                  0
1  interruption                  1
2  interruption                  1
3     no stress                  0
4     no stress                  0


In [25]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Reshape the data for LSTM input (samples, timesteps, features)
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_val_reshaped = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))

# Check shapes and types
print(X_train_reshaped.shape)
print(y_train.shape)
print(X_val_reshaped.shape)
print(y_val.shape)

(15999, 1, 33)
(15999,)
(4000, 1, 33)
(4000,)


#Model Architecture

In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


# Build the RNN model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(1, X_train.shape[1])),
    Dropout(0.2),
    Dense(1)  # Final layer for regression (single output for heart rate)
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# View model summary
model.summary()

  super().__init__(**kwargs)


#Model Training

In [33]:
# Train the model with smaller batch size
history = model.fit(X_train_reshaped, y_train, epochs=100, batch_size=16, validation_data=(X_val_reshaped, y_val), verbose=1)

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 25.3880 - val_loss: 1.1335
Epoch 2/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 23.9034 - val_loss: 1.4707
Epoch 3/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 24.3067 - val_loss: 0.9362
Epoch 4/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 24.8450 - val_loss: 1.8669
Epoch 5/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 23.9873 - val_loss: 1.3852
Epoch 6/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 23.4163 - val_loss: 2.0199
Epoch 7/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 23.7640 - val_loss: 1.4877
Epoch 8/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 22.8865 - val_loss: 1.2313
Epoch 9/

#Predictions on validation data

In [35]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Predictions on validation data
y_pred = model.predict(X_val_reshaped)

# Calculate metrics
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

# Print results
print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')
print(f'R² Score: {r2}')

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Mean Squared Error: 0.277841978727658
Root Mean Squared Error: 0.5271071795447848
R² Score: 0.9974816992123902


#Data visualization