In [1]:
!pip install pandas
!pip install seaborn matplotlib pandas scikit_learn
!pip install numpy
!pip install imbalanced-learn
!pip install xgboost



In [2]:

import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split, StratifiedKFold

from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
import pickle

file_path = r"C:\Users\PC\Desktop\INTEGRISENSE\WESAD\S2\S2.pkl"

with open(file_path, 'rb') as f:
    data = pickle.load(f, encoding='latin1')

# Show top-level keys
print("Top-level keys:", data.keys())

# Show what's inside 'signal', if it exists
if 'signal' in data:
    print("Keys inside 'signal':", data['signal'].keys())
else:
    print("No 'signal' key found")

Top-level keys: dict_keys(['signal', 'label', 'subject'])
Keys inside 'signal': dict_keys(['chest', 'wrist'])


In [4]:
print("Chest signals:", data['signal']['chest'].keys())
print("Wrist signals:", data['signal']['wrist'].keys())

Chest signals: dict_keys(['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp'])
Wrist signals: dict_keys(['ACC', 'BVP', 'EDA', 'TEMP'])


In [5]:
import pickle
import numpy as np

def compute_hrv_rmssd(bvp_signal):
    # Compute HRV using RMSSD from BVP (after smoothing)
    diff = np.diff(bvp_signal)
    squared_diff = np.square(diff)
    rmssd = np.sqrt(np.mean(squared_diff))
    return rmssd

# Path to your file
file_path = r"C:\Users\PC\Desktop\INTEGRISENSE\WESAD\S2\S2.pkl"

with open(file_path, 'rb') as f:
    data = pickle.load(f, encoding='latin1')

# Get wrist signals
wrist = data['signal']['wrist']
bvp = wrist['BVP'].flatten()
eda_wrist = wrist['EDA'].flatten()
temp_wrist = wrist['TEMP'].flatten()
acc_wrist = wrist['ACC']  # shape: (time, 3)

# Feature 1: HRV (RMSSD) from BVP
hrv_rmssd = compute_hrv_rmssd(bvp)

# Feature 2: Mean BVP
bvp_mean = np.mean(bvp)

# Feature 3: Mean EDA
eda_wrist_mean = np.mean(eda_wrist)

# Feature 4: Mean TEMP
temp_wrist_mean = np.mean(temp_wrist)

# Feature 5: Mean magnitude of ACC
acc_wrist_magnitude = np.linalg.norm(acc_wrist, axis=1)
acc_wrist_mean_mag = np.mean(acc_wrist_magnitude)

# Combine all 5 features into a single array
X = np.array([[hrv_rmssd, bvp_mean, eda_wrist_mean, temp_wrist_mean, acc_wrist_mean_mag]])

# Load labels (just for reference)
y_raw = data['label']  # same length as original samples

print("Features loaded from wrist only:")
print(f"Shape of X: {X.shape}")
print(f"Feature vector: {X}")

Features loaded from wrist only:
Shape of X: (1, 5)
Feature vector: [[ 1.19436825e+01 -4.26828014e-04  3.91743329e-01  3.43869452e+01
   6.34344863e+01]]


In [6]:
import pickle
import pandas as pd

# Load the pickle file
file_path = r"C:\Users\PC\Desktop\INTEGRISENSE\WESAD\S2\S2.pkl"
with open(file_path, 'rb') as f:
    data = pickle.load(f, encoding='latin1')

# Extract wrist signals
wrist = data['signal']['wrist']

# Flatten signals
bvp = wrist['BVP'].flatten()
eda = wrist['EDA'].flatten()
temp = wrist['TEMP'].flatten()
acc_x = wrist['ACC'][:, 0]
acc_y = wrist['ACC'][:, 1]
acc_z = wrist['ACC'][:, 2]

# Get the minimum common length
min_len = min(len(bvp), len(eda), len(temp), len(acc_x), len(acc_y), len(acc_z))

# Truncate all signals to same length
wrist_signals = {
    'BVP': bvp[:min_len],
    'EDA': eda[:min_len],
    'TEMP': temp[:min_len],
    'ACC_X': acc_x[:min_len],
    'ACC_Y': acc_y[:min_len],
    'ACC_Z': acc_z[:min_len],
}

# Convert to DataFrame
wrist_df = pd.DataFrame(wrist_signals)

# Show head of DataFrame
print("Head of wrist signals from S2:")
print(wrist_df.head())

Head of wrist signals from S2:
     BVP       EDA   TEMP  ACC_X  ACC_Y  ACC_Z
0 -59.37  1.138257  35.41   62.0  -21.0  107.0
1 -53.42  1.125444  35.41   66.0   13.0   53.0
2 -44.40  1.011405  35.41   41.0    9.0   15.0
3 -33.17  1.033188  35.41   52.0   16.0   24.0
4 -20.79  0.935807  35.41   54.0   15.0   34.0


In [7]:
import os
import pickle
import numpy as np
import pandas as pd
from scipy.signal import resample

# Path to the root WESAD dataset
root_path = "C:/Users/PC/Desktop/INTEGRISENSE/WESAD"

# Subjects to process (excluding S12, which doesn't exist)
subjects = [f"S{i}" for i in range(2, 18) if i != 12]

# Features to extract
selected_features = ['EDA', 'TEMP', 'ACC', 'BVP']

# Standard target sample length for downsampling
target_length = 7000

# Storage
X, y = [], []

for subj in subjects:
    try:
        file_path = os.path.join(root_path, subj, f"{subj}.pkl")
        with open(file_path, 'rb') as f:
            data = pickle.load(f, encoding='latin1')

        # Extract sensor signals and labels
        signal = data['signal']['wrist']
        label = data['label']

        # Extract and downsample selected features
        feature_arrays = []
        for feature in selected_features:
            values = signal[feature]
            if feature == 'ACC':
                # Flatten 3-axis ACC data
                acc_magnitude = np.linalg.norm(values, axis=1)
                values = acc_magnitude.reshape(-1, 1)
            if values.shape[0] != target_length:
                values = resample(values, target_length)
            feature_arrays.append(values if values.ndim == 2 else values.reshape(-1, 1))

        # Combine all features into one feature array
        features = np.hstack(feature_arrays)

        # Downsample and align labels to same length
        label_array = label[:]
        label_array = resample(label_array.reshape(-1, 1), target_length).astype(int).flatten()

        # Filter out label 0 (undefined), keep 1=calm and 2=stress
        mask = np.isin(label_array, [1, 2])
        filtered_features = features[mask]
        filtered_labels = label_array[mask]

        # Relabel: 1 → 0 (calm), 2 → 1 (stress)
        filtered_labels = np.where(filtered_labels == 1, 0, 1)

        X.append(filtered_features)
        y.append(filtered_labels)

        print(f"{subj}: Loaded {filtered_features.shape[0]} samples")

    except Exception as e:
        print(f"{subj}: Failed to process: {e}")

# Combine all subject data
if X and y:
    X = np.vstack(X)
    y = np.concatenate(y)

    print("\nFinal dataset shape:")
    print("X:", X.shape)
    print("y:", y.shape)

    unique, counts = np.unique(y, return_counts=True)
    print("\nLabel distribution after filtering and relabeling:")
    for label, count in zip(unique, counts):
        print(f"Label {label}: {count} samples")
else:
    print("No data loaded. Please verify your dataset path and contents.")

S2: Loaded 1577 samples
S3: Loaded 1509 samples
S4: Loaded 1533 samples
S5: Loaded 1605 samples
S6: Loaded 1414 samples
S7: Loaded 1900 samples
S8: Loaded 1848 samples
S9: Loaded 1910 samples
S10: Loaded 1918 samples
S11: Loaded 1947 samples
S13: Loaded 1831 samples
S14: Loaded 1836 samples
S15: Loaded 1950 samples
S16: Loaded 1804 samples
S17: Loaded 1779 samples

Final dataset shape:
X: (26361, 4)
y: (26361,)

Label distribution after filtering and relabeling:
Label 0: 16856 samples
Label 1: 9505 samples


In [8]:
# Convert to DataFrame to inspect head
columns = ['EDA', 'TEMP', 'ACC_Mag', 'BVP']
df = pd.DataFrame(X, columns=columns)
df['label'] = y

# Show head
print(df.head())

        EDA       TEMP    ACC_Mag        BVP  label
0  1.585790  35.808589  65.824687  11.236238      0
1  1.574424  35.825170  63.123662  26.610110      0
2  1.525942  35.828855  62.771683  51.587284      0
3  1.513203  35.829831  62.626886  27.933278      0
4  1.497633  35.810250  62.588797  33.487841      0


In [9]:
import sys
print(sys.executable)

C:\Users\PC\AppData\Local\Programs\Python\Python313\python.exe


In [10]:
import sys
!{sys.executable} -m pip install imbalanced-learn



In [11]:
from imblearn.over_sampling import SMOTE
!pip install xgboost



In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Replace this with your actual dataset
# Ensure df is defined earlier in your notebook as your final combined data
X = df[['EDA', 'TEMP', 'ACC_Mag', 'BVP']].values
y = df['label'].values

# Step 1: Split the data with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Step 2: Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Apply SMOTE to balance the training set
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Step 4: Train using XGBoost
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train_balanced, y_train_balanced)

# Step 5: Predict and evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Optional: Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


              precision    recall  f1-score   support

           0       0.90      0.74      0.82      3372
           1       0.65      0.86      0.74      1901

    accuracy                           0.79      5273
   macro avg       0.78      0.80      0.78      5273
weighted avg       0.81      0.79      0.79      5273

Confusion Matrix:
[[2511  861]
 [ 269 1632]]


In [14]:
import numpy as np
print("Class distribution after SMOTE:", np.bincount(y_train_balanced))

Class distribution after SMOTE: [13484 13484]


In [26]:
sample = [[0.9, 36.0, 3.2, 0.8]]  # Example input
scaled_sample = scaler.transform(sample)
predicted_label = model.predict(scaled_sample)
print("Prediction:", "Stressed" if predicted_label[0] == 1 else "Calm")

Prediction: Stressed


In [27]:
import joblib
joblib.dump(model, "stress_detection_model.pkl")

['stress_detection_model.pkl']