In [12]:
import pickle

def load_and_explore_pkl(filename):
    # Load the pickle file with latin1 encoding
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='latin1')  # Use 'latin1' to avoid encoding issues
    
    # Print a summary of the loaded data
    if isinstance(data, dict):
        # print("Loaded data keys:")
        # print(data.keys())  # Print all keys
        
        # Print summary of each key
        for key in data:
            print(f"\nKey: {key}")
            if key == 'signal':
                print("  - signal keys:", data[key].keys())
                print("    - chest keys:", data[key]['chest'].keys())
                print("    - wrist keys:", data[key]['wrist'].keys())
            elif key == 'label':
                print("  - label shape:", data[key].shape)
            else:
                print(f"  - {key}:", data[key])
    else:
        print("Loaded data is not a dictionary")
    
    return data

# Load data
filename = "../S2/S2.pkl"  # Replace with the relative path to your actual .pkl filename
data = load_and_explore_pkl(filename)
# labels = data['label']



Key: signal
  - signal keys: dict_keys(['chest', 'wrist'])
    - chest keys: dict_keys(['ACC', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp'])
    - wrist keys: dict_keys(['ACC', 'BVP', 'EDA', 'TEMP'])

Key: label
  - label shape: (4255300,)

Key: subject
  - subject: S2


In [13]:
import numpy as np

# Retrieve labels
labels = data['label']

# Define the labels to remove
labels_to_remove = {0,5,6,7}

# Get indices of the labels to keep
indices_to_keep = np.array([i for i, label in enumerate(labels) if label not in labels_to_remove], dtype=int)

# Function to filter signal data
def filter_signals(signal_data, indices):
    return signal_data[indices]

# Filter chest data
filtered_chest = {modality: filter_signals(np.array(data['signal']['chest'][modality]), indices_to_keep) for modality in data['signal']['chest']}

# Filter labels
filtered_labels = np.array(labels)[indices_to_keep]

# Truncate the filtered chest data and labels to ensure consistency
max_length = min(len(filtered_labels), len(filtered_chest['ACC']))

truncated_filtered_labels = filtered_labels[:max_length]
truncated_filtered_chest = {modality: filtered_chest[modality][:max_length] for modality in filtered_chest}

# Update data dictionary with filtered chest data and labels
filtered_data = {
    'signal': {
        'chest': truncated_filtered_chest
    },
    'label': truncated_filtered_labels
}

# Verify the lengths after filtering
print(f"Filtered labels length: {len(filtered_data['label'])}")
for modality in filtered_data['signal']['chest']:
    print(f"Filtered chest {modality} length: {len(filtered_data['signal']['chest'][modality])}")


Filtered labels length: 2022299
Filtered chest ACC length: 2022299
Filtered chest ECG length: 2022299
Filtered chest EMG length: 2022299
Filtered chest EDA length: 2022299
Filtered chest Temp length: 2022299
Filtered chest Resp length: 2022299


In [14]:
# Extract variables for each chest modality
acc_data = filtered_data['signal']['chest']['ACC']
ecg_data = filtered_data['signal']['chest']['ECG']
emg_data = filtered_data['signal']['chest']['EMG']
eda_data = filtered_data['signal']['chest']['EDA']
temp_data = filtered_data['signal']['chest']['Temp']
resp_data = filtered_data['signal']['chest']['Resp']


In [15]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# Assuming 'acc_data' and 'filtered_data['label']' are already defined

# Extract features and labels
X = np.array(acc_data)  # Features (ACC data)
y = np.array(filtered_data['label'])  # Labels

# Check dimensions and adjust if necessary
if X.ndim == 1:
    X = X.reshape(-1, 1)  # Reshape if ACC data is one-dimensional

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define SMOTE and the model
smote = SMOTE(sampling_strategy='auto', random_state=42)
model = LogisticRegression(max_iter=1000)

# Create a pipeline with SMOTE and Logistic Regression
pipeline = Pipeline(steps=[('smote', smote), ('model', model)])

# Train the model
pipeline.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = pipeline.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.96
Classification Report:
              precision    recall  f1-score   support

           1       0.98      0.95      0.96    240492
           2       0.95      0.96      0.95    128782
           3       0.91      0.99      0.95     76104
           4       0.98      0.98      0.98    161312

    accuracy                           0.96    606690
   macro avg       0.95      0.97      0.96    606690
weighted avg       0.96      0.96      0.96    606690

Confusion Matrix:
[[227302   6282   3109   3799]
 [  1226 123281   4275      0]
 [    35    827  75242      0]
 [  3140      0      0 158172]]


In [19]:
# Function to predict label from manual input
def predict_label(acc_values):
    # Standardize the input data
    acc_values_scaled = scaler.transform([acc_values])
    
    # Predict the label
    predicted_label = pipeline.predict(acc_values_scaled)
    
    return predicted_label[0]

# Example manual input values (replace these with actual values)
manual_acc_values = [ 0.87580001, -0.10180002 ,-0.29519999 ]  # Replace with actual ACC values

# Predict the label
predicted_label = predict_label(manual_acc_values)

print(f"Predicted Label: {predicted_label}")


Predicted Label: 2
