In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import pickle

Different feature spaces:
1. Statistical
2. Frequency domain
3. Time domain
4. Waveform features

1. Data Preprocessing:
Ensure that your data from the HRV, GSR, and skin temperature sensors are properly cleaned and preprocessed. This may involve handling missing values, normalizing or scaling the data, and removing any outliers.

2. Feature Engineering:
Extract relevant features from each sensor modality that could be indicative of fatigue. These features could include statistical measures (e.g., mean, standard deviation, skewness), frequency domain features (e.g., spectral power), or time-domain features (e.g., heart rate variability indices).

3. Data Combination:
Once you have extracted features from each sensor modality, combine them into a single feature matrix. This matrix will serve as the input to your binary classifier.

4. Model Selection:
Choose an appropriate binary classification model for your task. This notebook uses a support vector machine (SVM) classifier to learn the relationship between the combined sensor features and the target variable (fatigue).

5. Model Training:
Split your combined feature matrix and corresponding labels into training and testing sets. Then, train your SVM classifier on the training data.

6. Model Evaluation:
Evaluate the performance of your trained classifier using appropriate metrics such as accuracy, precision, recall, F1-score, and ROC-AUC score on the testing set. This will give you an idea of how well your model generalizes to unseen data.

7. Deployment:
Once you're satisfied with the performance of your classifier, you can deploy it to make predictions on new data obtained from the sensors in real-time.

# Preprocessing

In [2]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same synthetic dataset.
SEED = 42
N_SAMPLES = 10000
rng = np.random.default_rng(SEED)

# Synthetic temperature readings: integers drawn uniformly from 34..38 (inclusive).
random_numbers1 = rng.integers(34, 38, size=N_SAMPLES, endpoint=True)

# Synthetic body-resistance readings in 2..30 (inclusive). One extra sample is
# generated so that exactly N_SAMPLES consecutive differences can be formed.
list_2 = rng.integers(2, 30, size=N_SAMPLES + 1, endpoint=True)

# Change of body resistance from the previous step to the current one:
# random_numbers_gsr[i] == list_2[i + 1] - list_2[i].
# The earlier index loop started at 0, so its first entry was
# list_2[0] - list_2[-1] (first reading minus the *last* reading) and every
# other entry was shifted by one position; np.diff avoids that wrap-around.
random_numbers_gsr = np.diff(list_2)

## Assigning labels

In [3]:
# A sample is 'OK' only when the temperature is below 38 AND the GSR change is
# below 5; every other sample is labelled 'WARNING'.
is_ok = (random_numbers1 < 38) & (random_numbers_gsr < 5)
labels = np.where(is_ok, 'OK', 'WARNING')

# Turn each 1-D sensor series into a column vector ...
temp_data = random_numbers1[:, np.newaxis]
gsr_data = random_numbers_gsr[:, np.newaxis]

# ... and place the columns side by side: column 0 = temperature, column 1 = GSR change.
Sensor_data = np.concatenate((temp_data, gsr_data), axis=1)

In [4]:
# Split features and labels into training and testing subsets.
# test_size=0.6 holds out 60% of the samples for testing (40% remain for training);
# random_state pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    Sensor_data,
    labels,
    test_size=0.6,
    random_state=42,
)


In [5]:
# Two-stage pipeline: StandardScaler rescales each feature to zero mean and unit
# variance, then the SVC is fitted on the scaled features.
scaler = StandardScaler()
svm = SVC(gamma='auto')
clf = make_pipeline(scaler, svm)

# Fit the whole pipeline (scaler + classifier) on the training split.
clf.fit(X_train, y_train)

## Prediction step

In [8]:
# Keep the predictions instead of discarding them so they can be compared with
# the held-out labels.
y_pred = clf.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
test_accuracy = np.mean(y_pred == y_test)
print(f"Test accuracy: {test_accuracy:.3f}")

# Last expression: still display the predicted labels as the cell output.
y_pred



In [7]:
# Serialise the fitted pipeline to 'svm_model.pkl' in the current working directory.
with open('svm_model.pkl', mode='wb') as model_file:
    pickle.dump(clf, model_file)
