# MS Project by SVM model
### By Haorui Lyu (NetID: haoruil2)

## Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.metrics import Precision, Recall, F1Score

## Data loading and preprocessing

In [2]:
def normalize(df):
    
    for column in ['x', 'y', 'z']:
        df[column] = (df[column] - df[column].mean()) / df[column].std()
    df = pd.DataFrame(df[['x', 'y', 'z']])
    return df

def segment(df, window_size, step_size):
    segments = []
    for start in range(0, len(df) - window_size + 1, step_size):
        end = start + window_size

        \
        segment = df.iloc[start:end]
        segments.append(segment)
    return segments

In [3]:
def segment_activities(window_size=32, step_size=16):
    # window_size = 160  # 2.5 seconds
    # step_size = 20  # 75% overlap

    activities_path = r'D:\PersonalFiles\MS_Analysis\Activity'
    activity_segments = {}

    # Iterate over each activity's folder
    for activity_name in os.listdir(activities_path):
        print(activity_name)
        activity_folder = os.path.join(activities_path, activity_name)
        if os.path.isdir(activity_folder):
            # Store segments for each activity
            activity_segments[activity_name] = []
            
            # Iterate over each CSV file within the activity's folder
            for filename in os.listdir(activity_folder):
                if filename.endswith('.csv'):
                    file_path = os.path.join(activity_folder, filename)
                    
                    # Read  CSV file
                    df = pd.read_csv(file_path)
                    
                    # Normalize data
                    df_normalized = normalize(df)
                    
                    # Segment the data using a rolling window
                    segments = segment(df_normalized, window_size, step_size)
                    
                    # Append the segments to the activity's list
                    activity_segments[activity_name].extend(segments)
    return activity_segments

## Define F1 Score function

In [4]:
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name = name, **kwargs)
        self.precision = Precision()
        self.recall = Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.round(tf.nn.sigmoid(y_pred))  # Use sigmoid to convert the output to between 0 and 1, then round to the nearest integer
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_states(self):
        self.precision.reset_states()
        self.recall.reset_states()

## Define SVM model by using TensorFlow

The training data set is very large, and training using the scikit-learn model will be very time-consuming. For this reason, I use TensorFlow to simulate the SVM algorithm to train the model.
1. Use LabelEncoder to convert the activity name into integer form, then convert non-zero labels to 1, and the rest to -1, adapting to the common practice of SVM models.
2. Define a simple neural network model containing a single linear output layer.
3. Use hinge loss to customize the loss function, which is a commonly used loss function when training SVM models.
4. Use stochastic gradient descent (SGD) as the optimizer when compiling the model with a custom hinge loss function and multiple evaluation metrics (accuracy, precision, recall, and F1 score).

In [5]:
def fit_model_SVM_TensorFlow(activity_segments):
    X, y = [], []

    for activity_name, segments in activity_segments.items():
        for segment in segments:
            feature_vector = segment.to_numpy().flatten()
            X.append(feature_vector)
            y.append(activity_name)
    print("Segments in suitable format")

    X = np.array(X)
    y = np.array(y)

    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_encoded = np.where(y_encoded > 0, 1, -1)  # Adjust labels to -1 and 1

    print("Labels encoded")

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size = 0.2, random_state = 42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, input_shape=(X_train_scaled.shape[1],), activation = None) 
    ])

    def hinge_loss(y_true, y_pred):
        return tf.reduce_mean(tf.maximum(1 - y_true * y_pred, 0))

    model.compile(optimizer='sgd', loss=hinge_loss, metrics=['accuracy', Precision(), Recall(), F1Score()])

    print("Compiled model\nTraining Model")
    model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.2)

    results = model.evaluate(X_test_scaled, y_test)
    print("Test Results for SVM model - Loss: {:.4f}, Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}".format(*results))
    
    return model

# Result

## 5 Second Window, 1 Second Overlap Data

In [6]:
activity_segments = segment_activities(160, 36)
model = fit_model_SVM_TensorFlow(activity_segments)

B1_T1_ (WALK)
B1_T2_ (WALK)
B1_TWT_A_ (WALK)
B1_TWT_B_ (WALK)
B2_T1_ (WALK)
B2_T2_ (WALK)
B2_TWT_A_ (WALK)
B2_TWT_B_ (WALK)
digit symbol task_ (SIT)
HR Recovery_ (STAND or SIT)
Montreal Cognitive Assessment_ (SIT)
Motor Behavioral
Motor Behavioral task
Motor behavioral task(1)
Motor Behavioral Task_ (All conditions seen here)
Naughton
Naughton Task
Naughton test
Naughton Test_ (WALK)
SOT_ (STAND)
TM comfortable speed
TM Comfortable speed(1)
TM Comfortable Speed_ (WALK)
TNT
trail making task_ (SIT)
Training ITWT - A
Training ITWT - B
Training ITWT -B
Training_ TWT_A
Training_ TWT_B
Training_TWT_A
Training_TWT_B
TWT_A training
TWT_A Training_ (WALK)
TWT_B training
TWT_B Training_ (WALK)
Segments in suitable format
Labels encoded


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Compiled model
Training Model
Epoch 1/10
[1m3876/3876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 572us/step - accuracy: 0.8968 - f1_score: 0.9799 - loss: 0.1571 - precision: 1.0000 - recall: 0.9135 - val_accuracy: 0.9801 - val_f1_score: 0.9998 - val_loss: 0.0489 - val_precision: 1.0000 - val_recall: 0.9982
Epoch 2/10
[1m3876/3876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 482us/step - accuracy: 0.9803 - f1_score: 0.9998 - loss: 0.0488 - precision: 1.0000 - recall: 0.9988 - val_accuracy: 0.9814 - val_f1_score: 1.0000 - val_loss: 0.0447 - val_precision: 1.0000 - val_recall: 0.9995
Epoch 3/10
[1m3876/3876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 481us/step - accuracy: 0.9807 - f1_score: 1.0000 - loss: 0.0465 - precision: 1.0000 - recall: 0.9992 - val_accuracy: 0.9810 - val_f1_score: 0.9999 - val_loss: 0.0475 - val_precision: 1.0000 - val_recall: 0.9991
Epoch 4/10
[1m3876/3876[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 495us/step - accuracy

## 3 Second Window 3/2 Second Overlap

In [7]:
activity_segments = segment_activities(96, 12)
model = fit_model_SVM_TensorFlow(activity_segments)

B1_T1_ (WALK)
B1_T2_ (WALK)
B1_TWT_A_ (WALK)
B1_TWT_B_ (WALK)
B2_T1_ (WALK)
B2_T2_ (WALK)
B2_TWT_A_ (WALK)
B2_TWT_B_ (WALK)
digit symbol task_ (SIT)
HR Recovery_ (STAND or SIT)
Montreal Cognitive Assessment_ (SIT)
Motor Behavioral
Motor Behavioral task
Motor behavioral task(1)
Motor Behavioral Task_ (All conditions seen here)
Naughton
Naughton Task
Naughton test
Naughton Test_ (WALK)
SOT_ (STAND)
TM comfortable speed
TM Comfortable speed(1)
TM Comfortable Speed_ (WALK)
TNT
trail making task_ (SIT)
Training ITWT - A
Training ITWT - B
Training ITWT -B
Training_ TWT_A
Training_ TWT_B
Training_TWT_A
Training_TWT_B
TWT_A training
TWT_A Training_ (WALK)
TWT_B training
TWT_B Training_ (WALK)
Segments in suitable format
Labels encoded


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Compiled model
Training Model
Epoch 1/10
[1m11668/11668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1ms/step - accuracy: 0.9486 - f1_score: 0.9927 - loss: 0.0889 - precision_2: 1.0000 - recall_2: 0.9664 - val_accuracy: 0.9810 - val_f1_score: 1.0000 - val_loss: 0.0427 - val_precision_2: 1.0000 - val_recall_2: 0.9999
Epoch 2/10
[1m11668/11668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 3ms/step - accuracy: 0.9812 - f1_score: 1.0000 - loss: 0.0421 - precision_2: 1.0000 - recall_2: 0.9997 - val_accuracy: 0.9810 - val_f1_score: 1.0000 - val_loss: 0.0423 - val_precision_2: 1.0000 - val_recall_2: 0.9999
Epoch 3/10
[1m11668/11668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 2ms/step - accuracy: 0.9810 - f1_score: 1.0000 - loss: 0.0427 - precision_2: 1.0000 - recall_2: 0.9999 - val_accuracy: 0.9811 - val_f1_score: 1.0000 - val_loss: 0.0415 - val_precision_2: 1.0000 - val_recall_2: 1.0000
Epoch 4/10
[1m11668/11668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

## 1 Second Window 1/2 Second Overlap

In [8]:
activity_segments = segment_activities()
model = fit_model_SVM_TensorFlow(activity_segments)

B1_T1_ (WALK)
B1_T2_ (WALK)
B1_TWT_A_ (WALK)
B1_TWT_B_ (WALK)
B2_T1_ (WALK)
B2_T2_ (WALK)
B2_TWT_A_ (WALK)
B2_TWT_B_ (WALK)
digit symbol task_ (SIT)
HR Recovery_ (STAND or SIT)
Montreal Cognitive Assessment_ (SIT)
Motor Behavioral
Motor Behavioral task
Motor behavioral task(1)
Motor Behavioral Task_ (All conditions seen here)
Naughton
Naughton Task
Naughton test
Naughton Test_ (WALK)
SOT_ (STAND)
TM comfortable speed
TM Comfortable speed(1)
TM Comfortable Speed_ (WALK)
TNT
trail making task_ (SIT)
Training ITWT - A
Training ITWT - B
Training ITWT -B
Training_ TWT_A
Training_ TWT_B
Training_TWT_A
Training_TWT_B
TWT_A training
TWT_A Training_ (WALK)
TWT_B training
TWT_B Training_ (WALK)
Segments in suitable format
Labels encoded
Compiled model
Training Model
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8795/8795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 493us/step - accuracy: 0.9421 - f1_score: 0.9918 - loss: 0.0963 - precision_4: 1.0000 - recall_4: 0.9598 - val_accuracy: 0.9803 - val_f1_score: 1.0000 - val_loss: 0.0426 - val_precision_4: 1.0000 - val_recall_4: 0.9999
Epoch 2/10
[1m8795/8795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 474us/step - accuracy: 0.9813 - f1_score: 1.0000 - loss: 0.0400 - precision_4: 1.0000 - recall_4: 0.9999 - val_accuracy: 0.9804 - val_f1_score: 1.0000 - val_loss: 0.0405 - val_precision_4: 1.0000 - val_recall_4: 1.0000
Epoch 3/10
[1m8795/8795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 491us/step - accuracy: 0.9813 - f1_score: 1.0000 - loss: 0.0393 - precision_4: 1.0000 - recall_4: 1.0000 - val_accuracy: 0.9804 - val_f1_score: 1.0000 - val_loss: 0.0405 - val_precision_4: 1.0000 - val_recall_4: 1.0000
Epoch 4/10
[1m8795/8795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 487us/step - accuracy: 0.9814 - f1_sco