# Import Libraries

In [None]:
import os
import glob
import timeit
import pandas as pd
import numpy as np
from scipy import signal
# from tqdm import tqdm
from tqdm.notebook import tqdm
from pathlib import Path
from math import sqrt
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

In [None]:
# Mount Google Drive at /content/drive; the CSI data folder read below lives under this mount point
drive.mount('/content/drive')

# Data File Count & Duration

In [None]:
# Folder on the mounted Google Drive that holds the CSI recordings (*.csv)
data_url = '/content/drive/MyDrive/CSI_Training_Data'


def _recording_minutes(csv_path):
    """Return the time spanned by one recording, divided by 60.

    Only the ``real_timestamp`` column is loaded, so the (much larger) CSI
    payload column is never parsed just to measure a duration.
    The value is ``(last - first) / 60``; this is minutes only if the
    timestamps are in seconds -- TODO confirm against the capture tool.
    """
    stamps = pd.read_csv(
        csv_path, usecols=['real_timestamp'], low_memory=False
    )['real_timestamp'].values
    return (stamps[-1] - stamps[0]) / 60


def _collect_recordings(paths, want_no_person, desc):
    """Pick the NP (or non-NP) files out of ``paths`` and time each of them.

    Args:
        paths: All candidate CSV paths.
        want_no_person: True selects files whose *file name* contains 'NP',
            False selects all the others.
        desc: Label shown on the progress bar.

    Returns:
        ``(names, durations)``: every selected path, and the duration of each
        selected file that could be read. A file that fails to load is still
        listed in ``names`` (it was found) but contributes no duration.
    """
    names, durations = [], []
    for path in tqdm(paths, desc=desc, unit="file"):
        # Match on the file name only, so a folder whose name happens to
        # contain 'NP' cannot turn every recording into a no-person file.
        if ('NP' in os.path.basename(path)) != want_no_person:
            continue
        names.append(path)
        try:
            durations.append(_recording_minutes(path))
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error processing file {path}: {e}")
    return names, durations


# Get the list of CSV files
file_list = glob.glob(data_url + '/*.csv')

print("Total File Count:", len(file_list))
print("==================================================================")

# Process files with 'NP' in their name
print("Processing no-person (NP) files:")
raw_np, raw_np_duration = _collect_recordings(file_list, True, "No-Person Files")

print("==================================================================")
print("Found {} (no-person) data files.".format(len(raw_np)))
print("With a total duration of {} minutes.".format(sum(raw_np_duration)))
print("==================================================================")

# Process files without 'NP' in their name
print("Processing with-person (WP) files:")
raw_wp, raw_wp_duration = _collect_recordings(file_list, False, "With-Person Files")

print("\n==================================================================")
print("Found {} (with-person) data files.".format(len(raw_wp)))
print("With a total duration of {} minutes.".format(sum(raw_wp_duration)))
print("==================================================================")


# Filter Design


In [None]:
def lowpass(csi_vec: np.ndarray, cutoff: float, fs: float, order: int) -> np.ndarray:
    """Apply a zero-phase Butterworth low-pass filter to ``csi_vec``.

    Args:
        csi_vec: Samples to filter.
        cutoff: Cut-off frequency, in the same unit as ``fs``.
        fs: Sampling frequency of ``csi_vec``.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples, same shape as ``csi_vec``.
    """
    # butter() takes the cut-off as a fraction of the Nyquist frequency (fs / 2).
    wn = cutoff / (0.5 * fs)
    numerator, denominator = signal.butter(order, wn, btype="low", analog=False)
    # filtfilt filters forward and then backward, which cancels the phase delay.
    filtered = signal.filtfilt(numerator, denominator, csi_vec)
    return filtered

def running_mean(x: np.ndarray, N: int) -> np.ndarray:
    """Return the centred moving average of ``x`` over ``N`` samples.

    The output has the same length as ``x``. Near both ends the window is
    shortened to the samples that exist (``min_periods=1``), so no NaNs are
    produced at the edges.
    """
    series = pd.Series(x)
    centred_windows = series.rolling(window=N, min_periods=1, center=True)
    smoothed = centred_windows.mean()
    return smoothed.to_numpy()

#  Read File Function

In [None]:
def _row_to_magnitudes(raw_row):
    """Turn one ``CSI_DATA`` cell into 64 magnitudes.

    The cell is a string of space-separated integers; the first character and
    the last two are discarded before splitting (presumably the enclosing
    brackets plus a trailing space -- TODO confirm against a sample file).
    The first 128 integers are read as 64 consecutive pairs and each pair
    ``(a, b)`` becomes ``sqrt(a**2 + b**2)``.

    Raises:
        ValueError: A token is not an integer, or fewer than 128 values exist.
        TypeError: The cell is not a string (e.g. a NaN from an empty cell).
    """
    values = [int(token) for token in raw_row[1:-2].split(' ')]
    if len(values) < 128:
        raise ValueError(f"expected at least 128 CSI values, got {len(values)}")
    return [sqrt(values[k] ** 2 + values[k + 1] ** 2) for k in range(0, 128, 2)]


def _load_magnitudes(file_name, desc):
    """Read one recording and return its usable rows as lists of 64 magnitudes.

    The first and last 500 rows of the file are dropped. Rows that cannot be
    parsed are reported and skipped.
    """
    raw_rows = pd.read_csv(
        file_name, usecols=['CSI_DATA'], low_memory=False
    )['CSI_DATA'].values
    raw_rows = raw_rows[500:-500]

    magnitudes = []
    for raw_row in tqdm(raw_rows, desc=desc, leave=False):
        try:
            magnitudes.append(_row_to_magnitudes(raw_row))
        except (ValueError, TypeError) as e:
            print(f"Error encountered: {e}")
    return magnitudes


def _make_windows(samples, window_size, step_size, desc):
    """Cut ``samples`` into overlapping windows and pre-process each one.

    Every column of a window is low-pass filtered (cut-off 30, sampling
    frequency 170, order 5), smoothed with a 10-sample running mean, and the
    window is then standardised column-wise with ``StandardScaler``.

    Returns:
        A list of ``(window_size, n_columns)`` float arrays.
    """
    scaler = StandardScaler()
    windows = []
    # "+ 1" keeps the last full window, the one that ends exactly on the final
    # sample; range() excludes its stop value, so without it that window is
    # dropped whenever (n - window_size) is a multiple of step_size.
    last_start = samples.shape[0] - window_size
    for start in tqdm(range(0, last_start + 1, step_size), desc=desc, unit="window"):
        # One slice-copy replaces growing the window row by row with np.append.
        window = np.array(samples[start:start + window_size], dtype=float)
        for col in range(window.shape[1]):
            window[:, col] = running_mean(lowpass(window[:, col], 30, 170, 5), 10)
        # fit_transform refits the scaler, so each window is scaled on its own.
        windows.append(scaler.fit_transform(window))
    return windows


def read_csv(data_url, window_size, step_size):
    """Load every CSV under ``data_url`` and build windowed training arrays.

    Files whose *name* contains 'NP' are treated as no-person recordings
    (label 0); all other files are with-person recordings (label 1).

    NOTE(review): rows of all files of a class are concatenated before
    windowing, so a window can straddle the boundary between two recordings.

    Args:
        data_url: Folder containing the ``*.csv`` recordings.
        window_size: Number of consecutive rows per window.
        step_size: Row offset between the starts of consecutive windows.

    Returns:
        ``(data_np, data_wp, label_data_np, label_data_wp)``. The data arrays
        have shape ``(num_windows, window_size, 64)`` when at least one window
        exists (an empty array otherwise); the label arrays are 1-D, all 0
        for no-person and all 1 for with-person.
    """
    rows_np = []
    rows_wp = []

    # Sorted so the concatenation order (and hence the windows) is reproducible;
    # glob itself returns files in arbitrary order.
    file_list = sorted(glob.glob(data_url + '/*.csv'))

    for file_name in tqdm(file_list, desc="Processing Files", unit="file"):
        print(file_name)
        # Match on the file name only, not on the folder part of the path.
        if 'NP' in os.path.basename(file_name):
            rows_np.extend(_load_magnitudes(file_name, "Processing No-Person Data"))
        else:
            rows_wp.extend(_load_magnitudes(file_name, "Processing With-Person Data"))

    windows_np = _make_windows(
        np.array(rows_np), window_size, step_size, "Creating No-Person Windows")
    windows_wp = _make_windows(
        np.array(rows_wp), window_size, step_size, "Creating With-Person Windows")

    label_data_np = np.zeros(len(windows_np), dtype=int)
    label_data_wp = np.ones(len(windows_wp), dtype=int)

    return np.array(windows_np), np.array(windows_wp), label_data_np, label_data_wp


# Data Extraction

In [None]:
window_size = 200
step_size = 50

# Cache files for the windowed arrays; the names encode the windowing settings
# so a change of window_size / step_size triggers a rebuild.
save_url_np = Path('data_np_window_size={window}_step_size={step}.npy'.format(window = window_size, step = step_size))
save_url_wp = Path('data_wp_window_size={window}_step_size={step}.npy'.format(window = window_size, step = step_size))

if save_url_np.is_file() and save_url_wp.is_file():
    # Both caches exist: reuse them instead of re-parsing every CSV.
    data_np = np.load(save_url_np)
    data_wp = np.load(save_url_wp)
else:
    data_np, data_wp, _, _ = read_csv(data_url, window_size=window_size, step_size=step_size)
    np.save(save_url_np, data_np)
    np.save(save_url_wp, data_wp)

# Build the labels the same way on both paths so their shape and dtype do not
# depend on whether the cache was hit: one column, 0 = no-person, 1 = with-person.
label_data_np = np.zeros((data_np.shape[0], 1))
label_data_wp = np.ones((data_wp.shape[0], 1))

In [None]:
# Report the shapes of both data arrays, then of both label arrays
for dataset_part in (data_np, data_wp, label_data_np, label_data_wp):
    print(dataset_part.shape)

# *Train Test Split*

In [None]:
def train_test_split(data_np, data_wp, label_data_np, label_data_wp,train_portion, test_portion):
    """Randomly split both classes into train / validation / test subsets.

    Each class is shuffled and split separately with the same proportions, so
    all three subsets keep the class balance of the input. The first
    ``train_portion`` of a class goes to training, the last ``test_portion``
    to testing, and whatever lies in between to validation. Only the training
    subset is shuffled again after the two classes are merged.

    NOTE(review): if the samples are overlapping windows (step smaller than the
    window length), a random split places near-duplicate windows in different
    subsets -- confirm this is acceptable for the reported test scores.

    Args:
        data_np, data_wp: Sample arrays of the two classes (first axis = sample).
        label_data_np, label_data_wp: Labels aligned with the sample arrays.
        train_portion: Fraction of each class used for training, in [0, 1].
        test_portion: Fraction of each class used for testing, in [0, 1].

    Returns:
        ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.

    Raises:
        ValueError: A portion is outside [0, 1], or the two portions sum to
            more than 1 (train and test would then share samples).
    """
    if not (0 <= train_portion <= 1 and 0 <= test_portion <= 1):
        raise ValueError("train_portion and test_portion must both lie in [0, 1]")
    if train_portion + test_portion > 1 + 1e-9:
        raise ValueError("train_portion + test_portion must not exceed 1")

    def _split_class(data, labels):
        """Return [(x, y)] for the train, valid and test parts of one class."""
        n_samples = data.shape[0]
        order = np.random.permutation(n_samples)
        train_end = int(train_portion * n_samples)
        # Float truncation may place the validation end one step before
        # train_end; clamp it so the test slice never reaches into training.
        valid_end = max(train_end, int((1 - test_portion) * n_samples))
        parts = (order[:train_end], order[train_end:valid_end], order[valid_end:])
        return [(data[idx, ...], labels[idx, ...]) for idx in parts]

    np_parts = _split_class(data_np, label_data_np)
    wp_parts = _split_class(data_wp, label_data_wp)

    merged = []
    for (np_x, np_y), (wp_x, wp_y) in zip(np_parts, wp_parts):
        merged.append(np.concatenate([np_x, wp_x], axis=0))
        merged.append(np.concatenate([np_y, wp_y], axis=0))
    x_train, y_train, x_valid, y_valid, x_test, y_test = merged

    # Mix the two classes inside the training set (they were stacked NP-first).
    shuffle = np.random.permutation(x_train.shape[0])
    x_train = x_train[shuffle, ...]
    y_train = y_train[shuffle, ...]

    return x_train, y_train, x_valid, y_valid, x_test, y_test


In [None]:
# 70 % train, 10 % test; the remaining 20 % of each class becomes validation
x_train, y_train, x_valid, y_valid, x_test, y_test = train_test_split(
    data_np, data_wp, label_data_np, label_data_wp,
    train_portion=0.7, test_portion=0.1,
)

# Shapes are printed label-first for each subset: train, validation, test
for subset_array in (y_train, x_train, y_valid, x_valid, y_test, x_test):
    print(subset_array.shape)

# *k-fold Split*

In [None]:
# K-fold cross-validation setup
num_folds = 5

# Per-fold score containers, filled by the cross-validation loop
acc_per_fold, loss_per_fold = [], []

kfold = KFold(n_splits=num_folds, shuffle=True)

# Stack both classes (no-person first) into one dataset for the fold splitter
inputs = np.concatenate([data_np, data_wp], axis=0)
targets = np.concatenate([label_data_np, label_data_wp], axis=0)

for stacked in (inputs, targets):
    print(stacked.shape)

# Model Definition

In [None]:
# import libraries to def model
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, GRU, Bidirectional
import tensorflow as tf

# Apply necessary changes to all the model architectures

# Example models
# Create GRU Model
def create_gru_model(units, input_shape, num_classes):
    """Build and compile a two-layer GRU classifier.

    Layers: GRU (returns sequences) -> GRU -> Dropout(0.2) -> Dense(100, tanh)
    -> Dense(num_classes - 1, sigmoid). With ``num_classes == 2`` the output is
    a single sigmoid unit, which matches the binary cross-entropy loss.

    Args:
        units: Hidden size of both GRU layers.
        input_shape: Shape of one sample, passed to the first GRU layer.
        num_classes: Number of classes; the output layer has one unit fewer.

    Returns:
        The compiled model (Adam, learning rate 1e-4, accuracy metric).
    """
    recurrent_layers = [
        GRU(units=units, input_shape=input_shape, return_sequences=True),
        GRU(units=units, return_sequences=False),
    ]
    head_layers = [
        Dropout(0.2),
        Dense(100, activation='tanh'),
        Dense(num_classes-1, activation='sigmoid'),
    ]
    network = Sequential(recurrent_layers + head_layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['accuracy'],
    )
    return network

# Create LSTM Model
def create_lstm_model(units, input_shape, num_classes):
    """Build and compile a two-layer LSTM classifier.

    Layers: LSTM (returns sequences) -> LSTM -> Dropout(0.2) ->
    Dense(100, tanh) -> Dense(num_classes - 1, sigmoid). With
    ``num_classes == 2`` the output is a single sigmoid unit, which matches
    the binary cross-entropy loss.

    Args:
        units: Hidden size of both LSTM layers.
        input_shape: Shape of one sample, passed to the first LSTM layer.
        num_classes: Number of classes; the output layer has one unit fewer.

    Returns:
        The compiled model (Adam, learning rate 1e-4, accuracy metric).
    """
    recurrent_layers = [
        LSTM(units=units, input_shape=input_shape, return_sequences=True),
        LSTM(units=units, return_sequences=False),
    ]
    head_layers = [
        Dropout(0.2),
        Dense(100, activation='tanh'),
        Dense(num_classes-1, activation='sigmoid'),
    ]
    network = Sequential(recurrent_layers + head_layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['accuracy'],
    )
    return network

# Create BiLSTM Model
def create_bilstm_model(units, input_shape, num_classes):
    """Build and compile a two-layer bidirectional LSTM classifier.

    Layers: Bidirectional(LSTM, returns sequences) -> Bidirectional(LSTM) ->
    Dropout(0.2) -> Dense(100, tanh) -> Dense(num_classes - 1, sigmoid). With
    ``num_classes == 2`` the output is a single sigmoid unit, which matches
    the binary cross-entropy loss.

    Args:
        units: Hidden size of each wrapped LSTM.
        input_shape: Shape of one sample, passed to the first Bidirectional
            wrapper.
        num_classes: Number of classes; the output layer has one unit fewer.

    Returns:
        The compiled model (Adam, learning rate 1e-4, accuracy metric).
    """
    recurrent_layers = [
        Bidirectional(LSTM(units=units, return_sequences=True), input_shape=input_shape),
        Bidirectional(LSTM(units=units, return_sequences=False)),
    ]
    head_layers = [
        Dropout(0.2),
        Dense(100, activation='tanh'),
        Dense(num_classes-1, activation='sigmoid'),
    ]
    network = Sequential(recurrent_layers + head_layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['accuracy'],
    )
    return network

# Choose which model to use based on user input
def select_model(model_type, units, input_shape, num_classes):
    """Build the model named by ``model_type``.

    Args:
        model_type: One of 'gru', 'lstm' or 'bilstm'.
        units, input_shape, num_classes: Forwarded unchanged to the matching
            ``create_*_model`` function.

    Returns:
        Whatever the selected ``create_*_model`` function returns.

    Raises:
        ValueError: ``model_type`` is not one of the three supported names.
    """
    # Reject unknown names first so the builders are only looked up when needed.
    if model_type not in ('gru', 'lstm', 'bilstm'):
        raise ValueError("Invalid model type selected. Choose 'gru', 'lstm', or 'bilstm'.")

    builders = {
        'gru': create_gru_model,
        'lstm': create_lstm_model,
        'bilstm': create_bilstm_model,
    }
    return builders[model_type](units, input_shape, num_classes)

# Ask which architecture to train; surrounding spaces and letter case are ignored
model_type = input("Enter model type (gru, lstm, bilstm): ").strip().lower()

# Model hyper-parameters
units = 32
num_classes = 2

# One sample is the slice of x_train along axes 1 and 2
sample_axis_1 = x_train.shape[1]
sample_axis_2 = x_train.shape[2]
input_shape = (sample_axis_1, sample_axis_2)

# Build the chosen model and print its layer summary
model = select_model(model_type, units, input_shape, num_classes)
model.summary()


In [None]:
# Training settings and the resulting number of batches per epoch
from math import ceil

batch_size = 128
epochs = 20

print("Shape of training data is: ", x_train.shape)

# The last batch may be partial, hence the ceiling when reporting the count
iterations = x_train.shape[0]/batch_size
print("Number of iterations per epoch = ", ceil(iterations))

In [None]:
# Model fit function and saving the trained model with a file name (model_type)
def fit_model(model):
    """Train ``model`` on the global train split and return the Keras history.

    Reads the module-level ``x_train``/``y_train``, ``x_valid``/``y_valid``,
    ``epochs``, ``batch_size`` and ``model_type``. The checkpoint with the
    lowest validation loss is written to ``<model_type>_model.keras``.
    """
    model.summary()

    file_name = f'{model_type}_model'
    checkpoint_path = "{name}.keras".format(name=file_name)

    # Keep only the weights of the epoch with the best validation loss.
    save_best = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, monitor="val_loss")

    # NOTE(review): min_lr=0.0001 is the same value as the Adam learning rate
    # the create_*_model functions compile with (1e-4), so this callback has no
    # room to lower the rate; its patience (20) also equals the module-level
    # epochs setting. Confirm whether a lower min_lr / patience was intended.
    shrink_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001)

    # NOTE(review): patience=50 exceeds epochs = 20, so early stopping cannot
    # trigger with the current settings.
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=50, verbose=0)

    training_history = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_valid, y_valid),
        callbacks=[save_best, shrink_lr, stop_early],
    )
    return training_history

# Time the whole training run: read the timer before and after fit_model
tic=timeit.default_timer()
history = fit_model(model)
toc=timeit.default_timer()
# toc - tic is the elapsed time reported by timeit's default timer
print(f'Time taken for Training: {toc-tic} seconds')

# Evaluate Model

In [None]:
# Plot train loss and validation loss
def plot_acc_loss(history):
    """Draw the training curves on the current matplotlib figure.

    The loss curves go in the upper of two stacked subplots and the accuracy
    curves in the lower one; training is drawn in red, validation in blue.

    Args:
        history: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch values.
    """
    # (subplot slot, train key, validation key, title, y label, legend, legend position)
    # Accuracy (lower slot) is drawn first, loss (upper slot) second.
    panels = (
        (2, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy',
         ['Train accuracy', 'Validation accuracy'], 'lower right'),
        (1, 'loss', 'val_loss', 'Model Loss', 'Loss',
         ['Train loss', 'Validation loss'], 'upper right'),
    )
    for slot, train_key, valid_key, title, y_label, legend, legend_pos in panels:
        plt.subplot(2, 1, slot)
        plt.plot(history.history[train_key],'r')
        plt.plot(history.history[valid_key],'b')
        plt.title(title,size=14,fontweight="bold")
        plt.ylabel(y_label,fontweight="bold")
        plt.xlabel('epoch',fontweight="bold")
        plt.legend(legend, loc=legend_pos)
        plt.grid()

# Function to plot and save with model filename
def plot_and_save(history):
    """Plot the training curves on a new 6x6 figure and save them as a PNG.

    The image is written at 300 dpi to ``<model_type>_acc_loss.png``, using the
    module-level ``model_type``.
    """
    plt.figure(figsize=(6,6))
    plot_acc_loss(history)
    # Tighten the layout so the two subplots' titles and labels do not overlap
    plt.tight_layout()

    plt.savefig(f'{model_type}_acc_loss.png', dpi=300)
    print(f"Plot saved as: {model_type}_acc_loss.png")


plot_and_save(history)

In [None]:
# Evaluate once on the held-out test split; the model was compiled with a single
# 'accuracy' metric, so evaluate() returns [loss, accuracy].
scores = model.evaluate(x_test,y_test)
test_loss, test_acc = scores

# Convert the accuracy back into sample counts. round() is used rather than
# int() because accuracy * n can land just below the true integer
# (e.g. 96.99999 instead of 97), and truncation would then lose one sample.
num_test = x_test.shape[0]
num_correct = int(round(num_test * test_acc))
print(num_test)
print(num_correct)
print(num_test - num_correct)

# Format the percentage itself; truncating it to an int first made the four
# printed decimals always read ".0000".
print("Accuracy: %.4f%%" % (scores[1]*100))

In [None]:
prediction = model.predict(x_test)

# Class 1 when the first output column is strictly above 0.5, else class 0;
# kept as a column vector
pred_thr = (prediction[:, 0] > 0.5).astype(int)[:, np.newaxis]
print(pred_thr.shape)

# How many test samples fall into each predicted class (shown as the cell output)
count = pred_thr.copy()
unique, counts = np.unique(count, return_counts=True)
dict(zip(unique, counts))

# *Performance Evaluation Metrics*

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Round the model outputs to hard 0/1 predictions
bin_pred = prediction.round()

# labels=[0, 1] pins the matrix to 2x2 even when the test labels or the
# predictions contain a single class; otherwise the matrix would shrink and
# neither the two display labels nor cm[1, 1]-style indexing would fit.
# Both inputs are flattened and cast to int so they match the integer labels.
cm = confusion_matrix(
    np.ravel(y_test).astype(int),
    np.ravel(bin_pred).astype(int),
    labels=[0, 1],
)
print(cm)

# Rows are true classes, columns predicted classes, in label order 0, 1
cmd = ConfusionMatrixDisplay(cm, display_labels=['RWoP','RWP'])
cmd.plot(values_format='.4g')

In [None]:
import math
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error

# Both scores compare the true test labels with the rounded predictions
MSE = mean_squared_error(y_test, bin_pred)
RMSE = math.sqrt(MSE)
f1score = f1_score(y_test, bin_pred)

# Reported as percentages with two decimals
print("Root Mean Square Error: {:.2f}%".format(RMSE * 100))
print('F1-Score: {:.2f}%'.format(f1score * 100))

In [None]:
# Derive the performance metrics from the confusion matrix
# (rows = true class, columns = predicted class; class 1 is the positive class)
true_pos, true_neg = cm[1, 1], cm[0, 0]
false_pos, false_neg = cm[0, 1], cm[1, 0]

# Number of evaluated samples
total = cm.sum()

# All metrics are expressed in percent
ppv = true_pos / (true_pos + false_pos) * 100
npv = true_neg / (true_neg + false_neg) * 100
sensitivity = true_pos / (true_pos + false_neg) * 100
specificity = true_neg / (true_neg + false_pos) * 100
accuracy = (true_pos + true_neg) / total * 100

# Print results rounded to two decimal places
metric_report = (
    ('PPV (Positive Predictive Value): {:.2f}%', ppv),
    ('NPV (Negative Predictive Value): {:.2f}%', npv),
    ('Sensitivity (Recall): {:.2f}%', sensitivity),
    ('Specificity: {:.2f}%', specificity),
    ('Accuracy: {:.2f}%', accuracy),
)
for template, value in metric_report:
    print(template.format(value))


# Cross-Validation

In [None]:
model.summary()

# Start from empty score lists so re-running this cell does not mix the new
# folds with scores left over from a previous run.
acc_per_fold.clear()
loss_per_fold.clear()

for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Build a fresh, untrained model for every fold. Reusing the already
    # trained `model` would carry weights over from the earlier training run
    # and from previous folds, whose training data include this fold's
    # held-out samples, so the fold scores would be optimistically biased.
    fold_model = select_model(model_type, units, input_shape, num_classes)

    # NOTE(review): every fold writes to the same checkpoint file, so only the
    # last fold's best model remains on disk.
    file_name = f'{model_type}_model_cross-valid'

    # NOTE(review): with epochs=2 below, the patience values (20 and 50) mean
    # the LR-reduction and early-stopping callbacks cannot trigger.
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            "{name}.keras".format(name=file_name), save_best_only=True, monitor="val_loss"),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001),
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=0),
    ]

    history = fold_model.fit(
        inputs[train],
        targets[train],
        batch_size=batch_size,
        epochs=2,
        validation_data=(inputs[test], targets[test]),
        callbacks=callbacks,
    )

    # Generate generalization metrics on the fold's held-out part
    scores = fold_model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {fold_model.metrics_names[0]} of {scores[0]}; {fold_model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i, (fold_loss, fold_acc) in enumerate(zip(loss_per_fold, acc_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

### **The End**
