In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow as tf
import pandas as pd

def create_sequences(X, y, seq_length, points_in_future):
    X_seq = []
    y_seq = []
    for i in range(seq_length, len(X) - points_in_future + 1):
        X_seq.append(X[i-seq_length:i])
        y_seq.append(y[i+points_in_future-1])
    return np.array(X_seq), np.array(y_seq)

def run_pipeline(df, feature_cols, target_col, seq_length, points_in_future, task, model_type):
    assert task in ['classification', 'regression'], "Task must be either 'classification' or 'regression'"
    assert model_type in ['LSTM', 'TCN'], "Model type must be either 'LSTM' or 'TCN'"
    
    # Preprocessing
    target = df[[target_col]]
    features = df[feature_cols]
    feature_scaler = StandardScaler()
    target_scaler = MinMaxScaler()
    scaled_features = feature_scaler.fit_transform(features)
    scaled_target = target_scaler.fit_transform(target)
    features_train, features_test, target_train, target_test = train_test_split(scaled_features, scaled_target, test_size=0.2, random_state=42)
    
    # Create Sequences
    trainX, trainY = create_sequences(features_train, target_train, seq_length, points_in_future)
    testX, testY = create_sequences(features_test, target_test, seq_length, points_in_future)
    
    # Model definition
    if model_type == 'LSTM':
        model = Sequential()
        model.add(LSTM(64, activation='relu', return_sequences=False, input_shape=(trainX.shape[1], trainX.shape[2])))
        model.add(Dropout(0.2))
        if task == 'classification':
            model.add(Dense(4, activation='softmax'))
            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[tf.keras.metrics.MeanAbsoluteError()])
            
    elif model_type == 'TCN':
        model = Sequential()
        model.add(TCN(64, activation='relu', return_sequences=False, input_shape=(trainX.shape[1], trainX.shape[2])))
        model.add(Dropout(0.2))
        if task == 'classification':
            model.add(Dense(4, activation='softmax'))
            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[tf.keras.metrics.MeanAbsoluteError()])
    
    # Train model
    model.fit(trainX, trainY, epochs=5, batch_size=4000, validation_data=(testX, testY))
    
    return model
df=pd.read_csv('CDPCAF.csv')
#df.drop(['Unnamed: 0.2', 'Unnamed: 0', 'Unnamed: 0.1'], axis=1, inplace=True)
df.rename(columns={'0': 'pca_0', '1': 'pca_1', '2': 'pca_2', '3': 'pca_3', '4': 'pca_4','5':'pca_5'}, inplace=True)
df['labels'] = df[['Still', 'Walk', 'Run', 'Cycle']].idxmax(axis=1)
# Replace the column names with desired integer labels
label_mapping = {'Still': 0, 'Walk': 1, 'Run': 2, 'Cycle': 3}
df.replace({'labels': label_mapping}, inplace=True)
feature_cols = ['pca_0', 'pca_1', 'pca_2', 'pca_3', 'pca_4', 'pca_5']
target_col = 'SPM/RPM'
seq_length = 200
points_in_future = 1
task = 'regression'
model_type = 'LSTM'
model = run_pipeline(df, feature_cols, target_col, seq_length, points_in_future, task, model_type)

In [89]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import pandas as pd
from tcn import TCN
import random
import os

def create_sequences(X, y, seq_length, points_in_future):
    X_seq = []
    y_seq = []
    for i in range(seq_length, len(X) - points_in_future + 1):
        X_seq.append(X[i-seq_length:i])
        y_seq.append(y[i+points_in_future-1])
    return np.array(X_seq), np.array(y_seq)

def run_pipeline(df, feature_cols, target_col, seq_length, points_in_future, task, model_type, lr, batch, epoch, regression_specific=''):
    assert task in ['classification', 'regression'], "Task must be either 'classification' or 'regression'"
    assert model_type in ['LSTM', 'TCN'], "Model type must be either 'LSTM' or 'TCN'"
    
    # Preprocessing
    target = df[[target_col]]
    features = df[feature_cols]
    feature_scaler = StandardScaler()
    target_scaler = MinMaxScaler()
    #if task == 'regression':
    #    scaled_target = target_scaler.fit_transform(target)
    #else:
    scaled_target = np.array(target)
    scaled_features = feature_scaler.fit_transform(features)


    # Find training end index
    train_size = int(len(scaled_target) * 0.8)
    features_train, features_test = scaled_features[:train_size], scaled_features[train_size+seq_length:]
    target_train, target_test = scaled_target[:train_size], scaled_target[train_size+seq_length:] 

    # Create Sequences
    trainX, trainY = create_sequences(features_train, target_train, seq_length, points_in_future)
    testX, testY = create_sequences(features_test, target_test, seq_length, points_in_future)

    train_data = list(zip(trainX, trainY))
    random.shuffle(train_data)
    trainX, trainY = zip(*train_data)
    trainX, trainY = np.array(trainX), np.array(trainY)

    test_data = list(zip(testX, testY))
    random.shuffle(test_data)
    testX, testY = zip(*test_data)
    testX, testY = np.array(testX), np.array(testY)
    
    # Instantiate Adam
    opt = Adam(learning_rate=lr)

    if regression_specific == 'run':
        global run_train, tf_run_labels_train, run_test, tf_run_labels_test
        trainX, trainY = run_train, tf_run_labels_train
        testX, testY = run_test, tf_run_labels_test
    if regression_specific == 'walk':
        global walk_train, tf_walk_labels_train, walk_test, tf_walk_labels_test
        trainX, trainY = walk_train, tf_walk_labels_train
        testX, testY = walk_test, tf_walk_labels_test
    if regression_specific == 'bike':
        global bike_train, tf_bike_labels_train, bike_test, tf_bike_labels_test
        trainX, trainY = bike_train, tf_bike_labels_train
        testX, testY = bike_test, tf_bike_labels_test

    # Model definition
    if model_type == 'LSTM':
        model = Sequential()
        model.add(LSTM(64, activation='relu', return_sequences=False, input_shape=(trainX.shape[1], trainX.shape[2])))
        model.add(Dropout(0.2))
        if task == 'classification':
            model.add(Dense(4, activation='softmax'))
            model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            model.add(Dense(1))
            model.compile(optimizer=opt, loss='mean_absolute_error', metrics=[tf.keras.metrics.MeanAbsoluteError()])
            
    elif model_type == 'TCN':
        model = Sequential()
        model.add(TCN(64, activation='relu', return_sequences=False, input_shape=(trainX.shape[1], trainX.shape[2])))
        model.add(Dropout(0.2))
        if task == 'classification':
            model.add(Dense(4, activation='softmax'))
            model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            model.add(Dense(1))
            model.compile(optimizer=opt, loss='mean_absolute_error', metrics=['mean_absolute_error'])
    
    # Train model
    model.fit(trainX, trainY, epochs=epoch, batch_size=batch, validation_data=(testX, testY))

    return model
df=pd.read_csv('CDPCAF.csv')
#df.drop(['Unnamed: 0.2', 'Unnamed: 0', 'Unnamed: 0.1'], axis=1, inplace=True)
df.rename(columns={'0': 'pca_0', '1': 'pca_1', '2': 'pca_2', '3': 'pca_3', '4': 'pca_4','5':'pca_5'}, inplace=True)
df['labels'] = df[['Still', 'Walk', 'Run', 'Cycle']].idxmax(axis=1)
# Replace the column names with desired integer labels
label_mapping = {'Still': 0, 'Walk': 1, 'Run': 2, 'Cycle': 3}
df.replace({'labels': label_mapping}, inplace=True)
feature_cols = ['pca_0', 'pca_1', 'pca_2', 'pca_3', 'pca_4', 'pca_5']
target_col = 'SPM/RPM'
seq_length = 200
points_in_future = 1
task = 'regression'
model_type = 'LSTM'
print(df['labels'])
#model = run_pipeline(df, feature_cols, target_col, seq_length, points_in_future, task, model_type)

0         3
1         3
2         3
3         3
4         3
         ..
144770    3
144771    3
144772    3
144773    3
144774    3
Name: labels, Length: 144775, dtype: int64


In [90]:

def grid_search_pipeline(task, model_type, target_col, epochs=2, learning_rates = [0.01, 0.005, 0.002, 0.001], regression_specific=''):
    #epochs = [i+1 for i in range(epochs)]
    epochs = [epochs]
    histories = {}
    if task == 'classification':
        error = 'accuracy'
        error2 = 'val_accuracy'
    else:
        error = 'mean_absolute_error' 
        error2 = 'val_loss'

    for epoch in epochs:
        for lr in learning_rates:
            model = run_pipeline(df, feature_cols, target_col, seq_length, points_in_future, task, model_type, lr, 400, epoch, regression_specific) 
            histories[f'Training Accuracy {epoch}, {lr}'] = {
                    'Epochs': epoch,
                    'LR' : lr,
                    'Training Error': model.history.history[error],
                    'Test Error': model.history.history[error2]
                }
            
    learning_params = [[],[],[],[],[],[],[],[]]
    epoch = [[],[],[],[],[],[],[],[],[],[]]
    for key, value in histories.items():
        epoch_key = value['Epochs']
        epoch[epochs.index(epoch_key)].append(value['Test Error'])
        lr_key = value['LR']
        learning_params[learning_rates.index(lr_key)].append(value['Test Error'])    

    for i in range(10):
        print('Epochs '+str(i+1)+' Max: '+str(min(epoch[i])))
    for j in range(8):
        print('LR '+str(learning_rates[j])+' Max: '+str(min(learning_params[j])))    
            

In [None]:
task = 'regression'
model_type = 'TCN'
target_col = 'SPM/RPM'
grid_search_pipeline(task, model_type, target_col, epochs=2, learning_rates=[0.01, 0.005, 0.003, 0.002, 0.001, 0.0005])
#lr = 0.002, epochs = 1

In [None]:
task = 'classification'
model_type = 'TCN'
target_col = 'labels'
grid_search_pipeline(task, model_type, target_col, learning_rates=[0.03, 0.02, 0.01, 0.005])
#lr = 0.01, epochs = 1

In [None]:
task = 'regression'
model_type = 'LSTM'
target_col = 'SPM/RPM'
grid_search_pipeline(task, model_type, target_col, learning_rates=[0.00001, 0.000001])

In [None]:
task = 'classification'
model_type = 'LSTM'
target_col = 'labels'
grid_search_pipeline(task, model_type, target_col, learning_rates=[0.005, 0.001, 0.0005])

In [None]:
window = 200
slide = 0.1
train_test = 0.8
def make_windows_PCA(window, slide, frame, train_test):
    Viable_Cols = ['0', '1', '2', '3', '4', '5']
    Viable_Cols2 = ['0_max_freq', '1_max_freq', '2_max_freq', '3_max_freq', '4_max_freq', '5_max_freq', ]
    jump = window*slide
    train_samples = []
    sparse_train = []
    test_samples = []
    sparse_test = []
    labels_train = []
    labels_test = []
    if 'SPM/RPM' in frame.columns:
        label = 'SPM/RPM'
    if 'RPM/SPM' in frame.columns:
        label = 'RPM/SPM'
    if 'RPM' in frame.columns:
        label = 'RPM'
    if 'SPM' in frame.columns:
        label = 'SPM'
    for i in range(int(len(frame['0'])/(window*slide))-int((1/slide))):
        sample = frame.loc[(i*jump)+1:(i*jump)+(window), Viable_Cols]
        sample2 = []
        freqs = frame.loc[(i*jump)+200, Viable_Cols2]
        sample2.append(freqs.tolist())
        mins = [np.min(sample[col]) for col in Viable_Cols]
        maxes = [np.max(sample[col]) for col in Viable_Cols]
        stdev = [np.std(sample[col], axis=0) for col in Viable_Cols]
        means = [np.mean(sample[col]) for col in Viable_Cols]
        sample2.append(mins)
        sample2.append(maxes)
        sample2.append(stdev)
        sample2.append(means)
        yval = frame[label][(i*jump)+199]
        if i*jump <= (len(frame['0'])*train_test):
            train_samples.append(sample)
            sparse_train.append(sample2)
            labels_train.append(yval)
        else:
            test_samples.append(sample)
            sparse_test.append(sample2)
            labels_test.append(yval)
    labels_test = labels_test[9:]
    test_samples = test_samples[9:]
    sparse_test = sparse_test[9:]
    return train_samples, sparse_train, test_samples, sparse_test, labels_train, labels_test

def reshaper(frame):
    data_2d = frame.reshape(-1, frame.shape[-1])
    scaler = StandardScaler()
    whitened_data_2d = scaler.fit_transform(data_2d)
    whitened_frame = whitened_data_2d.reshape(frame.shape)
    return whitened_frame

files = os.listdir('./PCA Fourier/')
bike_train_samples, bike_sparse_train, bike_test_samples, bike_sparse_test, bike_labels_train, bike_labels_test = [],[],[],[],[],[]
walk_train_samples, walk_sparse_train, walk_test_samples, walk_sparse_test, walk_labels_train, walk_labels_test = [],[],[],[],[],[]
run_train_samples, run_sparse_train, run_test_samples, run_sparse_test, run_labels_train, run_labels_test = [],[],[],[],[],[]

Biking_Files = [0,1,2,16,17]
for i in Biking_Files:
    print('A')
    frame = pd.read_csv('./PCA Fourier/'+str(files[i]))
    train_samples, sparse_train, test_samples, sparse_test, labels_train, labels_test = make_windows_PCA(window, slide, frame, train_test)
    bike_train_samples.extend(train_samples)
    bike_sparse_train.extend(sparse_train)
    bike_test_samples.extend(test_samples)
    bike_sparse_test.extend(sparse_test)
    bike_labels_train.extend(labels_train)
    bike_labels_test.extend(labels_test)
tf_bike_sparse_train = np.array(bike_sparse_train)
tf_bike_labels_train = np.array(bike_labels_train)
tf_bike_sparse_test = np.array(bike_sparse_test)
tf_bike_labels_test = np.array(bike_labels_test)

whitened_bike_sparse_train = reshaper(tf_bike_sparse_train)
bike_train = reshaper(np.array(bike_train_samples))
whitened_bike_sparse_test = reshaper(tf_bike_sparse_test)
bike_test = reshaper(np.array(bike_test_samples))

Walking_Files = [10,11,12,13,14,18,20,21]
for i in Walking_Files:
    print('B')
    frame = pd.read_csv('./PCA Fourier/'+str(files[i]))
    train_samples, sparse_train, test_samples, sparse_test, labels_train, labels_test = make_windows_PCA(window, slide, frame, train_test)
    walk_train_samples.extend(train_samples)
    walk_sparse_train.extend(sparse_train)
    walk_test_samples.extend(test_samples)
    walk_sparse_test.extend(sparse_test)
    walk_labels_train.extend(labels_train)
    walk_labels_test.extend(labels_test)
tf_walk_sparse_train = np.array(walk_sparse_train)
tf_walk_labels_train = np.array(walk_labels_train)
tf_walk_sparse_test = np.array(walk_sparse_test)
tf_walk_labels_test = np.array(walk_labels_test)

whitened_walk_sparse_train = reshaper(tf_walk_sparse_train)
walk_train = reshaper(np.array(walk_train_samples))
whitened_walk_sparse_test = reshaper(tf_walk_sparse_test)
walk_test = reshaper(np.array(walk_test_samples))

Running_Files = [4,5,6,7]
for i in Running_Files:
    print('C')
    frame = pd.read_csv('./PCA Fourier/'+str(files[i]))
    train_samples, sparse_train, test_samples, sparse_test, labels_train, labels_test = make_windows_PCA(window, slide, frame, train_test)
    run_train_samples.extend(train_samples)
    run_sparse_train.extend(sparse_train)
    run_test_samples.extend(test_samples)
    run_sparse_test.extend(sparse_test)
    run_labels_train.extend(labels_train)
    run_labels_test.extend(labels_test)
tf_run_sparse_train = np.array(run_sparse_train)
tf_run_labels_train = np.array(run_labels_train)
tf_run_sparse_test = np.array(run_sparse_test)
tf_run_labels_test = np.array(run_labels_test)

whitened_run_sparse_train = reshaper(tf_run_sparse_train)
run_train = reshaper(np.array(run_train_samples))
whitened_run_sparse_test = reshaper(tf_run_sparse_test)
run_test = reshaper(np.array(run_test_samples))

In [87]:
mae=0
print(len(whitened_run_sparse_test))
for i in range(len(whitened_run_sparse_test)):
    guess = np.mean(whitened_run_sparse_test[i][0])*60
    real = tf_run_labels_test[i]
    mae += np.abs(real-guess)
avgMae = mae/len(whitened_run_sparse_test)
print("Run MAE: "+str(avgMae))

mae=0
print(len(whitened_walk_sparse_test))
for i in range(len(whitened_walk_sparse_test)):
    guess = np.mean(whitened_walk_sparse_test[i][0])*60
    real = tf_walk_labels_test[i]
    mae += np.abs(real-guess)
avgMae2 = mae/len(whitened_walk_sparse_test)
print("Walk MAE: "+str(avgMae2))

mae=0
print(len(whitened_bike_sparse_test))
for i in range(len(whitened_bike_sparse_test)-1):
    guess = np.mean(whitened_bike_sparse_test[i][0])*60
    real = tf_bike_labels_test[i]
    mae += np.abs(real-guess)
avgMae3 = mae/len(whitened_bike_sparse_test)
print("Bike MAE: "+str(avgMae3))

net=0
count = 0
df=pd.read_csv('CDPCAF.csv')
length = len(df)
start = int(length*0.8)+200
i = start
while i < len(df):
    diff = np.abs(np.mean([df[f'{j}_max_freq'][i] for j in range(6)])*60 - df['SPM/RPM'][i])
    net += diff
    count+=1
    i += 20
print("Full MAE: "+str(net/count))
print(count)

168
Run MAE: 95.81689279130396
364
Walk MAE: 21.215320861860505
131
Bike MAE: 20.013124810009042
Full MAE: 11.75930171963159
1438
