<a href="https://colab.research.google.com/github/JoeyMucci/GradientBoosting/blob/main/GradientBoostingEMG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import jax 
from jax import grad
import jax.numpy as jnp
import numpy as np
import statistics as stat

root_directory = 'drive/MyDrive/ColabNotebooks/EMG Data/'

filename_to_cat = {
    'Slapping.txt' : 1,
    'Sidekicking.txt' : 2,
    'Pushing.txt' : 3,
    'Punching.txt': 4, 
    'Pulling.txt' : 5,
    'Kneeing.txt' : 6,
    'Headering.txt' : 7,
    'Hamering.txt' : 8, 
    'Frontkicking.txt' : 9,
    'Elbowing.txt' : 10,
    'Waving.txt' : 11,
    'Walking.txt' : 12,
    'Running.txt' : 13,
    'Standing.txt' : 14,
    'Jumping.txt' : 15,
    'Hugging.txt' : 16, 
    'Handshaking.txt' : 17, 
    'Seating.txt' : 18, 
    'Bowing.txt' : 19, 
    'Clapping.txt' : 20
}

x = []
y = []

# Preprocess data, then put into x and y
for files in os.walk(root_directory):
  for file in files[2]: # for each activity file
    x1 = []
    x2 = []
    x3 = []
    f = open(root_directory + file, "r")
    for line in f: # read data initially
      x1.append(line.split())
    for i in range(len(x1)): # convert from strings to positive integers
      for j in range(8):
        x1[i][j] = abs(int(x1[i][j]))
    for i in range(len(x1) // 100): # condense data
      x2.append(max(x1[i * 100 : i * 100 + 99]))
    for i in range(50, len(x2)): # take rolling average of last 50
      filler = []
      for j in range(8):
        filler.append(np.mean(x2[i - 50: i][j]))
      x3.append(filler)
    for i in range(len(x3)): # add the processed data to x and corresponding label to y
      x.append(x3[i])
      y.append(filename_to_cat[file])

xtrain = []
ytrain = []
xtest = []
ytest = []

# Split into test and train sets alternating every row
for i in range(len(x)):
  if i % 2 == 0:
    xtrain.append(x[i])
    ytrain.append(y[i])
  else:
    xtest.append(x[i])
    ytest.append(y[i])

In [2]:
# Returns the accuracy of the model
def accuracy(yhat, y):
  sum = 0
  for i in range(len(y)):
    if yhat[i] == y[i]:
      sum += 1
  return sum / len(y)

# Our choice of loss function that we will be optimizing 
def loss_logistic(yhat, y):
  return jnp.sum(jnp.log(1 + jnp.exp(-2 * y * yhat)))

grad_loss = grad(loss_logistic)  # Jax does the gradient for us!

ymodels = []

# For every activity, create a binary model
for activity in range(1, 21):
  # Build training labels
  ytrainclass = []
  for i in range(len(ytrain)):
    if ytrain[i] == activity:
      ytrainclass.append(1)
    else: ytrainclass.append(-1)

  trainmodel = jnp.zeros(len(ytrain))
  testmodel = jnp.zeros(len(ytest))

  # For every feature, do a series of splits at regular intervals
  for f in range(8):
    error = grad_loss(trainmodel, jnp.array(ytrainclass)) # We will do regression on this

    newtrainmodel = np.zeros(len(ytrain))
    newtestmodel = np.zeros(len(ytest))

    errorsums = np.zeros(40)
    errorcounts = np.zeros(40)

    # Calculate the sum and count so average can be calculated later
    for i in range(len(ytrain)):
      for threshold in range(100, 4100, 100):
        if xtrain[i][f] < threshold:
          errorsums[(threshold // 100) - 1] += error[i]
          errorcounts[(threshold // 100) - 1] += 1
          break

    # Apply updates (average of error) to both train and test
    for i in range(len(ytrain)):
      for threshold in range(100, 4100, 100):
        if xtrain[i][f] < threshold:
          newtrainmodel[i] -= errorsums[(threshold // 100) - 1] / errorcounts[(threshold // 100) - 1]
          break

    for i in range(len(ytest)):
      for threshold in range(100, 4100, 100):
        if xtest[i][f] < threshold:
          newtestmodel[i] -= errorsums[(threshold // 100) - 1] / errorcounts[(threshold // 100) - 1]
          break

    # Update both train and test models
    trainmodel = trainmodel + newtrainmodel
    testmodel = testmodel + newtestmodel
  
  ymodels.append(testmodel) # Add test model to list of models after all training is done

predictions = []

# Predict the activity that produces the highest likelihood of matching
for i in range(len(xtest)):
  maxindex = 0
  maxvalue = ymodels[maxindex][i]
  for j in range(1, 20):
    if(ymodels[j][i] > maxvalue):
      maxvalue = ymodels[j][i]
      maxindex = j
  predictions.append(maxindex + 1)

# How likely the model is to predict the correct class from the 20 possible options
# No skill model will produce ~0.05
print(f"The accuracy of the model is:{accuracy(ytest, predictions) * 100 : .3f}")



The accuracy of the model is: 34.440
