<a href="https://colab.research.google.com/github/Rushp0/Electromyography-and-Gradient-Boosting/blob/main/EMG_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [1]:
# data handeling libraries
import pandas as pd
import jax
import jax.numpy as jnp
import os

# Model
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# Display plots
import matplotlib.pyplot as plt

# Preprocessing
from sklearn import preprocessing
import sklearn

# Extract EMG Data

In [2]:
! unzip '/content/EMG Physical Action Data Set.zip'
! rm '/content/EMG Physical Action Data Set/readme.txt'

Archive:  /content/EMG Physical Action Data Set.zip
replace __MACOSX/._EMG Physical Action Data Set? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
  inflating: EMG Physical Action Data Set/readme.txt  


# Put data into DataFrame

In [87]:
# create data frames
data = pd.DataFrame()
tempData = pd.DataFrame()
frames = []
y_size = 0
x_size = 0

# walk through all files and add to data frames
for path, directories, files in os.walk('/content/EMG Physical Action Data Set'):
    files = [ fi for fi in files if not fi.endswith(".log") ]
    for file in files:
      p = path+"/"+file
      tempData = pd.read_csv(path+"/"+file, sep="\t", encoding='cp1252')
      tempData.columns = ["ch1", "ch2", "ch3", "ch4", "ch5", "ch6", "ch7", "ch8"]
      tempData['y'] = os.path.dirname(p).split("/")[-2]
      frames.append(tempData)


# combines all data frames
data = pd.concat(frames)
data = data.dropna()
data

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,y
0,85.0,-830.0,-541.0,709.0,-209.0,58.0,-270.0,556.0,Aggressive
1,52.0,-989.0,-604.0,720.0,-321.0,35.0,-248.0,550.0,Aggressive
2,-88.0,-903.0,-617.0,583.0,-580.0,-2.0,-216.0,555.0,Aggressive
3,-158.0,-1003.0,-623.0,689.0,-621.0,-7.0,-207.0,564.0,Aggressive
4,-186.0,-946.0,-466.0,626.0,-866.0,-45.0,-156.0,499.0,Aggressive
...,...,...,...,...,...,...,...,...,...
9994,50.0,18.0,-20.0,-54.0,111.0,34.0,-2180.0,66.0,Normal
9995,74.0,32.0,-76.0,-57.0,106.0,36.0,-2015.0,86.0,Normal
9996,55.0,20.0,6.0,-68.0,102.0,32.0,-1877.0,84.0,Normal
9997,13.0,-2.0,49.0,-74.0,101.0,23.0,-1781.0,91.0,Normal


# Split Data

In [111]:
# Preprocessing
y = data['y']
y = pd.DataFrame(y.drop(index=9204, axis=0))

X = data[["ch1", "ch2", "ch3", "ch4", "ch5", "ch6", "ch7", "ch8"]]
X = X.drop(index=9204, axis=0)

# LabelBinarizer
le = preprocessing.LabelEncoder()
le = le.fit(data['y'])
data['y'] = le.fit_transform(data['y'])


X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.33, random_state=42)

y_train

Unnamed: 0,y
145,1
8904,0
6375,0
2687,1
1267,0
...,...
9503,0
1894,1
2268,1
2577,0


# Gradient Boosting Class

In [112]:
class GradientBoostingClassifier():

  # constructor function
  def __init__(self, n_estimators, learning_rate):
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.max_depth = 2

    self.trees = []
    for i in range(n_estimators):
      self.trees.append(DecisionTreeRegressor(max_depth=self.max_depth))
  
  def _sigmoid(self, z):
    return jnp.exp(z) / (1+jnp.exp(z))

  # fit function
  """
  X -> Training data
  y - > DataFrame. Predictions. In this case, 0 or 1
  """
  def fit(self, X, y):
    y_hat = jnp.full(jnp.shape(y), 0.5)
    y = jnp.reshape(jnp.array(y["y"]), (-1,1))
    # i = 0
    for wl in range(self.n_estimators):
      
      # calculate residuals
      residuals = y - y_hat
      
      # fit tree to residual
      self.trees[wl].fit(X, residuals)

      # get new weak learner predictions
      new_pred = self.trees[wl].predict(X)
      new_pred = jnp.reshape(new_pred, (-1,1))
      y_hat += new_pred*self.learning_rate
      # print(y_hat)

  def predict(self, X):
    # make predictions
    i = 0
    preds = 0
    for tree in self.trees:
      y_hat = tree.predict(X)
      preds += y_hat
    preds = self._sigmoid(preds)
    preds = preds.tolist()
    for i in range(len(preds)):
      if preds[i] < 0.5:
        preds[i] = 0
      else:
        preds[i] = 1
    return preds

In [94]:
gbs = GradientBoostingClassifier(50, 0.1)
gbs.fit(X_train, y_train)

y_train["pred"] = gbs.predict(X_train)
y_train

Unnamed: 0,y,pred
145,1,1
8904,0,0
6375,0,1
2687,1,1
1267,0,0
...,...,...
9503,0,0
1894,1,0
2268,1,1
2577,0,1


# Performance Results

In [113]:
gbs = GradientBoostingClassifier(100, 0.1)
gbs.fit(X_test, y_test)
output = y_test

output["pred"] = gbs.predict(X_test)
output

Unnamed: 0,y,pred
4521,0,0
4286,1,0
7191,0,1
9258,0,0
848,1,1
...,...,...
7180,1,1
6475,0,0
89,0,0
6826,0,0


In [115]:
acc = sklearn.metrics.accuracy_score(output["y"], output["pred"])
print("The accuracy is: ", acc)

The accuracy is:  0.8247655825373719
