## Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob 
import numpy as np
import random
import sklearn.metrics
from collections import Counter
from sklearn.preprocessing import RobustScaler
from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Embedding, Dropout, TimeDistributed
from keras.layers import LSTM, LSTM, SimpleRNN, GRU, Bidirectional, Conv2D, Reshape
from keras.optimizers import Adam, SGD
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from imblearn.under_sampling import RandomUnderSampler
from statsmodels import robust
from scipy import  stats
from scipy.stats import entropy
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive into the Colab filesystem so the CSV files under
# 'drive/MyDrive/...' (read further down) are reachable.
# NOTE(review): those data paths are relative, so this presumably relies on the
# working directory being /content -- confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

## Read the Data Files

In [None]:
def upsample_label(temp_x, temp_y):
    """Give every row of ``temp_x`` a label taken from ``temp_y``.

    The rows of ``temp_x`` are visited in order while a pointer walks through
    ``temp_y``. For each row, the pointer advances by at most one position
    when the row's ``time`` is greater than the ``time`` of the label it
    currently points at; the label under the pointer is then assigned to the
    row. Once the pointer has moved past the last label, the remaining rows
    receive ``0``.

    Args:
        temp_x: DataFrame with a ``time`` column (one row per sample).
        temp_y: DataFrame with ``time`` and ``label`` columns.

    Returns:
        A list with one label per row of ``temp_x``.

    Raises:
        KeyError: if a required column is missing. Previously a bare
            ``except`` hid this and produced an all-zero label list.
    """
    x_times = temp_x['time'].values
    y_time_list = temp_y['time'].values
    y_label = temp_y['label'].values
    n_labels = len(y_label)

    jdx = 0
    label_list = []
    for x_time in x_times:
        # Advance at most one label per sample, and never index past the end.
        if jdx < n_labels and x_time > y_time_list[jdx]:
            jdx += 1
        # Samples recorded after the last label timestamp fall back to 0.
        label_list.append(y_label[jdx] if jdx < n_labels else 0)
    return label_list

In [None]:
dir_path = 'drive/MyDrive/TrainingData/'
column_list = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z','subject', 'time', 'label']
df_data = pd.DataFrame([], columns = column_list)

# Each recording is spread over four files sharing one prefix:
#   <prefix>__x.csv, <prefix>__x_time.csv, <prefix>__y.csv, <prefix>__y_time.csv
# Only the '__x' file triggers loading; its three siblings are derived from it.
session_frames = []
for idx in glob.glob(dir_path + '*.csv'):
    name_parts = idx.split('.')[0].split('__')
    if len(name_parts) < 2:
        # Unrelated CSV without the '<prefix>__<type>' pattern: skip it
        # instead of failing with an IndexError.
        continue
    session_prefix, df_type = name_parts[0], name_parts[1]
    if df_type != 'x':
        continue

    subject_name = session_prefix.split('/')[-1]
    x_path = session_prefix + '__' + df_type + '.csv'
    x_time_path = session_prefix + '__' + 'x_time' + '.csv'
    y_path = session_prefix + '__' + 'y' + '.csv'
    y_time_path = session_prefix + '__' + 'y_time' + '.csv'

    # NOTE(review): read_csv is called with its default header handling, so
    # the first line of every file is consumed as a header row -- confirm the
    # files really carry one.
    df_x = pd.read_csv(x_path)
    df_x['subject'] = subject_name

    df_x_time = pd.read_csv(x_time_path)
    df_x['time'] = df_x_time

    df_y = pd.read_csv(y_path)
    df_y['subject'] = subject_name

    df_y_time = pd.read_csv(y_time_path)
    df_y['time'] = df_y_time
    df_y.columns = ['label', 'subject', 'time']

    # Labels are given on their own time grid; spread them over the x samples.
    label_list = upsample_label(df_x, df_y)
    df_x['label'] = label_list
    df_x.columns = column_list
    session_frames.append(df_x)

# Concatenate once instead of re-copying the growing frame on every iteration.
df_data = pd.concat([df_data] + session_frames, ignore_index=True)

## Scaling Input Features

In [None]:
# The first six columns of df_data are the acc_* / gyro_* channels (see
# column_list); these are the columns that get scaled and windowed below.
scale_columns = df_data.columns[:6]

In [None]:
# Fit the scaler on the same ndarray representation that it is later asked to
# transform (here and on the test files). Fitting on a DataFrame and then
# transforming plain arrays makes scikit-learn emit a feature-name mismatch
# warning on every transform call.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/validation split further down.
scaler = RobustScaler()

df_data.loc[:, scale_columns] = scaler.fit_transform(
    df_data[scale_columns].to_numpy()
)

In [None]:
def create_windows(X, y, time_steps=1, step=1):
    """Cut ``X`` into fixed-length windows and label each by majority vote.

    Args:
        X: 2-D table of features (DataFrame or array-like), one row per sample.
        y: 1-D labels aligned with the rows of ``X``.
        time_steps: number of consecutive rows in each window.
        step: offset between the starts of consecutive windows.

    Returns:
        A tuple ``(Xs, ys)``: ``Xs`` stacks the windows along a new first
        axis, ``ys`` has shape ``(n_windows, 1)`` and holds the most frequent
        label of each window (the smallest label wins a tie).

    Note:
        Window starts are ``range(0, len(X) - time_steps, step)``, so the
        window starting at ``len(X) - time_steps`` is not produced. This is
        kept as-is because downstream code depends on the window count.
    """
    # Convert once; slicing the arrays is much cheaper than ``.iloc`` per window.
    X_values = np.asarray(X)
    y_values = np.asarray(y)

    Xs, ys = [], []
    for start in range(0, len(X_values) - time_steps, step):
        stop = start + time_steps
        Xs.append(X_values[start:stop])
        # Majority vote via np.unique: independent of the SciPy version
        # (scipy.stats.mode changed its return shape, which broke ``[0][0]``).
        # np.unique sorts the values, so argmax picks the smallest on a tie.
        uniq, counts = np.unique(y_values[start:stop], return_counts=True)
        ys.append(uniq[np.argmax(counts)])
    return np.array(Xs), np.array(ys).reshape(-1, 1)

In [None]:
def create_test_windows(X, time_steps=1, step=1):
    """Cut ``X`` into fixed-length windows for inference (no labels).

    Window starts are ``range(0, len(X) - time_steps, step)``; each window is
    the ``time_steps`` rows beginning at that start. The windows are stacked
    along a new first axis and returned as one array.
    """
    window_starts = range(0, len(X) - time_steps, step)
    return np.array(
        [X.iloc[begin:begin + time_steps].values for begin in window_starts]
    )

In [None]:
# Rows per window, and the offset between the starts of consecutive windows.
TIME_STEPS = 40
STEP = 1

X_data, y_data = create_windows(
    df_data[scale_columns],
    df_data.label,
    TIME_STEPS,
    STEP,
)

# Add a trailing axis of size 1 (the model below declares a final dimension
# of 1 in its input_shape).
X_data = X_data[..., np.newaxis]

## Performing Undersampling

In [None]:
# One row per window: wrap the window labels in a DataFrame so that the
# undersampler below can resample them.
df_label = pd.DataFrame(y_data, columns=['label'])

In [None]:
# Store each window's position as a regular column so the rows kept by the
# undersampler can later be mapped back to entries of X_data / y_data.
df_label['index_'] = df_label.index

In [None]:
## Random undersampling of the window labels.
## sampling_strategy='not minority' resamples every class except the minority
## one; random_state fixes which rows are kept.
rus = RandomUnderSampler(random_state=1, sampling_strategy='not minority')
df_balanced, balanced_labels = rus.fit_resample(
    df_label,
    df_label['label'],
)
# Re-wrap the resampled rows with explicit column names.
df_balanced = pd.DataFrame(data=df_balanced, columns=['label', 'index_'])



In [None]:
# Window positions that survived undersampling; use them to pick the matching
# windows and labels.
kept_positions = df_balanced['index_'].values
X_data, y_data = X_data[kept_positions], y_data[kept_positions]

In [None]:
val_size = 0.1  # fraction of the balanced windows held out for validation
# Seed the shuffle so the split (and therefore the reported validation scores)
# is reproducible, matching the seeded undersampler and dropout layers.
# NOTE(review): with STEP = 1 neighbouring windows share most of their rows, so
# a random split places near-duplicate windows on both sides -- the validation
# scores are presumably optimistic; consider splitting by subject.
X_train, X_val, y_train, y_val = train_test_split(
    X_data, y_data, test_size=val_size, random_state=1
)

In [None]:
## Converting label to OneHot Encoding
# scikit-learn renamed ``sparse`` to ``sparse_output`` (1.2) and later removed
# the old keyword (1.4). Try the current name first and fall back for older
# installations, so the cell runs on both.
try:
    enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
# The categories are learned from the training labels only; with
# handle_unknown='ignore' a validation label unseen in training becomes an
# all-zero row.
enc = enc.fit(y_train)
y_train = enc.transform(y_train)
y_val = enc.transform(y_val)

## Model Definition

In [None]:
# Input is one window: (rows per window, feature columns, 1).
input_shape = (X_train.shape[1], X_train.shape[2], 1)
output_dim = 4
lr = 0.001

n_conv_layers = 4
conv_filters = 128
conv_kernel_len = 5

model = Sequential()

# Stack of convolutions that slide along the time axis only (kernel width 1
# across the feature axis), each followed by a ReLU.
for layer_idx in range(n_conv_layers):
    if layer_idx == 0:
        model.add(Conv2D(conv_filters, kernel_size=(conv_kernel_len, 1), input_shape=input_shape))
    else:
        model.add(Conv2D(conv_filters, kernel_size=(conv_kernel_len, 1)))
    model.add(Activation("relu"))

# With Conv2D's default stride and 'valid' padding every layer shortens the
# time axis by (kernel length - 1). Deriving the reshape target from
# input_shape (instead of the literal (24, 6*128)) keeps the model valid when
# the window length or the number of features changes.
conv_steps = input_shape[0] - n_conv_layers * (conv_kernel_len - 1)
model.add(Reshape((conv_steps, input_shape[1] * conv_filters)))

# Two recurrent layers: the first returns the full sequence for the second,
# which returns only its final state.
layer = LSTM(128, activation="tanh", return_sequences=True)
model.add(layer)

model.add(Dropout(0.5, seed=0))
model.add(LSTM(256, activation="tanh"))

model.add(Dropout(0.5, seed=1))
model.add(Dense(output_dim))

model.add(Activation("softmax"))

# Use the ``lr`` defined above rather than repeating the literal.
opt = tf.keras.optimizers.Adam(
    learning_rate=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
    name='Adam'
)
model.compile(
    loss="categorical_crossentropy", optimizer= opt, metrics=["acc"]
)

In [None]:
# Cast both splits to float32 before they are handed to model.fit below.
X_train = np.asarray(X_train).astype(np.dtype('float32'))
X_val = np.asarray(X_val).astype(np.dtype('float32'))

## Model Training

In [None]:
# Train the model; the returned object is kept in ``hist``.
hist = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    shuffle=True,
    validation_data=(X_val, y_val),
)

## Evaluation result on val set 

In [None]:
# Reduce the per-class scores to the index of the highest-scoring class.
y_pred = np.argmax(model.predict(X_val), axis=1)
# Reduce the one-hot validation rows to column indices in the same way, so
# both arrays hold comparable values.
y_val = np.argmax(y_val, axis=1)

In [None]:
# Accuracy plus macro-averaged precision / recall / F1 on the validation set.
acc_score = sklearn.metrics.accuracy_score(y_val, y_pred)
precision, recall, f1_score = (
    metric_fn(y_val, y_pred, average='macro')
    for metric_fn in (
        sklearn.metrics.precision_score,
        sklearn.metrics.recall_score,
        sklearn.metrics.f1_score,
    )
)

In [None]:
## Print the validation metrics on a single line
summary = ' '.join([
    f'accuracy --> {acc_score}',
    f'f1_score --> {f1_score}',
    f'recall --> {recall}',
    f'precision --> {precision}',
])
print(summary)

accuracy --> 0.5877966718195872 f1_score --> 0.5755326159081701 recall --> 0.5863732187880732 precision --> 0.6050176559737992


## Inference on Test Set

In [None]:
from scipy.stats import mode
dir_path = 'drive/MyDrive/TestData/'
column_list = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z','subject', 'time']
df_test_data = pd.DataFrame([], columns = column_list)

# Number of copies of the last prediction appended to every prediction
# sequence, and the factor by which that sequence is then thinned out.
# NOTE(review): 120 and 4 are taken over unchanged from the original code;
# their relation to the sampling rates / window length should be confirmed.
PAD_LENGTH = 120
DOWNSAMPLE = 4


def _window_mode(values):
    """Return the most frequent value in ``values`` (smallest value on a tie)."""
    uniq, counts = np.unique(values, return_counts=True)
    return uniq[np.argmax(counts)]


## Reading prediction file
for idx in glob.glob(dir_path + '*.csv'):
    name_parts = idx.split('.')[0].split('__')
    # Only '<prefix>__x.csv' files start a prediction run; anything else
    # (siblings, or CSVs without the '__' pattern) is skipped.
    if len(name_parts) < 2 or name_parts[1] != 'x':
        continue
    session_prefix = name_parts[0]
    subject_name = session_prefix.split('/')[-1]
    x_path = session_prefix + '__' + 'x' + '.csv'
    y_time_path = session_prefix + '__' + 'y_time' + '.csv'

    # The first row is dropped after reading, as in the original code.
    # .copy() so that the scaled values are written into an independent frame.
    df_x = pd.read_csv(x_path, names=column_list[:6]).iloc[1:].copy()
    df_x.loc[:, scale_columns] = scaler.transform(
        df_x[scale_columns].to_numpy()
    )

    ## Converting windows for test data
    sampled_test = create_test_windows(df_x[scale_columns], TIME_STEPS, STEP)
    print(len(sampled_test))
    if len(sampled_test) == 0:
        # Not enough rows for a single window: nothing to predict for this file.
        continue

    ## Making inference on test data
    # Same trailing axis and float32 cast that the training data received.
    sampled_test = np.expand_dims(sampled_test, -1).astype('float32')
    pred_test = np.argmax(model.predict(sampled_test), axis=1)
    # Extend the sequence by repeating the last predicted class.
    pred_test = np.concatenate([pred_test, np.full(PAD_LENGTH, pred_test[-1])])
    pred_test = pd.DataFrame(pred_test, columns=['label'])

    ## Sampling the prediction to match the test set sampling frequency.
    # Mode over the window ending at each row, then keep every DOWNSAMPLE-th row.
    df_mode = (
        pred_test['label']
        .rolling(window=DOWNSAMPLE, min_periods=1)
        .apply(_window_mode, raw=True)[::DOWNSAMPLE]
    )

    ## Saving the test set prediction
    # NOTE(review): y_label is never filled, so the last printed number is
    # always 0; kept only to leave the printed output unchanged.
    y_label = []

    y_time = pd.read_csv(y_time_path).shape[0]
    print(y_time, len(pred_test), len(y_label))

    # NOTE(review): to_csv keeps its default header, so the file starts with a
    # header line -- confirm the consumer of these files expects that.
    pred_df = pd.DataFrame(df_mode.values)
    pred_df.to_csv('drive/MyDrive/' + subject_name + '__y_prediction.csv', index=False, columns=None)