# Importing the Required Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import MaxPooling1D, Flatten, Dense, Activation, Dropout,LSTM, BatchNormalization,Reshape

from keras.optimizers import Adam
from sklearn.metrics import classification_report

# Preprocessing Data

In [None]:
# Converting the data into X, Y, Z coordinates to feed as Input
def create_dataframe_X(x_file, x_time_file):
    # Read X data file and rename columns
    df_x = pd.read_csv(x_file, sep=',', names=['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr'])

    # Read X time file and rename column
    df_time = pd.read_csv(x_time_file, names=['Time stamp'])

    # Concatenate data frames horizontally
    result = pd.concat([df_x, df_time], axis=1)

    return result


# Concatenating the Labels and time steps for Y label
def create_dataframe_Y(y_file, y_time_file):
    # Read Y label file and rename column
    df_y = pd.read_csv(y_file, names=['Label'])

    # Read Y time file and rename column
    df_time = pd.read_csv(y_time_file, names=['Time stamp'])

    # Concatenate data frames horizontally
    result = pd.concat([df_y, df_time], axis=1)

    return result

def combine_frames(x_frame, y_frame):
    # Get labels and concatenate X and Y frames horizontally
    labels = y_frame['Label'].tolist()
    combined_frame = pd.concat([x_frame, y_frame], axis=1)

    # Extract time stamps from X and Y frames
    time_stamp_y = y_frame['Time stamp'].tolist()
    time_stamp_x = x_frame['Time stamp'].tolist()

    # Map labels to time stamps in X frame
    labels_for_x = []
    count = 0
    for i in range(len(time_stamp_y)):
        while count < len(time_stamp_x) and time_stamp_x[count] <= time_stamp_y[i]:
            labels_for_x.append(labels[i])
            count += 1

    # Create a new data frame with the mapped labels and concatenate it to the combined frame
    label_df = pd.DataFrame(labels_for_x, columns=['Label'])
    combined_frame = pd.concat([combined_frame, label_df], axis=1)

    # Drop rows with missing values and the Time stamp column
    combined_frame = combined_frame.dropna().drop(columns=['Time stamp'])

    return combined_frame

# Read Dataset

In [2]:
data=pd.read_csv('/content/drive/MyDrive/NeuralNets/Combined_data.csv')

In [None]:
data.head()

Unnamed: 0,X_acc,Y_acc,Z_acc,X_gyr,Y_gyr,Z_gyr,Label
0,4.435275,8.196063,2.974488,0.014215,-0.039157,-0.016744,0.0
1,4.18692,8.344455,2.908057,0.005771,-0.00448,-0.003345,0.0
2,4.544637,8.408659,2.89,0.007967,0.022412,0.001159,0.0
3,4.849308,8.411614,2.900692,0.027778,-0.01067,-0.014223,0.0
4,4.50919,8.118649,2.847298,0.021577,-0.045498,-0.021111,0.0


# Apply Window of 30

In [3]:
import numpy as np
import pandas as pd

# Assuming your input data is stored in a Pandas DataFrame called `data`
# with shape (n, 6)

# Convert the DataFrame to a NumPy array
data_val = data.values

# Reshape the array to have shape (n, 30, 6)
n = data_val.shape[0]
time_steps = 30
num_features = 6

# Remove the last column of `data_val`
data_label= data_val[:, -1]
data_val = data_val[:, :-1]

# Reshape the data
reshaped_data = np.empty((n, time_steps, num_features))
reshaped_label = np.empty((n, 1))

# Iterate over the original data and fill in the reshaped array
for i in range(n - time_steps + 1):
    reshaped_data[i] = data_val[i:i+time_steps]
    reshaped_label[i]=data_label[i + time_steps - 1]

# Train test split

In [4]:
# Assigning Dependent and Independent Variables to X and y respectively
X= reshaped_data                            
y= reshaped_label    

In [None]:
import numpy as np

# create a sample numpy array
#arr = np.array([1, 2, 3, 4, 5])

# save the array to a file to reduce RAM usage overall 
np.save('/content/drive/MyDrive/NeuralNets/Competition/X.npy', X)
np.save('/content/drive/MyDrive/NeuralNets/Competition/y.npy', y)

In [None]:
# Loading the Saved processed array directly without the previous cells
X = np.load('/content/drive/MyDrive/NeuralNets/Competition/X_60.npy')
y = np.load('/content/drive/MyDrive/NeuralNets/Competition/y_60.npy')

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [20]:
# Categorical Encoding of the class labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [None]:
# save the array to a file
np.save('/content/drive/MyDrive/NeuralNets/Competition/X_60.npy', X)
np.save('/content/drive/MyDrive/NeuralNets/Competition/y_60.npy', y)

# Tabel Data

In [7]:
opt = keras.optimizers.Adam(learning_rate=0.01)

In [8]:
np.unique(y)

array([0., 1., 2., 3.])

In [9]:
# convert y to a 1D Python list
y_list = y.ravel().tolist()

In [None]:
# y_list

# Hyperparameters Defining

In [11]:
from tensorflow.keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

callback = tf.keras.callbacks.EarlyStopping(monitor='acc', patience=2)

# Define a learning rate decay method:
lr_decay = ReduceLROnPlateau(monitor='loss', 
                             patience=1, verbose=0, 
                             factor=0.5, min_lr=1e-8)

from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_list), y=y_list)
print(class_weights)


[0.33394202 5.75284421 4.50395459 1.64038231]


# Model Designed

In [12]:
# LSTM
model1 = Sequential()
model1.add(LSTM(64, input_shape=X_train[0].shape, return_sequences=True))
model1.add(LSTM(32))

model1.add(Dropout(0.5))

model1.add(Flatten())

model1.add(Dense(100, activation='relu'))
model1.add(Dense(25, activation='relu'))
model1.add(Dense(4, activation='softmax'))

model1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])  

model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            18176     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 flatten (Flatten)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 100)               3300      
                                                                 
 dense_1 (Dense)             (None, 25)                2525      
                                                                 
 dense_2 (Dense)             (None, 4)                 1

In [None]:
from tensorflow import keras
model1 = keras.models.load_model('/content/drive/MyDrive/NeuralNets/Competition/LSTM_impr1_win60_2')

In [13]:
model1.fit(X_train, y_train, epochs=1, verbose=1,validation_steps=0.3, callbacks=[callback, lr_decay],
          class_weight=dict(enumerate(class_weights)))



<keras.callbacks.History at 0x7fbfa8274370>

# Save best trained Model

In [None]:
model1.save("/content/drive/MyDrive/NeuralNets/Competition/LSTM_impr1_win60")

# Model Evaluation 

In [14]:
# evaluate model
_, accuracy = model1.evaluate(X_test, y_test, batch_size=32, verbose=1)



In [16]:
from sklearn.metrics import f1_score

# Using model1 that has been trained on `X_train` and `y_train`
y_pred = model1.predict(X_test)



In [17]:
y_pred = np.argmax(y_pred, axis=1)

In [21]:
y_test = np.argmax(y_test, axis=1)

In [22]:
# `y_true` contains the true labels for `X_test`
# Metric used for Evaluation F1 score (Macro)

f1 = f1_score(y_test, y_pred, average='macro')
print("F1 Score:", f1)

F1 Score: 0.8887076826603033


In [None]:
# model1.save("/content/drive/MyDrive/NeuralNets/Competition/LSTM_wind30")

# Run in Loop (Training) 

In [None]:
from tensorflow import keras
model1 = keras.models.load_model('/content/drive/MyDrive/NeuralNets/Competition/LSTM_impr_2')

In [None]:
from sklearn.metrics import f1_score

# Loaded model named model1 that has been trained on `X_train` and `y_train` and saved
y_pred = model1.predict(X_test)



In [None]:
y_test = np.argmax(y_test, axis=1)

In [None]:
y_pred = np.argmax(y_pred, axis=1)

In [None]:
f1 = f1_score(y_test, y_pred, average='macro')
print("F1 Score:", f1)

F1 Score: 0.9639431200991896


In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99    332155
           1       0.92      1.00      0.96     19192
           2       0.90      1.00      0.95     24747
           3       0.97      0.96      0.97     67777

    accuracy                           0.98    443871
   macro avg       0.95      0.98      0.96    443871
weighted avg       0.98      0.98      0.98    443871



In [None]:
model1.save("/content/drive/MyDrive/NeuralNets/Competition/LSTM_impr1_win60_2")



# Predictions

In [None]:
from tensorflow import keras
model = keras.models.load_model('/content/drive/MyDrive/NeuralNets/Competition/LSTM_wind30')

In [None]:
# Get the predictions of the test set given in /TestData
subject9_X = create_dataframe_X('/content/drive/MyDrive/NeuralNets/TestData/subject_009_01__x.csv', '/content/drive/MyDrive/NeuralNets/TestData/subject_009_01__x_time.csv')
subject10_X = create_dataframe_X('/content/drive/MyDrive/NeuralNets/TestData/subject_010_01__x.csv', '/content/drive/MyDrive/NeuralNets/TestData/subject_010_01__x_time.csv')
subject11_X = create_dataframe_X('/content/drive/MyDrive/NeuralNets/TestData/subject_011_01__x.csv', '/content/drive/MyDrive/NeuralNets/TestData/subject_011_01__x_time.csv')
subject12_X = create_dataframe_X('/content/drive/MyDrive/NeuralNets/TestData/subject_012_01__x.csv', '/content/drive/MyDrive/NeuralNets/TestData/subject_012_01__x_time.csv')

subject9_X = subject9_X.drop(columns = ['Time stamp'])
subject10_X = subject10_X.drop(columns = ['Time stamp'])
subject11_X = subject11_X.drop(columns = ['Time stamp'])
subject12_X = subject12_X.drop(columns = ['Time stamp'])

In [None]:
import numpy as np
import pandas as pd

# Assuming your input data is stored in a Pandas DataFrame called `df`
# with shape (n, 6)

def windowing(data):
  # Convert the DataFrame to a NumPy array
  data_val = data.values

  # Reshape the array to have shape (n, 30, 6)
  n = data_val.shape[0]
  time_steps = 30
  num_features = 6

  # Reshape the data
  reshaped_data = np.empty((n, time_steps, num_features))
  # reshaped_label = np.empty((n, 1))

  # Iterate over the original data and fill in the reshaped array
  for i in range(n - time_steps + 1):
      reshaped_data[i] = data_val[i:i+time_steps]
      # reshaped_label[i]=data_label[i + time_steps - 1]
  
  return reshaped_data


In [None]:
def df_modes(df):
  # calculate the mode for every 4 elements
  modes = []
  for i, row in df.iterrows():
      if i % 4 == 0 and i + 4 <= len(df):
          mode = df.loc[i:i+3].mode().iloc[0]
          modes.append(mode)

  # convert the result to a DataFrame
  modes_df = pd.DataFrame(modes, columns=df.columns)

  return modes_df

# Subject 9

In [None]:
df_9=pd.read_csv('/content/drive/MyDrive/NeuralNets/TestData/subject_009_01__y_time.csv',header=None)

In [None]:
subject9_X.shape

(37991, 6)

In [None]:
# Apply the windowing function
sub9_win_X=windowing(subject9_X)

In [None]:
sub9_win_X.shape

(37991, 60, 6)

In [None]:
# Predictions using the trained model
y_pred_sub9=model1.predict(sub9_win_X)



In [None]:
# find the index of the maximum value for each array (row)
result_sub9 = np.argmax(y_pred_sub9, axis=1)

In [None]:
result_sub9.shape

(37991,)

In [None]:
df9=pd.DataFrame(result_sub9)

In [None]:
modes_df9= df_modes(df9)

In [None]:
print("Shapes: y_time ={},  Modes Df={}".format(df_9.shape, modes_df9.shape))

Shapes: y_time =(9498, 1),  Modes Df=(9497, 1)


# Subject 10

In [None]:
df_10=pd.read_csv('/content/drive/MyDrive/NeuralNets/TestData/subject_010_01__y_time.csv',header=None)

In [None]:
# Apply the windowing function
sub10_win_X=windowing(subject10_X)

# Predictions using the trained model
y_pred_sub10=model1.predict(sub10_win_X)

# find the index of the maximum value for each array (row)
result_sub10 = np.argmax(y_pred_sub10, axis=1)

df10=pd.DataFrame(result_sub10)

modes_df10= df_modes(df10)




In [None]:
print("Shapes: y_time ={},  Modes Df={}".format(df_10.shape, modes_df10.shape))

Shapes: y_time =(12270, 1),  Modes Df=(12270, 1)


# Subject 11

In [None]:
df_11=pd.read_csv('/content/drive/MyDrive/NeuralNets/TestData/subject_011_01__y_time.csv',header=None)

In [None]:
# Apply the windowing function
sub11_win_X=windowing(subject11_X)

# Predictions using the trained model
y_pred_sub11=model1.predict(sub11_win_X)

# find the index of the maximum value for each array (row)
result_sub11 = np.argmax(y_pred_sub11, axis=1)

df11=pd.DataFrame(result_sub11)

modes_df11= df_modes(df11)



In [None]:
print("Shapes: y_time ={},  Modes Df={}".format(df_11.shape, modes_df11.shape))

Shapes: y_time =(12940, 1),  Modes Df=(12940, 1)


# Subject 12

In [None]:
df_12=pd.read_csv('/content/drive/MyDrive/NeuralNets/TestData/subject_012_01__y_time.csv',header=None)

In [None]:
# Apply the windowing function
sub12_win_X=windowing(subject12_X)

# Predictions using the trained model
y_pred_sub12=model1.predict(sub12_win_X)

# find the index of the maximum value for each array (row)
result_sub12 = np.argmax(y_pred_sub12, axis=1)

df12=pd.DataFrame(result_sub12)

modes_df12= df_modes(df12)



In [None]:
print("Shapes: y_time ={},  Modes Df={}".format(df_12.shape, modes_df12.shape))

Shapes: y_time =(11330, 1),  Modes Df=(11329, 1)


# Saving Subject Df to CSV

In [None]:
modes_df9.to_csv('/content/drive/MyDrive/NeuralNets/subject_009_01__y.csv' ,index=False)

modes_df10.to_csv('/content/drive/MyDrive/NeuralNets/subject_010_01__y.csv',header=False, index=False)

modes_df11.to_csv('/content/drive/MyDrive/NeuralNets/subject_011_01__y.csv',header=False, index=False)

modes_df12.to_csv('/content/drive/MyDrive/NeuralNets/subject_012_01__y.csv',index=False)

# Verification of Shapes of Predictions

In [None]:
import pandas as pd

In [None]:
df_x=pd.read_csv('/content/drive/MyDrive/NeuralNets/subject_012_01__y.csv',header=None)
df_x.shape

(11330, 1)