<a href="https://colab.research.google.com/github/Assistive-Technology-Create-Team/plumshum.github.io/blob/new_data_acquisition_and_machine_learning/modified_machine_learning_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Use this [FallAllD2.csv file](https://drive.google.com/file/d/1Oi4Y_-EtZxU9mOAn-v5a93EDW43gpS1n/view?usp=sharing). Make sure to upload your own copy to your Google Drive.

1. Install Packages
2. Mount Drive
3. Change Google Colab Runtime to use GPU

4. Run All the remaining cells except for the last one.
5. The 3rd to last cell has a line

```
var = 0
x_train, x_test, y_train, y_test = data_split(df, var)
```
   and the 2nd to last cell has a line
```
model = train_and_accurary_model(model, x_train, x_test, y_train, y_test, var)

```
Every time you finish training a model, increase the variable `var` by 1 until it reaches 6. Each value of `var` selects a different set of features.

6. Every time a model is saved, it should be saved to your local machine.

In [None]:
#Run to install packages. Will take a few minutes
%pip install numpy
%pip install pandas
%pip install os-sys
%pip install matplotlib
%pip install seaborn
%pip install scikit-learn
%pip install "tensorflow-gpu<2.10"
%pip install "tensorflow<2.10"
%pip install "keras<2.10"

In [2]:
# If you are on Google Colab run this
#from google.colab import drive
#drive.mount('/content/drive')

In [3]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Flatten, Conv1D, MaxPooling1D
import sklearn.model_selection
from sklearn.preprocessing import StandardScaler

#If you do not see 1 Physical GPUs, 1 Logical GPUs, then you are most likely not using a GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Query the logical devices only after every GPU has been configured:
        # listing them initializes the GPUs, and memory growth cannot be changed
        # afterwards. Reporting once here also avoids one printout per GPU.
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

In [4]:
def data_collection(path='FallAllD2.csv'):
    """Read the dataset CSV and return it with every column cast to float32.

    Args:
        path: Location of the CSV file. Defaults to 'FallAllD2.csv' in the
            working directory. On Google Colab, pass the path of the file
            inside your mounted Google Drive instead of editing this function.

    Returns:
        pandas.DataFrame whose columns are all float32.
    """
    df = pd.read_csv(path)
    # convert all columns to float32
    df = df.astype('float32')
    print("finished collecting data")
    return df

In [6]:
def data_label(df):
    """Return a copy of `df` with a binary 'IsFall' column added.

    'IsFall' is 1 where ActivityID > 100 and 0 otherwise (missing ActivityID
    values compare False and therefore become 0).

    The input frame is left untouched: a new frame is returned, which also
    avoids pandas' SettingWithCopyWarning when `df` is a filtered slice.

    Args:
        df: DataFrame containing an 'ActivityID' column.

    Returns:
        New DataFrame with the additional int64 'IsFall' column.
    """
    # Vectorized comparison instead of a per-row Python lambda
    is_fall = (df['ActivityID'] > 100).astype('int64')
    return df.assign(IsFall=is_fall)

def data_split(df, num, random_state=None):
    """Label the data, pick a feature subset, and build scaled train/test arrays.

    Args:
        df: DataFrame with an 'ActivityID' column and the sensor columns
            selected below.
        num: Feature-set selector:
            0 = Acc + Gyr + Bar, 1 = Acc + Gyr, 2 = Gyr + Bar, 3 = Acc + Bar,
            4 = Acc, 5 = Gyr, 6 = Bar ('Device' is always included).
            Any other value falls back to the full feature set (0).
        random_state: Optional seed forwarded to train_test_split so the split
            can be reproduced. The default (None) keeps the split random.

    Returns:
        (x_train, x_test, y_train, y_test). The x arrays are standardized and
        shaped (samples, 1, features); the y values are the 'IsFall' labels.
    """
    acc = ['Acc_x', 'Acc_y', 'Acc_z']
    gyr = ['Gyr_x', 'Gyr_y', 'Gyr_z']
    bar = ['Bar_x', 'Bar_y']
    feature_sets = {
        0: ['Device'] + acc + gyr + bar,
        1: ['Device'] + acc + gyr,
        2: ['Device'] + gyr + bar,
        3: ['Device'] + acc + bar,
        4: ['Device'] + acc,
        5: ['Device'] + gyr,
        6: ['Device'] + bar,
    }

    df = data_label(df)
    # split the data into features and labels
    x = df[feature_sets.get(num, feature_sets[0])]
    y = df['IsFall']

    #Spliting Data
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        x, y, test_size=0.2, random_state=random_state)

    # Fit the scaler on the training rows only and reuse it for the test rows,
    # so test-set statistics do not leak into the preprocessing.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # The LSTM expects (samples, timesteps, features); each row is one timestep.
    x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
    x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))
    print('x y shape: ', x_train.shape, y_train.shape)

    print("data split")

    return x_train, x_test, y_train, y_test

In [7]:
from sklearn.metrics import confusion_matrix
def model_create(x_train, layer1_input = 512, layer2_input = 128):
  """Build and compile the LSTM binary classifier for the 'IsFall' label.

  Args:
    x_train: 3-D training array; its 2nd and 3rd dimensions define the
      LSTM input shape.
    layer1_input: Number of units in the LSTM layer.
    layer2_input: Number of units in the hidden Dense layer.

  Returns:
    A compiled Sequential model with a single sigmoid output, trained with
    binary cross-entropy.

  Note: for activity classification (instead of IsFall), the output layer
  would be Dense(136, activation='softmax') with categorical_crossentropy.
  """
  timesteps, n_features = x_train.shape[1], x_train.shape[2]
  network = Sequential([
      LSTM(layer1_input, input_shape=(timesteps, n_features)),
      Dropout(0.2),
      Dense(layer2_input, activation='relu'),
      Dropout(0.3),
      # one neuron + sigmoid for the binary IsFall decision
      Dense(1, activation='sigmoid'),
  ])
  network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  return network

def train_and_accurary_model(model, x_train, x_test, y_train, y_test, num, epochs=10, batch_size=256):
  """Train `model`, report test metrics, and save it as an .h5 file.

  Args:
    model: Compiled Keras model (e.g. from model_create).
    x_train, y_train: Training features and labels; 10% is held out for
      validation during fitting.
    x_test, y_test: Held-out data used for the accuracy and confusion matrix.
    num: Feature-set number; only used to build the output file name
      'model<num>_device2_aggregate.h5'.
    epochs: Number of training epochs (default 10, as before).
    batch_size: Mini-batch size (default 256, as before).

  Returns:
    The trained model.
  """
  # train the model
  model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

  # evaluate the model
  test_loss, test_Acc = model.evaluate(x_test, y_test)
  print('Test accuracy:', test_Acc)
  model.summary()
  print("Confusion Matrix")
  y_pred = model.predict(x_test)
  # threshold the sigmoid output at 0.5 to get hard class predictions
  y_pred = (y_pred > 0.5)
  confusion_mtx = confusion_matrix(y_test, y_pred)
  # normalize each row so entries are fractions of the true class
  confusion_mtx_percent = confusion_mtx / confusion_mtx.sum(axis=1)[:, np.newaxis]
  print(confusion_mtx_percent)

  # Save Model to the current working directory
  file_name = 'model' + str(num) + '_device2_aggregate.h5'

  model.save(file_name)
  # On Google Colab, uncomment to download the saved file to your machine:
  #from google.colab import files
  #files.download(file_name)
  # Report what actually happened: the file is saved, not downloaded.
  print("Model saved to", file_name)

  # Save Model to Google Drive
  #model.save('/content/drive/My Drive/' + file_name )
  #print("Model saved to Google Drive")

  return model


In [None]:
# Read the CSV once; later cells restart from this untouched snapshot
# instead of loading the file again.
original_df = data_collection()
df = original_df.copy()

In [None]:
# Restart from the pristine snapshot so this cell can be re-run safely.
df = original_df.copy()

# find the range of Bar_x
bar_x = df['Bar_x']
print(bar_x.min(), bar_x.max())
# drop all rows where Bar_x is missing (NaN)
df = df.dropna(subset=['Bar_x'])
# checkpoint of the cleaned data, reused by the cells below
df_copy = df.copy()

In [None]:
# Start from the cleaned checkpoint so this cell can be re-run on its own.
df = df_copy.copy()
# Keep the rows of a single device. Device codes -> 1: Neck, 2: Wrist, 3: Waist
df = df[df['Device'] == 2]

# Feature-set selector passed to data_split; also used later in the saved model's file name.
var = 0 # change this number from 0 - 6
x_train, x_test, y_train, y_test = data_split(df, var) 
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

Omitted one code cell because it seemed repetitive:
```
df = df_copy.copy()
# 1: Neck, 2: Wrist, 3: Waist
df = df[df['Device'] == 2]

var = 0 #change this number from 0 - 6
x_train, x_test, y_train, y_test = data_split(df, var) 
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

# Create and Train Model
teacher_model = model_create(x_train)
print("Model Created")
teacher_model = train_and_accurary_model(teacher_model, x_train, x_test, y_train, y_test, var)
print("Model Trained")
```

In [None]:
# Create and train the teacher model on the split produced by the previous cell
# (x_train/x_test/y_train/y_test and `var` must already be defined).
teacher_model = model_create(x_train, layer1_input=256, layer2_input=64)
print("Model Created")
teacher_model = train_and_accurary_model(teacher_model, x_train, x_test, y_train, y_test, var)
print("Model Trained")

In [None]:
# Data for the student model: a fresh copy of the cleaned checkpoint,
# restricted to the rows where Device == 2.
df_acc = df_copy.copy()
df_acc = df_acc[df_acc['Device'] == 2]

In [None]:
def student_data_split(df, num, random_state=None):
    """Build scaled train/test arrays for the student model with sensors masked.

    All nine feature columns are always kept so the input width stays fixed;
    the sensors that the student should not see are overwritten with zeros.

    Args:
        df: DataFrame with 'ActivityID', 'Device' and the Acc/Gyr/Bar columns.
            The caller's frame is not modified.
        num: Which sensor groups to zero out:
            1 = Bar, 2 = Acc, 3 = Gyr, 4 = Bar + Gyr, 5 = Bar + Acc,
            6 = Gyr + Acc. Any other value masks nothing.
        random_state: Optional seed forwarded to train_test_split. The default
            (None) keeps the split random.

    Returns:
        (x_train, x_test, y_train, y_test). The x arrays are standardized and
        shaped (samples, 1, features); the y values are float32 'IsFall' labels.
    """
    acc = ['Acc_x', 'Acc_y', 'Acc_z']
    gyr = ['Gyr_x', 'Gyr_y', 'Gyr_z']
    bar = ['Bar_x', 'Bar_y']
    feature_columns = ['Device'] + acc + gyr + bar
    masked_columns = {
        1: bar,
        2: acc,
        3: gyr,
        4: bar + gyr,
        5: bar + acc,
        6: gyr + acc,
    }

    # Label a private copy so neither the new column nor the masking leaks
    # back into the caller's DataFrame.
    df = data_label(df.copy())

    # Mask BEFORE handing the features to the scaler/model; selecting the
    # features first and zeroing `df` afterwards would leave them unmasked.
    x = df[feature_columns].copy()
    zeroed = masked_columns.get(num, [])
    if zeroed:
        x[zeroed] = 0.0
    x = x.astype('float32')

    y = df['IsFall'].astype('float32')

    #Splitting Data
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        x, y, test_size=0.2, random_state=random_state)

    # Fit the scaler on the training rows only and reuse it for the test rows,
    # so test-set statistics do not leak into the preprocessing.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # The LSTM expects (samples, timesteps, features); each row is one timestep.
    x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
    x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

    return x_train, x_test, y_train, y_test

# Masking selector passed to student_data_split. Note this reassigns the same
# `var` the teacher cells use; the student model's file name is built from it later.
var = 6
student_x_train, student_x_test, student_y_train, student_y_test = student_data_split(df_acc, var) 
print(student_x_train.shape, student_y_train.shape)
print(student_x_test.shape, student_y_test.shape)


In [None]:
# Knowledge distillation
# Temperature by which values are divided before the sigmoid in the distillation loss.
temperature = 5.0
# NOTE(review): layers[-2] is the second-to-last layer of teacher_model. For a model
# built by model_create that is the Dropout after the hidden Dense layer, so this is a
# scaled hidden activation rather than the output logit — confirm this is intended.
teacher_logits = tf.keras.layers.Lambda(lambda x: x / temperature)(teacher_model.layers[-2].output)
# NOTE(review): teacher_model_logits is built here but not referenced by the training
# code in this notebook, so the student is currently trained without teacher outputs.
teacher_model_logits = tf.keras.models.Model(teacher_model.inputs, teacher_logits)

# Function to create a knowledge distillation loss
def knowledge_distillation_loss(y_true, y_pred_logits):
    """KL divergence between temperature-softened targets and predictions.

    Both inputs are divided by the module-level `temperature` and passed
    through a sigmoid before the KL divergence is computed.

    NOTE(review): as wired in this notebook, `y_true` holds the hard IsFall
    labels and `y_pred_logits` the student's sigmoid output, not teacher
    logits — so this term does not yet distill from the teacher. Confirm the
    intended targets before relying on it.

    Args:
        y_true: Target tensor.
        y_pred_logits: Prediction tensor.

    Returns:
        Scalar KL-divergence loss tensor.
    """
    # Make sure the targets share the prediction dtype before the arithmetic.
    y_true = tf.cast(y_true, y_pred_logits.dtype)
    y_pred = tf.nn.sigmoid(y_pred_logits / temperature)
    y_true = tf.nn.sigmoid(y_true / temperature)
    # Fully qualified: `KLDivergence` is not imported anywhere in this notebook.
    return tf.keras.losses.KLDivergence()(y_true, y_pred)

# Custom loss function that combines knowledge distillation loss and the original binary crossentropy loss
def combined_loss(y_true, y_pred):
    """Sum of the distillation term and the binary cross-entropy.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model output. The student model ends in a sigmoid layer, so
            these are probabilities, not logits.

    Returns:
        Scalar loss tensor: knowledge_distillation_loss + binary cross-entropy.
    """
    kd_loss = knowledge_distillation_loss(y_true, y_pred)
    # from_logits=False because y_pred already went through the sigmoid output
    # layer; treating it as logits would apply the sigmoid twice.
    # Fully qualified: `BinaryCrossentropy` is not imported in this notebook.
    original_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)(y_true, y_pred)
    return kd_loss + original_loss

# Student network: same layout as the teacher (LSTM -> Dense -> sigmoid),
# trained with the combined distillation + cross-entropy loss.
student_model = Sequential([
    LSTM(256, input_shape=(student_x_train.shape[1], student_x_train.shape[2])),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

student_model.compile(optimizer='adam', loss=combined_loss, metrics=['accuracy'])

student_model.fit(student_x_train, student_y_train, epochs=2, batch_size=256, validation_split=0.1)

# evaluate the model
test_loss, test_Acc = student_model.evaluate(student_x_test, student_y_test)
print('Test accuracy:', test_Acc)
#student_model.summary()
#print("Confusion Matrix")
y_pred = student_model.predict(student_x_test)
# threshold the sigmoid output at 0.5 to get hard class predictions
y_pred = (y_pred > 0.5)
confusion_mtx = confusion_matrix(student_y_test, y_pred)
# normalize each row so entries are fractions of the true class
confusion_mtx_percent = confusion_mtx / confusion_mtx.sum(axis=1)[:, np.newaxis]
#print(confusion_mtx_percent)

# Save Model to the current working directory
file_name = 'student_model' + str(var) + '_device2_aggregate.h5'

student_model.save(file_name)
# On Google Colab, uncomment to download the saved file to your machine:
#from google.colab import files
#files.download(file_name)  # Download to local machine
# Report what actually happened: the file is saved, not downloaded.
print("Model saved to", file_name)

print("Student Model Trained")



Do not run the cell below

In [None]:
# NOTE(review): this cell was marked "CODE DOESN'T WORK YET". The known problems
# (folder name mismatch, non-importable unused import, unclosed file, missing
# plots folder) are fixed below, but it has not been run end to end.
# Load every model from a folder, write the model summaries to a txt file and
# save a plot of each model.
def get_model_summaries(models_dir='all_models', summary_path='model_summaries.txt', plots_dir='model_plots'):
    """Write the summary of every saved model in a folder to one text file.

    Args:
        models_dir: Folder containing the saved model files. Every entry in
            the folder is passed to load_model.
        summary_path: Text file that receives the summaries (overwritten).
        plots_dir: Folder that receives one '<file>.png' architecture plot per
            model; created if missing.

    Returns:
        None.

    Note: plot_model presumably needs pydot and graphviz installed — verify in
    your environment.
    """
    import os
    from keras.models import load_model
    from keras.utils.vis_utils import plot_model

    # get all files in the folder (same folder that is used for loading below)
    files = os.listdir(models_dir)
    print(files)

    os.makedirs(plots_dir, exist_ok=True)

    # `with` guarantees the summary file is closed even if a model fails to load
    with open(summary_path, "w") as f:
        # loop through all files in the folder
        for file in files:
            # compile=False: only the architecture is needed, and it avoids
            # having to supply the custom loss the student models were compiled with
            model = load_model(os.path.join(models_dir, file), compile=False)
            # get the model summary
            model.summary(print_fn=lambda line: f.write(line + '\n'))
            # get the model plot
            plot_model(model, to_file=os.path.join(plots_dir, file + '.png'), show_shapes=True, show_layer_names=True)
            # write a line break
            f.write('\n\n')
    

In [None]:
# Summary table: one row per trained model, one 'Accuracy' column.
# Labels name the sensors each model used (A = Acc, G = Gyr, B = Bar),
# listed in the same order as the accuracies below.
model_labels = ['ABG', 'AG', 'GB', 'AB','A', 'G', 'B']
model_accuracies = [
    0.9933268427848816,
    0.8989095091819763,
    0.967968761920929,
    0.9861978888511658,
    0.8534993529319763,
    0.8021810054779053,
    0.9472493529319763,
]

# zero-filled placeholder with 7 rows and 1 column
table = np.zeros((7,1))
# wrap it in a DataFrame, attach the labels, and index the rows by them
df = (
    pd.DataFrame(table, columns=['Accuracy'])
    .assign(Model=model_labels)
    .set_index('Model')
)
# fill in the recorded test accuracies
df['Accuracy'] = model_accuracies
