In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# groundTruth files: ignore the third column.
# The first number is the starting frame of an eating action; the second number is its ending frame.

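# Video data is 30 frames per second, so frame numbers must be converted into sample numbers.
# Sensor sampling rate is 50 Hz.
# NOTE(review): get_dataframe() below scales ground-truth frames by 100/30 by default, i.e. it
# assumes 100 samples per second -- confirm which rate applies to the EMG and IMU files.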

# ... we can synchronize the two datasets by aligning the time of the last video frame with the last UNIX time stamp in the IMU or EMG file.

# OK: in one second the sensors record 50 readings (50 Hz) while the video records 30 frames,
# so the 50 data points (for each sensor) have to be divided among the 30 frames taken in that second.
# That is why the instructions say to multiply the frame number by 50 and divide by 30.
# Dividing a frame number by 30 alone gives seconds (30 frames = 1 second).

In [23]:
# Entry names found directly under each raw-data root (listed in this order:
# "groundTruth" first, then "MyoData").
groundTruth_Users, MyoData_Users = map(os.listdir, ("groundTruth", "MyoData"))

In [24]:
# Keep only the entries of the MyoData listing that also appear in the
# groundTruth listing; the MyoData order is preserved.
user_id = [candidate for candidate in MyoData_Users if candidate in groundTruth_Users]

# Root folder for the cleaned output; created the first time this cell runs.
directory_clean_data = "clean_data/"
if not os.path.exists(directory_clean_data):
    os.makedirs(directory_clean_data)

In [18]:
def get_paths(user, fork_or_spoon= 'fork'):
    """Return the paths of the raw recording files and the ground-truth file.

    Parameters
    ----------
    user : str
        Name of the user folder under "MyoData/" and "groundTruth/".
    fork_or_spoon : str, default 'fork'
        Name of the sub-folder inside the user folder.

    Returns
    -------
    tuple of str
        (EMG_txt_path, IMU_txt_path, video_info_path, ground_truth_path).

    Raises
    ------
    OSError
        If one of the folders does not exist.
    IndexError
        If a folder holds fewer visible files than expected.
    """
    def _visible_entries(folder):
        # os.listdir returns names in arbitrary order and also returns hidden
        # entries (".DS_Store", ".ipynb_checkpoints", ...). The files are
        # picked by position below, so drop hidden names and sort to make the
        # picks deterministic.
        return sorted(name for name in os.listdir(folder) if not name.startswith("."))

    parent_path = "MyoData/" + user + '/' + fork_or_spoon
    dir_list = _visible_entries(parent_path)
    # NOTE(review): assumes the sorted file names come out as EMG, IMU,
    # video info -- confirm against the dataset's naming scheme.
    EMG_txt_path = parent_path + "/" + dir_list[0]
    IMU_txt_path = parent_path + "/" + dir_list[1]
    video_info_path = parent_path + "/" + dir_list[2]

    # The ground-truth folder mirrors the MyoData layout; its first visible
    # file is used.
    ground_truth_parent_path = "groundTruth/" + user + '/' + fork_or_spoon
    ground_truth_dir_list = _visible_entries(ground_truth_parent_path)
    ground_truth_path = ground_truth_parent_path + "/" + ground_truth_dir_list[0]
    return (EMG_txt_path, IMU_txt_path, video_info_path, ground_truth_path)

def mul(x, y):
    """Multiply ``x`` by ``y`` and round the product with ``np.round``.

    ``x`` is first passed through ``pd.to_numeric``. When the conversion or
    the multiplication fails with a ``TypeError`` or ``ValueError`` (for
    example a non-numeric string), ``x`` is returned unchanged.

    Parameters
    ----------
    x : object
        Value to scale.
    y : number
        Scale factor.

    Returns
    -------
    The rounded product, or ``x`` itself when it could not be scaled.
    """
    try:
        return np.round(pd.to_numeric(x) * y)
    except (TypeError, ValueError):
        # Only conversion/arithmetic failures mean "leave the value alone";
        # KeyboardInterrupt, SystemExit and similar must still propagate.
        return x

def get_dataframe(user = "user10", data_for="EMG", fork_or_spoon="fork", sampling_rate=100, video_fps=30):
    """Load one of a session's files into a DataFrame with named columns.

    Parameters
    ----------
    user : str, default "user10"
        User folder name, forwarded to ``get_paths``.
    data_for : str, default "EMG"
        Which file to load:
        * "EMG" -- first path from ``get_paths``; nine named columns plus an
          ``eating`` column initialised to False.
        * "IMU" -- second path; eleven named columns plus ``eating`` = False.
        * "ground_truth" -- fourth path; the third column is dropped and the
          remaining "start"/"stop" values are scaled by
          ``sampling_rate / video_fps`` and rounded through ``mul``.
        * any other value -- third path (video info) with the columns
          "starting frame" and "ending frame".
    fork_or_spoon : str, default "fork"
        Sub-folder name, forwarded to ``get_paths``.
    sampling_rate : number, default 100
        Numerator of the ground-truth scale factor. The default reproduces the
        original hard-coded factor of 100/30.
        NOTE(review): the notebook header mentions 50 Hz sensors -- confirm
        which rate applies to each sensor file before relying on the default.
    video_fps : number, default 30
        Denominator of the ground-truth scale factor.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the file's column count does not match the expected column names.
    """
    paths = get_paths(user, fork_or_spoon)

    if data_for == "EMG":
        df = pd.read_csv(paths[0], sep=",", header=None)
        df.columns = ['UNIX time stamp', 'EMG 1', 'EMG 2', 'EMG 3', 'EMG 4', 'EMG 5', 'EMG 6', 'EMG 7', 'EMG 8']
        return df.assign(eating=False)

    if data_for == "IMU":
        df = pd.read_csv(paths[1], sep=",", header=None)
        df.columns = ['UNIX time stamp', 'Orientation X', 'Orientation Y', 'Orientation Z', 'Orientation W', 'Accelerometer X', 'Accelerometer Y', 'Accelerometer Z', 'Gyroscope X', 'Gyroscope Y','Gyroscope Z']
        return df.assign(eating=False)

    if data_for == "ground_truth":
        df = pd.read_csv(paths[3], sep=",", header=None)
        df.columns = ["start","stop", "noise"]
        df = df.drop(columns=["noise"])
        frames_to_samples = sampling_rate / video_fps
        # Column-wise Series.map instead of DataFrame.applymap, which is
        # deprecated in recent pandas releases; the element-wise result is
        # the same.
        return df.apply(lambda column: column.map(lambda value: mul(value, frames_to_samples)))

    # Fallback for every other data_for value: the video-info file.
    df = pd.read_csv(paths[2], header=None)
    df.columns = ["starting frame", "ending frame"]
    return df
    
def save_clean_data(df_ground_truth, df, name, user, fork_or_spoon="fork"):
    """Label the rows of ``df`` as eating / not eating and write the results.

    For every row of ``df_ground_truth`` the first two values are used as an
    inclusive label slice ``start:stop`` on ``df``'s index, and the ``eating``
    column is set to True for those rows. ``df`` is modified in place.

    Three comma-separated files are written to
    ``clean_data/<user>/<fork_or_spoon>/`` (the folder is created if needed):

    * ``eating_<name>.txt``     -- rows where ``eating`` is True
    * ``not_eating_<name>.txt`` -- rows where ``eating`` is False
    * ``master_df<name>.txt``   -- the complete labelled frame

    The eating and not-eating outputs are truncated to the same number of
    rows (the smaller of the two), keeping the first rows of each.

    Parameters
    ----------
    df_ground_truth : pandas.DataFrame
        Frame whose first two columns hold the start and stop labels.
    df : pandas.DataFrame
        Sensor frame with an ``eating`` column; mutated in place.
    name : str
        Suffix used in the output file names.
    user : str
        User folder name under "clean_data/".
    fork_or_spoon : str, default "fork"
        Sub-folder name under the user folder.
    """
    # Select the two bounds by position explicitly. Integer keys on a
    # label-indexed row (row[0], row[1]) rely on a deprecated positional
    # fallback in pandas.
    bounds = df_ground_truth.iloc[:, :2]
    for eating_start, eating_stop in bounds.itertuples(index=False, name=None):
        # NOTE(review): sample numbers are assumed to be 0-based here; add one
        # if the annotations turn out to be 1-based.
        df.loc[eating_start:eating_stop, 'eating'] = True

    df_eating = df[df['eating'].eq(True)].reset_index(drop=True)
    df_not_eating = df[df['eating'].eq(False)].reset_index(drop=True)

    directory = "clean_data/" + user + "/" + fork_or_spoon
    os.makedirs(directory, exist_ok=True)

    # Balance the two classes by keeping the same number of leading rows.
    min_data_points = min(df_eating.shape[0], df_not_eating.shape[0])
    df_eating = df_eating.iloc[:min_data_points]
    df_not_eating = df_not_eating.iloc[:min_data_points]

    df_eating.to_csv(path_or_buf=directory+"/eating_" + name + ".txt", sep=',')
    df_not_eating.to_csv(path_or_buf=directory+"/not_eating_" + name + ".txt", sep=',')
    df.to_csv(path_or_buf=directory+"/master_df" + name + ".txt", sep=',')
    

In [21]:
# This code takes ~2 mins to run. SO only run when necessary
def clean_data_for_each_user():
    """Build and save the cleaned EMG and IMU files for every user in ``user_id``.

    For each user and for both 'spoon' and 'fork', the EMG, IMU and
    ground-truth frames are loaded with ``get_dataframe`` and written with
    ``save_clean_data``. When any step fails for a user, a message is printed
    and the user is removed from the module-level ``user_id`` list; the
    remaining utensil is still attempted.
    """
    # Iterate over a snapshot: removing entries from the list being iterated
    # would make the loop skip the user that follows each removed one.
    for user in list(user_id):
        for fork_or_spoon in ['spoon','fork']:
            try:
                df_EMG = get_dataframe(user, data_for="EMG", fork_or_spoon=fork_or_spoon)
                df_IMU = get_dataframe(user, data_for="IMU", fork_or_spoon=fork_or_spoon)
                df_ground_truth = get_dataframe(user, data_for="ground_truth", fork_or_spoon=fork_or_spoon)

                save_clean_data(df_ground_truth, df_EMG, name="EMG", user=user, fork_or_spoon=fork_or_spoon)
                save_clean_data(df_ground_truth, df_IMU, name="IMU", user=user, fork_or_spoon=fork_or_spoon)
            except Exception as error:
                print("folder name for users don't match or error in csv for ---> " + user)
                # Show the actual cause instead of hiding it.
                print("    (" + fork_or_spoon + ") " + repr(error))
                # Both utensils can fail for the same user; remove only once,
                # otherwise list.remove raises ValueError inside the handler.
                if user in user_id:
                    print("removing from list of users to use in the project")
                    user_id.remove(user)

In [32]:
# Run the cleaning steps for a single user / utensil (same steps as
# clean_data_for_each_user, without the loops).
user = "user10"
fork_or_spoon = 'fork'
try:
    df_EMG = get_dataframe(user, data_for="EMG", fork_or_spoon=fork_or_spoon)
    df_IMU = get_dataframe(user, data_for="IMU", fork_or_spoon=fork_or_spoon)
    df_ground_truth = get_dataframe(user, data_for="ground_truth", fork_or_spoon=fork_or_spoon)

    save_clean_data(df_ground_truth, df_EMG, name="EMG", user=user, fork_or_spoon=fork_or_spoon)
    save_clean_data(df_ground_truth, df_IMU, name="IMU", user=user, fork_or_spoon=fork_or_spoon)
except Exception as error:
    print("folder name for users don't match or error in csv for ---> " + user)
    # Show the actual cause instead of hiding it.
    print("    (" + fork_or_spoon + ") " + repr(error))
    # The user may already be gone from the list (for example after an
    # earlier run); an unguarded remove would raise ValueError here.
    if user in user_id:
        user_id.remove(user)

In [22]:
# This code takes ~4 mins to run. SO only run when necessary
# NOTE(review): the saved output under this cell ("error for user27",
# "error for user29") does not match the messages clean_data_for_each_user
# currently prints, so it appears to come from an earlier version of the
# function -- re-run the cell to refresh it.

clean_data_for_each_user()

error for user27
error for user29


In [None]:
# diff_array = []
# diff_EMG = []
# diff_IMU = []
# for user in user_id:
#     last_row_EMG = get_dataframe(user, data_for="EMG")[-1:].index[0]
#     last_row_IMU = get_dataframe(user, data_for="IMU")[-1:].index[0]
#     diff_array.append(last_row_EMG / last_row_IMU)
#     last_frame = get_dataframe(data_for = "other")['ending frame'][0]
#     diff_EMG.append(np.abs(last_row_EMG - last_frame))
#     diff_IMU.append(np.abs(last_row_IMU - last_frame))

In [60]:
# used for deleting extra text files that were not in a directory

# import os
# for user in os.listdir("clean_data"):
    
#     filelist = [ f for f in os.listdir("clean_data/" + user) if f.endswith(".txt") ]
#     for f in filelist:
#         os.remove(os.path.join("clean_data/" + user, f))
# #os.remove("demofile.txt")
# for user in os.listdir("clean_data"):
#     print(os.listdir("clean_data/" + user))

import os

# Dry run: print the folder name once for every entry inside it whose name
# ends with ".ipynb_checkpoints". Nothing is deleted; the removal call is
# kept below, disabled, for reference.
for user in os.listdir("clean_data"):
    user_dir = "clean_data/" + user
    filelist = list(filter(lambda entry: entry.endswith(".ipynb_checkpoints"), os.listdir(user_dir)))
    for f in filelist:
        print(user)
        # os.remove(os.path.join("clean_data/" + user, f))

print("\n\n\n")

# Show the current contents of every folder under clean_data.
for user in os.listdir("clean_data"):
    print(os.listdir("clean_data/" + user))





[]
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
['spoon', 'fork']
