In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle
from tqdm import tqdm

## Load Pickle File

In [2]:
dataset_path = "C:/Users/wyiti/Documents/NUS/MedTech_Morphine/Morphine-22-23/ML/Datasets/01-02-2023"

In [3]:
os.chdir(dataset_path)
os.listdir()

['jump_data_1', 'normal_data_1']

In [4]:
with open("normal_data_1", 'rb') as file:
    healthy_data = pickle.load(file)

with open("jump_data_1", 'rb') as file:
    anomalous_data = pickle.load(file)

## Relevant Function

In [5]:
# Keywords to extract for each set
keywords = ['Ax: ', 'Ay: ', 'Az: ', 'gx: ', 'gy: ', 'gz: ', 'temp: ', 'timing for this set: ']

# length of each keyword
length_keywords = [len(x) for x in keywords]

In [6]:
# function to process one datapoint
def process_one_set_of_datapoint(output_set):
    indexes = [output_set.find(keyword) for keyword in keywords]
    df_row = []
    curr_data_index = int(output_set[:indexes[0]].strip())
    df_row.append(curr_data_index) # append in the index of the new input
    for i, index in enumerate(indexes):
        if i == len(indexes) - 1:
            x = float(output_set[index+length_keywords[i]:].strip())
            df_row.append(x)
        else:
            x = float(output_set[index+length_keywords[i]: indexes[i+1]].strip())
            df_row.append(x)
    return df_row

In [7]:
gps_keywords_list = ['Latitude: ', '(*10^-7) Longitude: ', '(*10^-7) Altitude: ', '(mm) Satellite-in-view: ', 'timing for this set: ']
gps_keywords_length = [len(keyword) for keyword in gps_keywords_list]
#gps_keywords_length

In [8]:
# feed in gps_datapoints = gps_data['GPS Datapoints']
def process_gps_datapoints(gps_datapoints):
    gps_datapoints = gps_datapoints[0]
    indices = [gps_datapoints.find(keyword) for keyword in gps_keywords_list]
    # Latitude, Longtiude, Altitude, Satellite-in-view, timing for this set
    gps_datapoints_list = []
    for i, index in enumerate(indices):
        if i == len(indices) - 1:
            gps_datapoints_list.append(float(gps_datapoints[index + gps_keywords_length[i]:].strip()))
        else:
            gps_datapoints_list.append(float(gps_datapoints[index + gps_keywords_length[i]:indices[i+1]].strip()))
    return gps_datapoints_list

In [9]:
# feed in data['Split Circuit']['GPS']
def process_gps(gps):
    global gps_df
    processed_gps_data = []
    gps_accounter = gps['GPS Accounter']
    gps_datapoints = gps['GPS Datapoints']
    gps_loopSpeedArr = gps['GPS LoopSpeedArr'][0]
    gps_uploadSpeedArr = gps['GPS UploadSpeedArr'][0]
    
    processed_gps_datapoints = process_gps_datapoints(gps_datapoints)
    
    processed_gps_data.append(gps_accounter)
    processed_gps_data.extend(processed_gps_datapoints)
    processed_gps_data.extend([gps_loopSpeedArr, gps_uploadSpeedArr])
    
    # added two more lines
    new_gps_df = pd.DataFrame([processed_gps_data],
                             columns = ['accounter', 'latitude', 'longitude', 'altitude', 'satelliteInView', 'timingForThisSet', 'LoopSpeed', 'UploadSpeed'])
    gps_df = pd.concat([gps_df, new_gps_df], ignore_index = True)
    #return processed_gps_data

In [10]:
# feed in mpu6050_output = split_circuit_data['MPU6050']
def process_mpu6050(mpu6050, timeDifference):
    global mpu6050_df
    accounter = mpu6050['MPU6050 Accounter']
    mpu6050_datapoints = mpu6050['MPU6050 Datapoints'][0]
    mpu6050_loopSpeedArr = mpu6050['MPU6050 LoopSpeedArr'][0]
    mpu6050_uploadSpeedArr = mpu6050['MPU6050 UploadSpeedArr'][0]
    mpu6050_output_sets = mpu6050_datapoints.split('Set: ')[1:]
    
    for output in mpu6050_output_sets:
        data = [accounter, mpu6050_loopSpeedArr,mpu6050_uploadSpeedArr]
        datapoint = process_one_set_of_datapoint(output)
        data.extend(datapoint)
        data.append(timeDifference)
        new_df = pd.DataFrame([data], 
                              columns = ['accounter', 'LoopSpeed', 'UploadSpeed', 'set_index', 'Ax', 'Ay', 'Az', 'gx', 'gy', 'gz', 'temp', 'timingForThisSet', 'timeDifference'])
        mpu6050_df = pd.concat([mpu6050_df, new_df], ignore_index = True)

In [11]:
# overall function to read split circuit, feed in split_circuit_data = data['Split Circuit']
def process_split_ciruit_data(split_circuit_data):
    keys = split_circuit_data.keys()
    for key in keys:
        if key == 'GPS':
            gps_data = split_circuit_data['GPS']
            process_gps(gps_data)
        elif key == 'GPS Button':
            #print(split_circuit_data['GPS Button'])
            continue
        elif key == 'MPU6050':
            mpu6050_output = split_circuit_data['MPU6050']
            process_mpu6050(mpu6050_output, None)
            #print('Loaded MPU6050 Data into mpu6050_df')

## Creating Dataframes

In [32]:
gps_df = pd.DataFrame(columns = ['accounter', 'latitude', 'longitude', 'altitude', 'satelliteInView', 'timingForThisSet', 'LoopSpeed', 'UploadSpeed'])

In [33]:
mpu6050_df = pd.DataFrame(columns = ['accounter', 'LoopSpeed', 'UploadSpeed', 'set_index', 'Ax', 'Ay', 'Az', 'gx', 'gy', 'gz', 'temp', 'timingForThisSet', 'timeDifference'])
#display(mpu6050_df)

## Conversion

In [14]:
# convert anomalous data
for ad in tqdm(anomalous_data):
    process_split_ciruit_data(ad)

100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 25.69it/s]


In [29]:
gps_df.to_csv('C:\\Users\\wyiti\\Documents\\NUS\\MedTech_Morphine\\Morphine-22-23\\ML\\Datasets\\01-02-2023\\anomalous_gps_data.csv')

In [31]:
mpu6050_df.to_csv('C:\\Users\\wyiti\\Documents\\NUS\\MedTech_Morphine\\Morphine-22-23\\ML\\Datasets\\01-02-2023\\anomalous_mpu6050_data.csv')

In [36]:
# convert healthy data
for hd in tqdm(healthy_data):
    process_split_ciruit_data(hd)

100%|████████████████████████████████████████████████████████████████████████████| 3984/3984 [1:28:29<00:00,  1.33s/it]


In [40]:
gps_df.to_csv('C:\\Users\\wyiti\\Documents\\NUS\\MedTech_Morphine\\Morphine-22-23\\ML\\Datasets\\01-02-2023\\healthy_gps_data.csv')

In [41]:
mpu6050_df.to_csv('C:\\Users\\wyiti\\Documents\\NUS\\MedTech_Morphine\\Morphine-22-23\\ML\\Datasets\\01-02-2023\\healthy_mpu6050_data.csv')

## Checking Data 

In [26]:
#pd.set_option('display.max_rows', 100)

In [37]:
gps_df

Unnamed: 0,accounter,latitude,longitude,altitude,satelliteInView,timingForThisSet,LoopSpeed,UploadSpeed
0,27,0.0,0.0,0.0,0.0,0.0,821,118
1,27,0.0,0.0,0.0,0.0,0.0,921,118
2,27,0.0,0.0,0.0,0.0,0.0,921,118
3,27,0.0,0.0,0.0,0.0,0.0,921,118
4,27,0.0,0.0,0.0,0.0,0.0,821,118
...,...,...,...,...,...,...,...,...
3979,227,0.0,0.0,0.0,0.0,0.0,921,100
3980,227,0.0,0.0,0.0,0.0,0.0,921,100
3981,227,0.0,0.0,0.0,0.0,0.0,921,100
3982,227,0.0,0.0,0.0,0.0,0.0,820,100


In [38]:
mpu6050_df

Unnamed: 0,accounter,LoopSpeed,UploadSpeed,set_index,Ax,Ay,Az,gx,gy,gz,temp,timingForThisSet,timeDifference
0,564,260,198,0,9.66,3.11,-2.33,0.38,0.17,-0.11,33.80,4.0,
1,564,260,198,1,9.64,3.10,-2.21,0.39,0.18,-0.12,33.75,2.0,
2,564,260,198,2,9.63,3.19,-2.22,0.41,0.17,-0.12,33.71,2.0,
3,564,260,198,3,9.61,3.17,-2.30,0.42,0.18,-0.12,33.71,3.0,
4,564,260,198,4,9.60,3.11,-2.26,0.43,0.18,-0.11,33.75,3.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
79675,4554,191,131,15,10.17,2.50,-2.53,-1.80,-0.54,0.21,31.40,3.0,
79676,4554,191,131,16,10.26,2.65,-2.41,-1.80,-0.56,0.22,31.40,3.0,
79677,4554,191,131,17,10.22,2.79,-2.41,-1.80,-0.58,0.23,31.40,3.0,
79678,4554,191,131,18,10.37,2.81,-2.03,-1.81,-0.60,0.25,31.45,3.0,


In [43]:
mpu6050_df.groupby('accounter').count()

Unnamed: 0_level_0,LoopSpeed,UploadSpeed,set_index,Ax,Ay,Az,gx,gy,gz,temp,timingForThisSet,timeDifference
accounter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
564,20,20,20,20,20,20,20,20,20,20,20,0
565,20,20,20,20,20,20,20,20,20,20,20,0
566,20,20,20,20,20,20,20,20,20,20,20,0
567,20,20,20,20,20,20,20,20,20,20,20,0
568,20,20,20,20,20,20,20,20,20,20,20,0
...,...,...,...,...,...,...,...,...,...,...,...,...
4550,20,20,20,20,20,20,20,20,20,20,20,0
4551,20,20,20,20,20,20,20,20,20,20,20,0
4552,20,20,20,20,20,20,20,20,20,20,20,0
4553,20,20,20,20,20,20,20,20,20,20,20,0
