# Adding Non-Exercise Data to Database

The existing database with the exercise data is reused and extended with the non-exercise data.

In [1]:
import sqlite3
import pandas as pd
import numpy as np
import os
import re
import functionsMasterProjectMeinhart as fmpm

## Get the csv-file names of the non-exercise data (from a defined folder)

In [2]:
# folder with non-exercise csv-files
# (raw string: '\P' is not a valid escape sequence, so a normal string literal
#  triggers a DeprecationWarning/SyntaxWarning; the resulting path is unchanged)
file_dir = r'E:\Physio_Data_Split_nonEx'

# load the csv-file names of the desired folder (top level only, no sub-folders);
# os.walk yields nothing if the folder cannot be read --> the list stays empty
nonEx_files = []
for (dirpath, dirnames, filenames) in os.walk(file_dir):
    # keep csv-files only and sort them: the order of a directory listing is
    # not guaranteed, and other files would not match the expected file names
    nonEx_files = sorted(f for f in filenames if f.lower().endswith('.csv'))
    break

nonEx_files

['subject01_00_nonEx.csv',
 'subject01_01_nonEx.csv',
 'subject01_02_nonEx.csv',
 'subject01_03_nonEx.csv',
 'subject01_04_nonEx.csv',
 'subject01_05_nonEx.csv',
 'subject01_06_nonEx.csv',
 'subject01_07_nonEx.csv',
 'subject01_08_nonEx.csv',
 'subject01_09_nonEx.csv',
 'subject01_10_nonEx.csv',
 'subject01_11_nonEx.csv',
 'subject01_12_nonEx.csv',
 'subject01_13_nonEx.csv',
 'subject01_14_nonEx.csv',
 'subject01_15_nonEx.csv',
 'subject01_16_nonEx.csv',
 'subject01_17_nonEx.csv',
 'subject01_18_nonEx.csv',
 'subject01_19_nonEx.csv',
 'subject01_20_nonEx.csv',
 'subject01_21_nonEx.csv',
 'subject01_22_nonEx.csv',
 'subject01_23_nonEx.csv',
 'subject01_24_nonEx.csv',
 'subject01_25_nonEx.csv',
 'subject01_26_nonEx.csv',
 'subject01_27_nonEx.csv',
 'subject01_28_nonEx.csv',
 'subject01_29_nonEx.csv',
 'subject01_30_nonEx.csv',
 'subject02_00_nonEx.csv',
 'subject02_01_nonEx.csv',
 'subject02_02_nonEx.csv',
 'subject02_03_nonEx.csv',
 'subject02_04_nonEx.csv',
 'subject02_05_nonEx.csv',
 

## What we know about the exercise data

The repetitions of the different exercises have a mean duration of approx. 2.8 s with a standard deviation of 0.6 s (see Creating_Database_using_sqlite.ipynb).

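Hence, in the following cells the non-exercise data from the csv-files are split into consecutive blocks whose durations are drawn from a normal distribution with the same mean and standard deviation. As an additional constraint, each block must have a length between 1 s and 5 s; drawn durations outside this range are set to the nearest limit.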

The start and stop times of the generated time ranges are then saved to the database.

In [3]:
# duration statistics taken over from the exercise repetitions, values in [s]
dur_mean, dur_std = 2.8, 0.6  # mean and standard deviation

# shortest and longest permitted length of a generated block, values in [s]
min_dur, max_dur = 1, 5

In [4]:
# example: draw a single random duration [s] from the normal distribution
# given by dur_mean and dur_std (not yet limited to min_dur ... max_dur)
np.random.normal(dur_mean, dur_std)

2.6843390173828787

## Split non-exercise files and write times to database

In [13]:
# !!! Execute this code only once --> writing to database !!!
# (a second run is refused with a RuntimeError, see the check for the
#  non-exercise paradigm below)


def _split_into_blocks(total_dur, mean, std, shortest, longest):
    """Split the time range 0 ... total_dur into consecutive random blocks.

    The length of each block is drawn from a normal distribution (mean, std)
    and limited to the range shortest ... longest. If a block would exceed
    total_dur, the remaining part is taken as the last block when it is longer
    than shortest, otherwise it is omitted.

    Returns a list of (sequence_num, start_time, stop_time) tuples with
    sequence numbers starting at 1; the list is empty if total_dur is not
    longer than shortest.
    """
    blocks = []
    start = 0  # [s] first start time is always zero
    while True:
        length = np.random.normal(loc=mean, scale=std)  # [s]
        length = min(max(length, shortest), longest)
        stop = start + length

        # stop time out of range --> decide about the remaining last block
        if stop > total_dur:
            if total_dur - start > shortest:
                blocks.append((len(blocks) + 1, start, total_dur))
            break

        blocks.append((len(blocks) + 1, start, stop))
        start = stop  # next block starts where this one stops
    return blocks


# connect with existing database
db_name = 'DataBase_Physio.db'
conn = sqlite3.connect(db_name)
try:
    cur = conn.cursor()

    abbr_nonEx = 'NE' # abbreviation for non exercise data

    # refuse a second run: it would add the paradigm and all blocks once more
    paradigm_sql = 'SELECT id FROM paradigms WHERE abbreviation = ?'
    cur.execute(paradigm_sql, (abbr_nonEx,))
    if cur.fetchone() is not None:
        raise RuntimeError("paradigm '" + abbr_nonEx + "' already exists in " + db_name
                           + " --> non-exercise data seem to be written already")

    # create a new entry for non-exercise data in the paradigm table
    cur.execute('INSERT INTO paradigms (abbreviation) VALUES (?)', (abbr_nonEx,))

    # get the paradigm-id of non-exercise data
    cur.execute(paradigm_sql, (abbr_nonEx,))
    paradigm_id = cur.fetchone()[0]

    # statements and values which are the same for all files
    exercise_sql = 'INSERT INTO exercises (subject_id, paradigm_id, num_rep, csv_file) VALUES (?,?,?,?)'
    repetitions_sql = 'INSERT INTO repetitions (sequence_num, exercise_id, start_time, stop_time) VALUES (?,?,?,?)'
    number_repetitions = 0 # because it is not exercise data

    # going through all non-exercise files
    for nonEx_file in nonEx_files:

        # get subject ID from filename (e.g. 'subject01_00_nonEx.csv' --> 1)
        subject_id = int(re.split('[t_.]', nonEx_file)[1])

        # generate the entry in "exercises", and get the corresponding id
        cur.execute(exercise_sql, (subject_id, paradigm_id, number_repetitions, nonEx_file))
        exercise_id = cur.lastrowid

        # join path to current csv-file
        nonEx_csv_path = os.path.join(file_dir, nonEx_file)

        # load the signal data
        data = fmpm.get_sensor_data(nonEx_csv_path, signals=['Acc','Gyr','Mag'], sampling_rate=256)

        # get total duration of loaded signals in seconds
        # NOTE(review): assumes data['time'] is a non-empty sequence of time
        # stamps in seconds that supports the index -1 --> confirm in fmpm
        signal_total_dur = data['time'][-1]

        # split the signal into randomized time ranges from min_dur to max_dur,
        # considering the mean duration and standard deviation of the exercise data
        blocks = _split_into_blocks(signal_total_dur, dur_mean, dur_std, min_dur, max_dur)

        # generate the entries in "repetitions" --> the sequence number of the
        # block is used as sequence number of the repetition
        cur.executemany(repetitions_sql,
                        [(seq_num, exercise_id, start_time, stop_time)
                         for seq_num, start_time, stop_time in blocks])

    # one commit at the very end: if anything fails before, nothing is written
    conn.commit()
finally:
    # always release the database; closing without commit discards open changes
    conn.close()


## Show content of database

In [5]:
# Connect to the existing database (already renamed)
# NOTE: sqlite3.connect creates a new, empty file if db_name does not exist;
# in that case the query below fails because the tables are missing
db_name = 'DataBase_Physio_with_nonEx.db'
conn = sqlite3.connect(db_name)
cur = conn.cursor()

# extract the data from the database: one row per repetition/block together
# with subject, paradigm abbreviation and csv-file of the corresponding exercise
# (the inner join with "subjects" keeps only exercises of existing subjects)
query_sql = """
    SELECT e.subject_id,
        p.abbreviation,
        e.num_rep,
        r.sequence_num,
        r.start_time, r.stop_time,
        e.csv_file
    FROM subjects s
    INNER JOIN exercises e
    ON s.id = e.subject_id
    INNER JOIN paradigms p
    ON p.id = e.paradigm_id
    INNER JOIN repetitions r
    ON e.id = r.exercise_id
    """
try:
    df = pd.read_sql_query(query_sql, conn)
finally:
    # close the connection even if the query fails
    conn.close()
df

Unnamed: 0,subject_id,abbreviation,num_rep,sequence_num,start_time,stop_time,csv_file
0,1,RF,5,1,1.1482924107142871,3.699122023809525,subject01_RF_05.csv
1,1,RF,5,2,3.699122023809525,6.49581473214286,subject01_RF_05.csv
2,1,RF,5,3,6.49581473214286,9.384706101190478,subject01_RF_05.csv
3,1,RF,5,4,9.384706101190478,12.073833705357146,subject01_RF_05.csv
4,1,RF,5,5,12.073833705357146,14.809060639880954,subject01_RF_05.csv
5,2,RF,5,1,0.6509139384920637,3.911928323412699,subject02_RF_05.csv
6,2,RF,5,2,3.911928323412699,7.031159474206351,subject02_RF_05.csv
7,2,RF,5,3,7.031159474206351,10.398511284722224,subject02_RF_05.csv
8,2,RF,5,4,10.398511284722224,13.588634052579367,subject02_RF_05.csv
9,2,RF,5,5,13.588634052579367,16.49519035218254,subject02_RF_05.csv
