In [1]:
import pandas as pd
import sqlite3
import os
import time


# Notebook configuration: subject identifier and location of the SQLite file.
subject = 'IorNYll1lS'  #! The ID of the subject on Back4app
# Directory holding the database file, and the database file's name.
folder_path = 'database extractor/data'
df_file = '20240413_165217_14ChannelSensorDatabase_mci017.db'
# Path handed to sqlite3.connect() by the cells below (always '/'-separated).
db_file_path = '/'.join((folder_path, df_file))


In [2]:
# Inclusive SESSION_ID range to extract (SQL BETWEEN includes both ends).
# These two values are also read by the later cells.
start_session = 156
end_session = 195

# Select the USER_SESSION_TABLE rows in that range. The bounds are bound as
# query parameters ('?' placeholders) rather than formatted into the SQL text.
sql_query = 'SELECT * FROM USER_SESSION_TABLE WHERE SESSION_ID BETWEEN ? AND ?'

# Open the SQLite database and read the matching rows into a DataFrame.
conn = sqlite3.connect(db_file_path)
try:
    df = pd.read_sql(sql_query, conn, params=(start_session, end_session))
finally:
    # Release the database handle even when the query fails.
    conn.close()


In [48]:
# Wall-clock reference for the progress messages printed below.
start_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))

# One query per raw insole table, restricted to the selected session range.
# The range is bound through '?' placeholders instead of string formatting.
right_insole_query = 'SELECT * FROM RIGHT_INSOLE_RAW WHERE SESSION_ID BETWEEN ? AND ?'
left_insole_query = 'SELECT * FROM LEFT_INSOLE_RAW WHERE SESSION_ID BETWEEN ? AND ?'
session_range = (start_session, end_session)

# Read both tables into DataFrames over a single connection.
conn = sqlite3.connect(db_file_path)
try:
    right_insole_df = pd.read_sql(right_insole_query, conn, params=session_range)
    print("--- Right Query %s seconds ---" % (time.time() - start_time))
    left_insole_df = pd.read_sql(left_insole_query, conn, params=session_range)
    print("--- Left Query %s seconds ---" % (time.time() - start_time))
finally:
    # Close the connection even if one of the reads raises.
    conn.close()

# Collapse each table to one row per SESSION_ID; every other column becomes
# a Python list holding that session's values in row order.
grouped_right_insole = right_insole_df.groupby('SESSION_ID').agg(list).reset_index()
grouped_left_insole = left_insole_df.groupby('SESSION_ID').agg(list).reset_index()
print("--- Complete in %s seconds ---" % (time.time() - start_time))


--- 0.0 seconds ---
--- Right Query 15.796732902526855 seconds ---
--- Left Query 29.92012619972229 seconds ---
--- Complete in 35.07804322242737 seconds ---


In [49]:
import time
# Wall-clock reference for the progress messages printed below.
start_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))


# Read USER_SESSION_TABLE to get START_DATE for each SESSION_ID
user_session_query = 'SELECT SESSION_ID, START_DATE FROM USER_SESSION_TABLE'
conn = sqlite3.connect(db_file_path)
try:
    user_session_df = pd.read_sql(user_session_query, conn)
finally:
    # The connection is only needed for this one read; close it even on error.
    conn.close()
print("--- Read %s seconds ---" % (time.time() - start_time))

# Attach START_DATE to every grouped session row (inner join on SESSION_ID)
merged_right_insole = pd.merge(grouped_right_insole, user_session_df, on='SESSION_ID', how='inner')
merged_left_insole = pd.merge(grouped_left_insole, user_session_df, on='SESSION_ID', how='inner')


print("--- Merged in %s seconds ---" % (time.time() - start_time))


# Root folder path
# NOTE(review): this says 'mci006' while the database file name in the config
# cell says 'mci017' -- confirm the export is going to the intended subject folder.
root_folder_path = 'database extractor/data/mci006/'

# Filter data for the specified SESSION_ID range
filtered_right_insole = merged_right_insole[(merged_right_insole['SESSION_ID'] >= start_session) & (merged_right_insole['SESSION_ID'] <= end_session)]
filtered_left_insole = merged_left_insole[(merged_left_insole['SESSION_ID'] >= start_session) & (merged_left_insole['SESSION_ID'] <= end_session)]

print("--- Filtered in %s seconds ---" % (time.time() - start_time))


def _export_insole_sessions(sessions, side, announce=False):
    """Write one header-less, comma-separated TXT file per session row.

    Parameters
    ----------
    sessions : pandas.DataFrame
        One row per session with 'SESSION_ID', 'START_DATE' and 'RAW_ID'
        columns; every remaining column holds a list of readings and becomes
        one column of the exported file. A 'READING_1' column is required.
    side : str
        Insole label inserted into the file name ('Right' or 'Left').
    announce : bool, default False
        When True, print 'Session <id>' before processing each row.

    Sessions whose 'READING_1' list has fewer than 2 / 0.05 entries are
    reported and skipped. Files are written below the notebook-level
    `root_folder_path`, in a folder derived from START_DATE, and the file name
    embeds the notebook-level `subject`.
    """
    for _, session in sessions.iterrows():
        session_id = session['SESSION_ID']
        if announce:
            print(f'Session {session_id}')
        start_date = str(session['START_DATE'])
        # Folder name: first 4 chars, next 2 chars, then the rest of START_DATE.
        # NOTE(review): presumably START_DATE is a YYYYMMDD value -- confirm.
        date_folder = start_date[:4] + '-' + start_date[4:6] + '-' + start_date[6:]
        array_data = session.drop(['SESSION_ID', 'START_DATE', 'RAW_ID'])

        # Convert each list of readings to a pandas Series
        series_list = {col: pd.Series(data) for col, data in array_data.items()}
        # NOTE(review): 0.05 is presumably the sampling period in seconds -- confirm.
        if len(series_list['READING_1']) < 2 / 0.05: #! data can be less than 2 seconds, which is not necessary
            print(f'Session {session_id} less than 2 seconds, Skip')
            print("-----------------------------------------------------------")
            continue
        # Create a new DataFrame with one column per reading Series
        session_df = pd.DataFrame(series_list)

        # Use a local name here so the config cell's `folder_path` is not overwritten.
        session_folder = os.path.join(root_folder_path, date_folder)
        os.makedirs(session_folder, exist_ok=True)  # Create folder if it doesn't exist
        filename = os.path.join(session_folder, f'S{session_id}_{start_date}_rawData{side}_{subject}.txt')
        session_df.to_csv(filename, sep=',', header=False, index=False)


# Export the right insole first, then the left; only the left pass announces
# each session, matching the output recorded for this cell.
_export_insole_sessions(filtered_right_insole, 'Right')

print("--- %s seconds ---" % (time.time() - start_time))
_export_insole_sessions(filtered_left_insole, 'Left', announce=True)

print("--- Completed in %s seconds ---" % (time.time() - start_time))

--- 0.0 seconds ---
--- Read 0.004000663757324219 seconds ---
--- Merged in 0.010000467300415039 seconds ---
--- Filtered in 1.5944843292236328 seconds ---
--- 13.795521259307861 seconds ---
Session 156
Session 157
Session 158
Session 159
Session 160
Session 161
Session 162
Session 163
Session 164
Session 165
Session 166
Session 167
Session 168
Session 169
Session 170
Session 171
Session 172
Session 173
Session 174
Session 175
Session 176
Session 177
Session 178
Session 179
Session 180
Session 181
Session 182
Session 183
Session 184
Session 185
Session 186
Session 187
Session 188
Session 189
Session 190
Session 191
Session 192
Session 193
Session 194
Session 195
--- Completed in 25.942578077316284 seconds ---
