## Extraction of .bin file from preprocessed directory 

In [21]:
# Import necessary modules
import numpy as np
import pandas as pd
import os
from itertools import product  # This line imports the product function
from sqlalchemy import create_engine #The line imports the create_engine function from the SQLAlchemy library.

## All movement file to one single dataframe 

In [None]:
# Directory where the binary files are stored
movement_dir = 'preprocessed/movement/'

# Task, wrist, sensor, and axis options
tasks = ["Relaxed1", "Relaxed2", "RelaxedTask1", "RelaxedTask2", "StretchHold",
         "HoldWeight", "DrinkGlas", "CrossArms", "TouchNose", "Entrainment1", "Entrainment2"]
wrists = ["Left", "Right"]
sensors = ["Accelerometer", "Gyroscope"]
axes = ["X", "Y", "Z"]

# Generate the channel names by combining task, wrist, sensor, and axis
channels = [f"{task}_{wrist}_{sensor}_{axis}" for task, wrist, sensor, axis in 
            product(tasks, wrists, sensors, axes)]
expected_data_points = len(channels)  # Number of channels

# Function to process and extract data from a binary file
def extract_data(file_path, channels, expected_data_points):
    data = np.fromfile(file_path, dtype=np.float32)
    total_data_points = len(data)
    print(f"Processing file '{file_path}' with {total_data_points} data points.")
    
    # Check if the total data points divide evenly by the number of channels
    if total_data_points % expected_data_points != 0:
        print(f"Warning: Data points ({total_data_points}) do not divide evenly by {expected_data_points}.")
        # Truncate to the largest divisible portion
        valid_length = (total_data_points // expected_data_points) * expected_data_points
        data = data[:valid_length]
        print(f"Data truncated to {valid_length} points.")
    
    # Calculate the number of time steps
    num_time_steps = len(data) // expected_data_points
    print(f"Number of time steps: {num_time_steps}")
    
    # Reshape the data into a 2D array
    reshaped_data = data.reshape((num_time_steps, expected_data_points))
    
    # Create a DataFrame with channel names as column headers
    data_df = pd.DataFrame(reshaped_data, columns=channels)
    
    # Add an ID column to identify the file's data points
    data_df.insert(0, 'ID', range(1, num_time_steps + 1))

    # Add a column to specify the file name for identification
    data_df['File'] = os.path.basename(file_path)
    
    return data_df

# Process each binary file in the directory and store results in a dictionary
processed_data = {}
for file_name in sorted(os.listdir(movement_dir)):
    if file_name.endswith('.bin'):
        file_path = os.path.join(movement_dir, file_name)
        result_df = extract_data(file_path, channels, expected_data_points)
        
        # Store the DataFrame in the dictionary
        processed_data[file_name] = result_df
        print(f"Data for '{file_name}' processed and stored in memory.")

# Optional: Combine all data into a single DataFrame
combined_df = pd.concat(processed_data.values(), ignore_index=True)
print("Combined data stored in memory as a single DataFrame.")

In [25]:
combined_df 

Unnamed: 0,ID,Relaxed1_Left_Accelerometer_X,Relaxed1_Left_Accelerometer_Y,Relaxed1_Left_Accelerometer_Z,Relaxed1_Left_Gyroscope_X,Relaxed1_Left_Gyroscope_Y,Relaxed1_Left_Gyroscope_Z,Relaxed1_Right_Accelerometer_X,Relaxed1_Right_Accelerometer_Y,Relaxed1_Right_Accelerometer_Z,...,Entrainment2_Left_Gyroscope_X,Entrainment2_Left_Gyroscope_Y,Entrainment2_Left_Gyroscope_Z,Entrainment2_Right_Accelerometer_X,Entrainment2_Right_Accelerometer_Y,Entrainment2_Right_Accelerometer_Z,Entrainment2_Right_Gyroscope_X,Entrainment2_Right_Gyroscope_Y,Entrainment2_Right_Gyroscope_Z,File
0,1,-0.007183,-0.007979,-0.007813,-0.006712,-0.007526,-0.006415,-0.008140,-0.005024,-0.005753,...,0.001522,0.000559,0.001519,-0.002423,-0.002487,-0.000593,0.000311,-0.001722,-0.000819,001_ml.bin
1,2,0.001043,0.001997,0.002999,0.003058,0.001165,-0.000729,0.001272,-0.000653,0.000377,...,0.004418,0.001537,0.000647,0.000701,-0.000202,-0.000140,-0.002978,-0.003918,-0.003884,001_ml.bin
2,3,-0.001907,0.000037,0.001010,0.002977,0.002096,0.000236,0.000322,-0.000555,-0.001443,...,-0.006298,-0.006403,-0.004579,-0.000845,0.000921,0.000764,0.003550,0.003449,0.003356,001_ml.bin
3,4,0.002346,0.002297,0.002318,0.003289,-0.000583,0.001389,0.003343,0.002417,0.002434,...,-0.000019,-0.001957,-0.001020,-0.003008,-0.002099,-0.001175,-0.000244,-0.000305,0.000652,001_ml.bin
4,5,-0.000287,-0.000247,-0.001204,-0.003151,-0.003194,-0.000343,-0.000421,0.001448,0.003319,...,-0.000363,-0.000385,0.001572,0.003541,0.004588,0.005647,0.004855,0.003061,0.001238,001_ml.bin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457739,972,-0.028380,-0.030496,-0.027287,-0.025132,-0.021923,-0.016598,-0.013408,-0.014465,-0.010212,...,0.085307,0.081067,0.074700,0.060897,0.048153,0.032222,0.020531,0.006726,-0.028336,469_ml.bin
457740,973,-0.051722,-0.078292,-0.091057,-0.111241,-0.126104,-0.137775,-0.145190,-0.150486,-0.145166,...,0.026759,0.029935,0.047994,0.048008,0.053333,0.065027,0.088408,0.117115,0.141585,469_ml.bin
457741,974,0.157558,0.154404,0.144858,0.127850,0.102335,0.085313,0.083160,0.077828,0.067208,...,-0.005010,-0.000765,0.002415,0.017275,0.030004,0.044854,0.055456,0.059699,0.054413,469_ml.bin
457742,975,0.064008,0.081033,0.104426,0.105513,0.105531,0.098107,0.095981,0.098097,0.086409,...,0.035347,0.048082,0.056569,0.062938,0.062925,0.072472,0.067174,0.055518,0.059804,469_ml.bin


## Establish the database connection to create a database and export the table 

In [None]:
# Establish the database connection
engine = create_engine('postgresql://postgres:eysk123@localhost/PADS')

# Use pandas to create the table and populate data
combined_df.to_sql('movement_bin', engine, index=False, if_exists='replace', schema='public')

In [29]:
# Query the database 
query = "SELECT * FROM public.movement_bin;"
result = pd.read_sql(query, engine)

# Display the query results
result

Unnamed: 0,ID,Relaxed1_Left_Accelerometer_X,Relaxed1_Left_Accelerometer_Y,Relaxed1_Left_Accelerometer_Z,Relaxed1_Left_Gyroscope_X,Relaxed1_Left_Gyroscope_Y,Relaxed1_Left_Gyroscope_Z,Relaxed1_Right_Accelerometer_X,Relaxed1_Right_Accelerometer_Y,Relaxed1_Right_Accelerometer_Z,...,Entrainment2_Left_Gyroscope_X,Entrainment2_Left_Gyroscope_Y,Entrainment2_Left_Gyroscope_Z,Entrainment2_Right_Accelerometer_X,Entrainment2_Right_Accelerometer_Y,Entrainment2_Right_Accelerometer_Z,Entrainment2_Right_Gyroscope_X,Entrainment2_Right_Gyroscope_Y,Entrainment2_Right_Gyroscope_Z,File
0,1,-0.007183,-0.007979,-0.007813,-0.006712,-0.007526,-0.006415,-0.008140,-0.005024,-0.005753,...,0.001522,0.000559,0.001519,-0.002423,-0.002487,-0.000593,0.000311,-0.001722,-0.000819,001_ml.bin
1,2,0.001043,0.001997,0.002999,0.003058,0.001165,-0.000729,0.001272,-0.000653,0.000377,...,0.004418,0.001537,0.000647,0.000701,-0.000202,-0.000140,-0.002978,-0.003918,-0.003884,001_ml.bin
2,3,-0.001907,0.000037,0.001010,0.002977,0.002096,0.000236,0.000322,-0.000555,-0.001443,...,-0.006298,-0.006403,-0.004579,-0.000845,0.000921,0.000764,0.003550,0.003449,0.003356,001_ml.bin
3,4,0.002346,0.002297,0.002318,0.003289,-0.000583,0.001389,0.003343,0.002417,0.002434,...,-0.000019,-0.001957,-0.001020,-0.003008,-0.002099,-0.001175,-0.000244,-0.000305,0.000652,001_ml.bin
4,5,-0.000287,-0.000247,-0.001204,-0.003151,-0.003194,-0.000343,-0.000421,0.001448,0.003319,...,-0.000363,-0.000385,0.001572,0.003541,0.004588,0.005647,0.004855,0.003061,0.001238,001_ml.bin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457739,972,-0.028380,-0.030496,-0.027287,-0.025132,-0.021923,-0.016598,-0.013408,-0.014465,-0.010212,...,0.085307,0.081067,0.074700,0.060897,0.048153,0.032222,0.020531,0.006726,-0.028336,469_ml.bin
457740,973,-0.051722,-0.078292,-0.091057,-0.111241,-0.126104,-0.137775,-0.145190,-0.150486,-0.145166,...,0.026759,0.029935,0.047994,0.048008,0.053333,0.065027,0.088408,0.117115,0.141585,469_ml.bin
457741,974,0.157558,0.154404,0.144858,0.127850,0.102335,0.085313,0.083160,0.077828,0.067208,...,-0.005010,-0.000765,0.002415,0.017275,0.030004,0.044854,0.055456,0.059699,0.054413,469_ml.bin
457742,975,0.064008,0.081033,0.104426,0.105513,0.105531,0.098107,0.095981,0.098097,0.086409,...,0.035347,0.048082,0.056569,0.062938,0.062925,0.072472,0.067174,0.055518,0.059804,469_ml.bin


In [31]:
combined_df.tail()

Unnamed: 0,ID,Relaxed1_Left_Accelerometer_X,Relaxed1_Left_Accelerometer_Y,Relaxed1_Left_Accelerometer_Z,Relaxed1_Left_Gyroscope_X,Relaxed1_Left_Gyroscope_Y,Relaxed1_Left_Gyroscope_Z,Relaxed1_Right_Accelerometer_X,Relaxed1_Right_Accelerometer_Y,Relaxed1_Right_Accelerometer_Z,...,Entrainment2_Left_Gyroscope_X,Entrainment2_Left_Gyroscope_Y,Entrainment2_Left_Gyroscope_Z,Entrainment2_Right_Accelerometer_X,Entrainment2_Right_Accelerometer_Y,Entrainment2_Right_Accelerometer_Z,Entrainment2_Right_Gyroscope_X,Entrainment2_Right_Gyroscope_Y,Entrainment2_Right_Gyroscope_Z,File
457739,972,-0.02838,-0.030496,-0.027287,-0.025132,-0.021923,-0.016598,-0.013408,-0.014465,-0.010212,...,0.085307,0.081067,0.0747,0.060897,0.048153,0.032222,0.020531,0.006726,-0.028336,469_ml.bin
457740,973,-0.051722,-0.078292,-0.091057,-0.111241,-0.126104,-0.137775,-0.14519,-0.150486,-0.145166,...,0.026759,0.029935,0.047994,0.048008,0.053333,0.065027,0.088408,0.117115,0.141585,469_ml.bin
457741,974,0.157558,0.154404,0.144858,0.12785,0.102335,0.085313,0.08316,0.077828,0.067208,...,-0.00501,-0.000765,0.002415,0.017275,0.030004,0.044854,0.055456,0.059699,0.054413,469_ml.bin
457742,975,0.064008,0.081033,0.104426,0.105513,0.105531,0.098107,0.095981,0.098097,0.086409,...,0.035347,0.048082,0.056569,0.062938,0.062925,0.072472,0.067174,0.055518,0.059804,469_ml.bin
457743,976,0.060893,0.060911,0.033316,0.008893,0.003569,0.002492,-0.003895,-0.017715,-0.027292,...,-0.113293,-0.104795,-0.093112,-0.083557,-0.08038,-0.07933,-0.078285,-0.077235,-0.074051,469_ml.bin


## Questionnarie bin files to single Dataframe 

In [33]:
# Define the preprocessed folder and channels
preprocessed_dir = 'preprocessed/'
questionnaire_dir = os.path.join(preprocessed_dir, 'questionnaire')
channels = [
    'Dribbling', 'Taste/smelling', 'Swallowing', 'Vomiting', 'Constipation',
    'Bowel inconsistence', 'Bowel emptying incomplete', 'Urgency', 'Nocturia', 'Pains',
    'Weight', 'Remembering', 'Loss of interest', 'Hallucinations', 'Concentrating',
    'Sad, blues', 'Anxiety', 'Sex drive', 'Sex difficulty', 'Dizzy',
    'Falling', 'Daytime sleepiness', 'Insomnia', 'Intense vivid dreams', 'Acting out during dreams',
    'Restless legs', 'Swelling', 'Sweating', 'Diplopia', 'Delusions'
]

# Construct the path for the file_list.csv
file_list_path = os.path.join(preprocessed_dir, 'file_list.csv')

# Check if file_list.csv exists
if not os.path.exists(file_list_path):
    print(f"Error: The file '{file_list_path}' does not exist!")
else:
    # Read the file list CSV
    df = pd.read_csv(file_list_path)

    # Initialize an empty DataFrame to hold all data
    all_data_df = pd.DataFrame(columns=['id'] + channels)

    # Process all .bin files in the questionnaire directory
    for _, row in df.iterrows():
        file_idx = int(row['id'])
        bin_file_path = os.path.join(questionnaire_dir, f'{file_idx:03d}_ml.bin')

        if not os.path.exists(bin_file_path):
            print(f"Warning: The binary file '{bin_file_path}' does not exist, skipping...")
            continue

        # Read the binary file data as np.float32
        data = np.fromfile(bin_file_path, dtype=np.float32)

        # Add the data to the cumulative DataFrame
        row_data = pd.DataFrame([[file_idx] + data.tolist()], columns=['id'] + channels)
        all_data_df = pd.concat([all_data_df, row_data], ignore_index=True)


  all_data_df = pd.concat([all_data_df, row_data], ignore_index=True)


In [None]:
'''# Save the consolidated DataFrame to a single CSV file
    consolidated_csv_path = os.path.join(preprocessed_dir, 'all_questionnaire_data.csv')
    all_data_df.to_csv(consolidated_csv_path, index=False)
    print(f"Consolidated data saved as '{consolidated_csv_path}'")'''

In [35]:
# Establish the database connection
engine = create_engine('postgresql://postgres:eysk123@localhost/PADS')

# Use pandas to create the table and populate data
all_data_df.to_sql('questionnarie_bin', engine, index=False, if_exists='replace', schema='public')

469

In [37]:
# Query the database 
query = "SELECT * FROM public.questionnarie_bin;"
result_1 = pd.read_sql(query, engine)

# Display the query results
result_1

Unnamed: 0,id,Dribbling,Taste/smelling,Swallowing,Vomiting,Constipation,Bowel inconsistence,Bowel emptying incomplete,Urgency,Nocturia,...,Falling,Daytime sleepiness,Insomnia,Intense vivid dreams,Acting out during dreams,Restless legs,Swelling,Sweating,Diplopia,Delusions
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
4,5,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,465,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
465,466,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
466,467,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
467,468,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0


## Read the file_list csv files 

In [39]:
# Define the preprocessed folder and file index
preprocessed_dir = 'preprocessed/'
# Construct the path for the file_list.csv
file_list_path = os.path.join(preprocessed_dir, 'file_list.csv')

# Check if the file exists
if not os.path.exists(file_list_path):
    print(f"Error: The file '{file_list_path}' does not exist!")
else:
    # Read the file list CSV from the preprocessed directory
    df = pd.read_csv(file_list_path)  

In [41]:
df

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,patient,465,PADS,Parkinson's,IPS mixed type,62,65,175,80,male,right,True,False,No effect,1
465,patient,466,PADS,Healthy,-,84,84,172,74,female,right,True,True,No effect,0
466,patient,467,PADS,Parkinson's,"Essential Tremor, starting IPS tremordominant ...",55,57,190,100,male,right,False,,Improvement,1
467,patient,468,PADS,Parkinson's,IPS mixed type,73,76,198,118,male,right,False,,No effect,1


In [43]:
# Establish the database connection
engine = create_engine('postgresql://postgres:eysk123@localhost/PADS')

# Use pandas to create the table and populate data
df.to_sql('patient_bin', engine, index=False, if_exists='replace', schema='public')

469

In [45]:
# Query the database 
query = "SELECT * FROM public.patient_bin;"
result_2 = pd.read_sql(query, engine)

# Display the query results
result_2

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,patient,465,PADS,Parkinson's,IPS mixed type,62,65,175,80,male,right,True,False,No effect,1
465,patient,466,PADS,Healthy,-,84,84,172,74,female,right,True,True,No effect,0
466,patient,467,PADS,Parkinson's,"Essential Tremor, starting IPS tremordominant ...",55,57,190,100,male,right,False,,Improvement,1
467,patient,468,PADS,Parkinson's,IPS mixed type,73,76,198,118,male,right,False,,No effect,1
