# Specific time-point analysis


In [3]:
import os
import pandas as pd

# Define the root folder path
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'

# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Assuming the timestamp column is named 'timestamp'
                # Print the first and last timestamps if the column exists
                if ' SysTime' in df.columns:
                    first_timestamp = df[' SysTime'].iloc[0]
                    last_timestamp = df[' SysTime'].iloc[-1]
                    
                    print(f"File: {file_name}")
                    print(f"  First timestamp: {first_timestamp}")
                    print(f"  Last timestamp: {last_timestamp}\n")
                else:
                    print(f"File: {file_name} - No 'SysTime' column found.")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")


File: VRTL3S6_Camera5.csv
  First timestamp: 4/19/2024 10:27:52 AM
  Last timestamp: 4/19/2024 10:28:54 AM

File: VRTL3S6_Camera6.csv
  First timestamp: 4/19/2024 10:28:54 AM
  Last timestamp: 4/19/2024 10:29:57 AM

File: VRTL3S6_Camera7.csv
  First timestamp: 4/19/2024 10:29:57 AM
  Last timestamp: 4/19/2024 10:30:58 AM

File: VRTL3S5_Camera7.csv
  First timestamp: 4/19/2024 10:21:34 AM
  Last timestamp: 4/19/2024 10:22:35 AM

File: VRTL3S5_Camera6.csv
  First timestamp: 4/19/2024 10:20:32 AM
  Last timestamp: 4/19/2024 10:21:34 AM

File: VRTL3S5_Camera5.csv
  First timestamp: 4/19/2024 10:19:29 AM
  Last timestamp: 4/19/2024 10:20:32 AM

File: VRTL3S4_Camera5.csv
  First timestamp: 4/18/2024 12:49:55 PM
  Last timestamp: 4/18/2024 12:51:00 PM

File: VRTL3S4_Camera6.csv
  First timestamp: 4/18/2024 12:51:00 PM
  Last timestamp: 4/18/2024 12:52:04 PM

File: VRTL3S4_Camera7.csv
  First timestamp: 4/18/2024 12:52:04 PM
  Last timestamp: 4/18/2024 12:53:06 PM

File: VRTL9S5_Camera5.csv
  

In [5]:
import os
import pandas as pd

# Define the root folder path
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'

# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Initialize a list to store the results
file_data = []

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Check if ' SysTime' column exists
                if ' SysTime' in df.columns:
                    # Extract the first and last timestamps
                    first_timestamp = df[' SysTime'].iloc[0]
                    last_timestamp = df[' SysTime'].iloc[-1]
                    
                    # Append file information to the list
                    file_data.append({
                        "File name": file_name,
                        "Start timestamp": first_timestamp,
                        "End timestamp": last_timestamp
                    })
                else:
                    print(f"File: {file_name} - 'SysTime' column not found.")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")

# Convert to a DataFrame for display
output_df = pd.DataFrame(file_data)

# Print the DataFrame in a table format
print(output_df)


0     4/19/2024 10:27:52 AM
1     4/19/2024 10:28:54 AM
2     4/19/2024 10:29:57 AM
3     4/19/2024 10:21:34 AM
4     4/19/2024 10:20:32 AM
5     4/19/2024 10:19:29 AM
6     4/18/2024 12:49:55 PM
7     4/18/2024 12:51:00 PM
8     4/18/2024 12:52:04 PM
9      5/10/2024 3:13:03 PM
10     5/10/2024 3:14:05 PM
11     5/10/2024 3:15:08 PM
12     5/10/2024 3:23:38 PM
13     5/10/2024 3:22:36 PM
14     5/10/2024 3:21:33 PM
15      5/9/2024 2:46:49 PM
16      5/9/2024 2:45:44 PM
17      5/9/2024 2:44:39 PM
18    5/16/2024 11:46:15 AM
19    5/16/2024 11:45:13 AM
20    5/16/2024 11:44:10 AM
21    5/17/2024 10:43:48 AM
22    5/17/2024 10:44:52 AM
23    5/17/2024 10:45:56 AM
24    5/17/2024 10:54:31 AM
25    5/17/2024 10:53:29 AM
26    5/17/2024 10:52:27 AM
27    5/17/2024 12:33:00 PM
28    5/17/2024 12:34:03 PM
29    5/17/2024 12:31:58 PM
30    5/17/2024 12:23:38 PM
31    5/17/2024 12:25:44 PM
32    5/17/2024 12:24:41 PM
33    5/16/2024 12:12:14 PM
34    5/16/2024 12:13:18 PM
35    5/16/2024 12:1

In [6]:
import os
import pandas as pd

# Define the root folder path and output file path
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'
output_file = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_check2.xlsx'

# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Initialize a list to store the results
file_data = []

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Check if ' SysTime' column exists
                if ' SysTime' in df.columns:
                    # Extract the first and last timestamps
                    first_timestamp = df[' SysTime'].iloc[0]
                    last_timestamp = df[' SysTime'].iloc[-1]
                    
                    # Append file information to the list
                    file_data.append({
                        "File name": file_name,
                        "Start timestamp": first_timestamp,
                        "End timestamp": last_timestamp
                    })
                else:
                    print(f"File: {file_name} - 'SysTime' column not found.")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")

# Convert the data to a DataFrame
output_df = pd.DataFrame(file_data)

# Save the DataFrame to an Excel file
output_df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to /Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_check2.xlsx


In [17]:
import os
import pytz
import pandas as pd

# Function to convert EST/EDT time to UTC
def convert_est_edt_to_utc(eastern_time):
    eastern = pytz.timezone('America/New_York')
    eastern_time = eastern.localize(eastern_time)
    utc_time = eastern_time.astimezone(pytz.utc)
    return utc_time

# Define the root folder path
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'
output_file = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_.xlsx'

# Initialize a list to store file information
file_data = []

# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Check if 'SysTime' column exists
                if ' SysTime' in df.columns:
                    print(f"Processing File: {file_name}")
                    
                    # Convert and print the first and last timestamps in UTC
                    first_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[0])
                    last_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[-1])

                    first_timestamp_utc = convert_est_edt_to_utc(first_timestamp_est)
                    last_timestamp_utc = convert_est_edt_to_utc(last_timestamp_est)
                    
                    # Append file information to the list
                    # file_data.append({
                    #     "File name": file_name,
                    #     "Start time": first_timestamp_utc,
                    #     "End time": last_timestamp_utc
                    # })
                    print(f"  First Timestamp (UTC): {first_timestamp_utc}")
                    print(f"  Last Timestamp (UTC): {last_timestamp_utc}\n")
                else:
                    print(f"File: {file_name} - 'SysTime' column not found. Available columns: {df.columns.tolist()}")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")

# # Convert the data to a DataFrame and save to an Excel file
# output_df = pd.DataFrame(file_data)
# output_df.to_excel(output_file, index=False)

# print(f"Data has been saved to {output_file}")

Processing File: VRTL3S6_Camera5.csv
  First Timestamp (UTC): 2024-04-19 14:27:52+00:00
  Last Timestamp (UTC): 2024-04-19 14:28:54+00:00

Processing File: VRTL3S6_Camera6.csv
  First Timestamp (UTC): 2024-04-19 14:28:54+00:00
  Last Timestamp (UTC): 2024-04-19 14:29:57+00:00

Processing File: VRTL3S6_Camera7.csv
  First Timestamp (UTC): 2024-04-19 14:29:57+00:00
  Last Timestamp (UTC): 2024-04-19 14:30:58+00:00

Processing File: VRTL3S5_Camera7.csv
  First Timestamp (UTC): 2024-04-19 14:21:34+00:00
  Last Timestamp (UTC): 2024-04-19 14:22:35+00:00

Processing File: VRTL3S5_Camera6.csv
  First Timestamp (UTC): 2024-04-19 14:20:32+00:00
  Last Timestamp (UTC): 2024-04-19 14:21:34+00:00

Processing File: VRTL3S5_Camera5.csv
  First Timestamp (UTC): 2024-04-19 14:19:29+00:00
  Last Timestamp (UTC): 2024-04-19 14:20:32+00:00

Processing File: VRTL3S4_Camera5.csv
  First Timestamp (UTC): 2024-04-18 16:49:55+00:00
  Last Timestamp (UTC): 2024-04-18 16:51:00+00:00

Processing File: VRTL3S4_Ca

In [24]:
import os
import pytz
import pandas as pd

# Function to convert EST/EDT time to UTC
def convert_est_edt_to_utc(eastern_time):
    eastern = pytz.timezone('America/New_York')
    eastern_time = eastern.localize(eastern_time)
    utc_time = eastern_time.astimezone(pytz.utc)
    return utc_time

# Define the root folder path and Excel file output
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'
output_file = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_.xlsx'

# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Initialize a list to store file information
file_data = []

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Check if 'SysTime' column exists
                if ' SysTime' in df.columns:
                    # Convert the first and last timestamps in UTC
                    first_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[0])
                    last_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[-1])

                    first_timestamp_utc = convert_est_edt_to_utc(first_timestamp_est)
                    last_timestamp_utc = convert_est_edt_to_utc(last_timestamp_est)
                    
                    # Append file information to the list
                    file_data.append({
                        "File name": file_name,
                        "Start time": first_timestamp_utc,
                        "End time": last_timestamp_utc
                    })
                else:
                    print(f"File: {file_name} - 'SysTime' column not found. Available columns: {df.columns.tolist()}")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")

# Convert the data to a DataFrame and save to an Excel file
output_df = pd.DataFrame(file_data)
output_df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


ValueError: Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel.

In [1]:
import os
import pytz
import pandas as pd

# Function to convert EST/EDT time to UTC
def convert_est_edt_to_utc(eastern_time):
    eastern = pytz.timezone('America/New_York')
    eastern_time = eastern.localize(eastern_time)
    utc_time = eastern_time.astimezone(pytz.utc)
    return utc_time

# Define the root folder path and Excel file output
# Define the root folder path and Excel file output
root_folder = '/Users/dhwanishah/Desktop/MS/VR-Study/VRTL/filtered_camera_data'
output_file = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_check.xlsx'


# Define the subjects, sessions, and cameras of interest
subjects = ['VRTL3', 'VRTL9', 'VRTL10', 'VRTL11']
sessions = ['S4', 'S5', 'S6']
cameras = ['Camera5', 'Camera6', 'Camera7']

# Initialize a list to store file information
file_data = []

# Iterate over each subject
for subject in subjects:
    subject_folder = os.path.join(root_folder, subject)
    
    # Ensure the subject folder exists
    if not os.path.exists(subject_folder):
        continue
    
    # Iterate over each file in the subject folder
    for file_name in os.listdir(subject_folder):
        # Check if the file matches the session and camera criteria
        if any(session in file_name for session in sessions) and any(camera in file_name for camera in cameras):
            file_path = os.path.join(subject_folder, file_name)
            
            # Load the CSV file
            try:
                df = pd.read_csv(file_path)
                
                # Check if 'SysTime' column exists
                if ' SysTime' in df.columns:
                    # Convert the first and last timestamps in UTC
                    first_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[0])
                    last_timestamp_est = pd.to_datetime(df[' SysTime'].iloc[-1])

                    # Convert to UTC and remove timezone info
                    first_timestamp_utc = convert_est_edt_to_utc(first_timestamp_est).replace(tzinfo=None)
                    last_timestamp_utc = convert_est_edt_to_utc(last_timestamp_est).replace(tzinfo=None)
                    
                    # Append file information to the list
                    file_data.append({
                        "File name": file_name,
                        "Start time": first_timestamp_utc,
                        "End time": last_timestamp_utc
                    })
                else:
                    print(f"File: {file_name} - 'SysTime' column not found. Available columns: {df.columns.tolist()}")
            except Exception as e:
                print(f"Could not process file: {file_name} - Error: {e}")

# Convert the data to a DataFrame and save to an Excel file
output_df = pd.DataFrame(file_data)

# Check if the DataFrame has data before attempting to save
if not output_df.empty:
    output_df.to_excel(output_file, index=False)
    print(f"Data has been saved to {output_file}")
else:
    print("No data to save.")


Data has been saved to /Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_check.xlsx


# Make csv files of specific timepoints

In [13]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import avro
import avro.schema
from avro.datafile import DataFileReader
from avro.io import DatumReader
import pandas as pd
from datetime import datetime
import pytz
import neurokit2 as nk

Define paths

In [36]:
file_ref = pd.read_excel('/Users/dhwanishah/Desktop/MS/VR-Dhwani/camera_timestamps_check.xlsx')
input_dir = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/subject-avros'
output_dir = '/Users/dhwanishah/Desktop/MS/VR-Dhwani/subject-csvs-new-eda-camera'

Define Functions

In [37]:
def extract_eda_from_avro(file_path):
    with open(file_path, 'rb') as avro_file:
        reader = DataFileReader(avro_file, DatumReader())
        record = next(reader)
        rawData = record.get('rawData', None)
        eda = rawData.get('eda', None)
        bvp = rawData.get('bvp', None)
        systolicPeaks = rawData.get('systolicPeaks', None)
    return eda, bvp, systolicPeaks

In [38]:
def convert_est_edt_to_utc(eastern_time):
    eastern = pytz.timezone('America/New_York')
    eastern_time = eastern.localize(eastern_time)
    utc_time = eastern_time.astimezone(pytz.utc)
    return utc_time

# Convert 'Start' and 'End' columns to datetime format if they are strings
file_ref['Start'] = pd.to_datetime(file_ref['Start'], errors='coerce')
file_ref['End'] = pd.to_datetime(file_ref['End'], errors='coerce')

Process data

In [43]:
for index, row in file_ref.iterrows():

    # Process only if the SubID is 'sub-11'
    if row['SubID'] != 'sub-10':
        continue  # Skip rows that are not 'sub-11'

    input_path = (os.path.join(input_dir, row['SubID'], row['Avro_File name-1']))
    output_path = f"{output_dir}/{row['SubID']}/session-{row['Session']}/camera-{row['Camera']}"
    

    start_time = convert_est_edt_to_utc(row['Start']).replace(tzinfo=None)
    start_time = pd.to_datetime(start_time, unit='s', utc=True)
    
    end_time = convert_est_edt_to_utc(row['End']).replace(tzinfo=None)
    end_time = pd.to_datetime(end_time, unit='s', utc=True)
    
    eda_data, bvp_data, systolicPeaks_data = extract_eda_from_avro(input_path)
    values = eda_data['values']
    timestamp_start = eda_data['timestampStart'] 
    sampling_frequency = eda_data['samplingFrequency']
    # print(f"sampling frequency:{sampling_frequency}")
    
    # If eda data is split into 2 files, load in the values from file2
    if isinstance(row['Avro_File name-2'], str):
        input_path2 = (os.path.join(input_dir, row['SubID'], row['Avro_File name-2']))
        eda_data2, bvp_data2, systolicPeaks_data2 = extract_eda_from_avro(input_path2)
        # eda_data2 = extract_eda_from_avro(input_path2)
        values2 = eda_data2['values']
        values.extend(values2)
    
    
    
    # Convert timestamp_start from microseconds to seconds
    timestamp_start_seconds = timestamp_start / 1_000_000
    
    # Generate a list of timestamps
    time_seconds = list(range(len(values)))
    timestamps = [timestamp_start_seconds + t / sampling_frequency for t in time_seconds]
    
    # Convert timestamps to datetime objects
    datetime_objects = pd.to_datetime(timestamps, unit='s', utc=True)
    
    # Trim the DataFrame based on start and end time points of the experimental session
    df = pd.DataFrame({'Values': values, 'Timestamp': datetime_objects})
    df = df[(df['Timestamp'] >= start_time) & (df['Timestamp'] <= end_time)].reset_index(drop=True)

    # Update BVP values and timestamps after trimming
    values = df['Values']
    datetime_objects_trimmed = df['Timestamp']

    # Smooth EDA values with dynamic kernel size
    smoothing_size = min(3, len(values))  # Set to 3 or lower if values array is shorter
    values_smoothed = nk.signal_smooth(values, method='convolution', kernel='boxcar', size=smoothing_size)

    # Create a DataFrame with timestamps, raw, EDA values, and smoothed EDA values
    # df = pd.DataFrame({'timestamp': datetime_objects,
    #                    'eda_raw': values, 
    #                    'eda_smoothed': values_smoothed})
    
    

    # Process the smoothed eda signal; add processed signals to DataFrame 
    # signals, info = nk.eda_process(df['eda_smoothed'], sampling_rate=4)
    signals, info = nk.eda_process(values_smoothed, sampling_rate=sampling_frequency)
    df = pd.concat([df, signals], axis=1)
    print(df)

    # Save the DataFrame as a csv
    os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Create directories if they donâ€™t exist

    df.to_csv(f"{output_path}.csv", index=False)

       Values                           Timestamp   EDA_Raw  EDA_Clean  \
0    0.046688 2024-05-16 15:46:15.242014170+00:00  0.047201   0.047201   
1    0.048229 2024-05-16 15:46:15.491996288+00:00  0.047182   0.047182   
2    0.046630 2024-05-16 15:46:15.741978407+00:00  0.047067   0.047067   
3    0.046342 2024-05-16 15:46:15.991960764+00:00  0.046726   0.046726   
4    0.047206 2024-05-16 15:46:16.241942883+00:00  0.046812   0.046812   
..        ...                                 ...       ...        ...   
239  0.044873 2024-05-16 15:47:14.987761974+00:00  0.045982   0.045982   
240  0.046428 2024-05-16 15:47:15.237744331+00:00  0.045871   0.045871   
241  0.046313 2024-05-16 15:47:15.487726450+00:00  0.046313   0.046313   
242  0.046198 2024-05-16 15:47:15.737708807+00:00  0.046289   0.046289   
243  0.046356 2024-05-16 15:47:15.987690926+00:00  0.046303   0.046303   

     EDA_Tonic  EDA_Phasic  SCR_Onsets  SCR_Peaks  SCR_Height  SCR_Amplitude  \
0     0.047183    0.000018     

TypeError: NeuroKit error: signal_smooth(): 'size' should be between 1 and length of the signal.