## 1. The "58" cohort.

### 1.1 Determine the starting point for each patient from the *"58"* cohort.

#### 1.1.1 "58" cohort, CDGR ventilator, start times list.

In [17]:
import pandas as pd
import os

# Folder holding the per-file exports for the "58" cohort
directory = '/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/a-CDGR+AVEAA+SVU(58)_20240205/a-58_Extension/'

# CDGR signals that must all be non-null in a row for it to count as a start point
required_columns = ['CDGR - FiO₂', 'CDGR - Paw', 'CDGR - eVT', 'CDGR - Flow', 'CDGR - iPress Wave']

# One record per file: {'Start Time': ..., 'File Name': ...}
start_times = []

# Files are numbered a_58_ext_001.csv ... a_58_ext_232.csv
for i in range(1, 233):
    filename = f'a_58_ext_{i:03}.csv'
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Stays None when the timestamps cannot be parsed or no row has every required signal
    start_time = None
    try:
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S.%f')
    except Exception as e:
        # Report the offending file and move on; it is still recorded, without a start time
        print(f"Error converting 'Time' column to datetime format in file: {filename}")
        print(e)
    else:
        # True for the rows in which every required signal is present
        complete_rows = df[required_columns].notna().all(axis=1)
        if complete_rows.any():
            # idxmax() on a boolean Series gives the label of its first True
            start_time = df.loc[complete_rows.idxmax(), 'Time']

    start_times.append({'Start Time': start_time, 'File Name': filename})

# Build the table and number each run of `group_size` consecutive files as one group (1-based)
start_times_df = pd.DataFrame(start_times)
group_size = 4
start_times_df['Group'] = (start_times_df.index // group_size) + 1

# Persist the start times for the CDGR ventilator
start_times_df.to_csv('start_times_58_CDGR.csv', index=False)


#### 1.1.2 "58" cohort, AVEAA ventilator, start times list.

In [2]:
import pandas as pd
import os

# Folder holding the per-file exports for the "58" cohort
directory = '/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/a-CDGR+AVEAA+SVU(58)_20240205/a-58_Extension/'

# AVEA signals that must all be non-null in a row for it to count as a start point
required_columns = ['AVEA - eVT', 'AVEA - FiO₂', 'AVEA - Paw', 'AVEA - Air Flow Wave']

# One record per file: {'Start Time': ..., 'File Name': ...}
start_times = []

# Files are numbered a_58_ext_001.csv ... a_58_ext_232.csv
for i in range(1, 233):
    filename = f'a_58_ext_{i:03}.csv'
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Stays None when the timestamps cannot be parsed or no row has every required signal
    start_time = None
    try:
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S.%f')
    except Exception as e:
        # Report the offending file and move on; it is still recorded, without a start time
        print(f"Error converting 'Time' column to datetime format in file: {filename}")
        print(e)
    else:
        # True for the rows in which every required signal is present
        complete_rows = df[required_columns].notna().all(axis=1)
        if complete_rows.any():
            # idxmax() on a boolean Series gives the label of its first True
            start_time = df.loc[complete_rows.idxmax(), 'Time']

    start_times.append({'Start Time': start_time, 'File Name': filename})

# Build the table and number each run of `group_size` consecutive files as one group (1-based)
start_times_df = pd.DataFrame(start_times)
group_size = 4
start_times_df['Group'] = (start_times_df.index // group_size) + 1

# Persist the start times for the AVEAA ventilator
start_times_df.to_csv('start_times_58_AVEAA.csv', index=False)

Error converting 'Time' column to datetime format in file: a_58_ext_039.csv
time data 10:00.4 doesn't match format specified


In [3]:
# Summarize start_times_df: column dtypes and non-null counts (files without a start time hold a null 'Start Time')
start_times_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232 entries, 0 to 231
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Start Time  5 non-null      datetime64[ns]
 1   File Name   232 non-null    object        
 2   Group       232 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 5.6+ KB


### 1.2 Create the sample_events CSV files based on the start times for the *"58"* cohort.

#### 1.2.1 "58" cohort, CDGR ventilator, 39 patients.

##### 1.2.1.1 "58" cohort, CDGR ventilator, 39 patients, 1st hour sample_events csv file.

In [49]:
import pandas as pd

# Inputs: the per-file event table and the CDGR start times computed for the same files
events_df = pd.read_csv("sample_events_PIDandMRN(58by4)_ext.csv")
start_times_df = pd.read_csv("start_times_58_CDGR.csv")

# Pair the two tables row by row (axis=1 aligns on the row index), keep only the rows
# that have a start time, and reduce every group to its first remaining entry.
# The original 'Time Start' / 'Time Stop' / 'File Name' columns are discarded and the
# detected 'Start Time' takes over the 'Time Start' name.
concatenated_df = (
    pd.concat([events_df, start_times_df], axis=1)
    .dropna(subset=['Start Time'])
    .groupby('Group')
    .first()
    .reset_index()
    .drop(columns=['Time Start', 'Time Stop', 'File Name'])
    .rename(columns={'Start Time': 'Time Start'})
)

# Parse the start as a datetime and close the window exactly one hour later
concatenated_df['Time Start'] = pd.to_datetime(concatenated_df['Time Start'])
concatenated_df['Time Stop'] = concatenated_df['Time Start'] + pd.Timedelta(hours=1)

# Write the first-hour event windows
concatenated_df.to_csv("sample_events_PIDandMRN(58_CDGR_39_1st).csv", index=False)

##### 1.2.1.2 "58" cohort, CDGR ventilator, 39 patients, 2nd hour sample_events csv file.

In [50]:
# Preview the first rows of concatenated_df
concatenated_df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,1,3,2951,101336219,2023-10-04 17:31:21,2023-10-04 18:31:21
1,2,7,12800,101569175,2023-10-17 21:56:41,2023-10-17 22:56:41
2,3,11,12833,100765255,2023-10-30 15:54:21,2023-10-30 16:54:21
3,4,15,20146,101562844,2023-10-06 15:17:35,2023-10-06 16:17:35
4,9,36,65281,40667398,2023-11-07 00:00:02,2023-11-07 01:00:02


In [1]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(58_CDGR_39_1st).csv")

# Shifted window: opens 1 hour after the stored 'Time Start' and lasts 1 hour
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=1)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the second-hour event windows
df.to_csv("sample_events_PIDandMRN(58_CDGR_39_2nd).csv", index=False)

In [2]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,1,3,2951,101336219,2023-10-04 18:31:21,2023-10-04 19:31:21
1,2,7,12800,101569175,2023-10-17 22:56:41,2023-10-17 23:56:41
2,3,11,12833,100765255,2023-10-30 16:54:21,2023-10-30 17:54:21
3,4,15,20146,101562844,2023-10-06 16:17:35,2023-10-06 17:17:35
4,9,36,65281,40667398,2023-11-07 01:00:02,2023-11-07 02:00:02


##### 1.2.1.3 "58" cohort, CDGR ventilator, 39 patients, 12th hour sample_events csv file.

In [51]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(58_CDGR_39_1st).csv")

# Shifted window: opens 12 hours after the stored 'Time Start' and lasts 1 hour.
# NOTE(review): this yields [start + 12 h, start + 13 h]; if hours are counted like the
# 1st (+0 h) and 2nd (+1 h) windows, the 12th hour would be +11 h -- confirm the intended offset.
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=12)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the "12th" event windows
df.to_csv("sample_events_PIDandMRN(58_CDGR_39_12th).csv", index=False)

In [52]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,1,3,2951,101336219,2023-10-05 05:31:21,2023-10-05 06:31:21
1,2,7,12800,101569175,2023-10-18 09:56:41,2023-10-18 10:56:41
2,3,11,12833,100765255,2023-10-31 03:54:21,2023-10-31 04:54:21
3,4,15,20146,101562844,2023-10-07 03:17:35,2023-10-07 04:17:35
4,9,36,65281,40667398,2023-11-07 12:00:02,2023-11-07 13:00:02


#### 1.2.2 "58" cohort, AVEAA ventilator, 4 patients.

##### 1.2.2.1 "58" cohort, AVEAA ventilator, 4 patients, 1st hour sample_events csv file.

In [1]:
import pandas as pd

# Inputs: the per-file event table and the AVEAA start times computed for the same files
events_df = pd.read_csv("sample_events_PIDandMRN(58by4)_ext.csv")
start_times_df = pd.read_csv("start_times_58_AVEAA.csv")

# Pair the two tables row by row (axis=1 aligns on the row index), keep only the rows
# that have a start time, and reduce every group to its first remaining entry.
# The original 'Time Start' / 'Time Stop' / 'File Name' columns are discarded and the
# detected 'Start Time' takes over the 'Time Start' name.
concatenated_df = (
    pd.concat([events_df, start_times_df], axis=1)
    .dropna(subset=['Start Time'])
    .groupby('Group')
    .first()
    .reset_index()
    .drop(columns=['Time Start', 'Time Stop', 'File Name'])
    .rename(columns={'Start Time': 'Time Start'})
)

# Parse the start as a datetime and close the window exactly one hour later
concatenated_df['Time Start'] = pd.to_datetime(concatenated_df['Time Start'])
concatenated_df['Time Stop'] = concatenated_df['Time Start'] + pd.Timedelta(hours=1)

# Write the first-hour event windows
concatenated_df.to_csv("sample_events_PIDandMRN(58_AVEAA_4_1st).csv", index=False)

In [2]:
# Display the full concatenated_df table
concatenated_df

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,17,67,132310,101675324,2023-11-01 13:10:30,2023-11-01 14:10:30
1,27,107,606288,101149608,2023-12-19 21:50:13,2023-12-19 22:50:13
2,58,231,1026178,101890492,2023-12-18 21:09:53,2023-12-18 22:09:53


##### 1.2.2.2 "58" cohort, AVEAA ventilator, 4 patients, 2nd hour sample_events csv file.

In [5]:
import pandas as pd

# Start from the event windows stored in the "_1st" file.
# NOTE(review): the recorded output of this cell lists 4 rows with minute-rounded times,
# while the cell that writes the "_1st" file displays 3 rows with seconds -- the CSV
# appears to have been edited outside this notebook; confirm before re-running.
first_hour_events = pd.read_csv("sample_events_PIDandMRN(58_AVEAA_4_1st).csv")

# Shifted window: opens 1 hour after the stored 'Time Start' and lasts 1 hour
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=1)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the second-hour event windows
df.to_csv("sample_events_PIDandMRN(58_AVEAA_4_2nd).csv", index=False)

In [6]:
# Display the full df table
df

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,10,39,76598,101532084,2023-12-18 21:14:00,2023-12-18 22:14:00
1,17,67,132310,101675324,2023-11-01 14:10:00,2023-11-01 15:10:00
2,27,107,606288,101149608,2023-12-19 22:50:00,2023-12-19 23:50:00
3,58,231,1026178,101890492,2023-12-18 22:09:00,2023-12-18 23:09:00


##### 1.2.2.3 "58" cohort, AVEAA ventilator, 4 patients, 12th hour sample_events csv file.

In [7]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(58_AVEAA_4_1st).csv")

# Shifted window: opens 12 hours after the stored 'Time Start' and lasts 1 hour.
# NOTE(review): this yields [start + 12 h, start + 13 h]; if hours are counted like the
# 1st (+0 h) and 2nd (+1 h) windows, the 12th hour would be +11 h -- confirm the intended offset.
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=12)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the "12th" event windows
df.to_csv("sample_events_PIDandMRN(58_AVEAA_4_12th).csv", index=False)

In [8]:
# Display the full df table
df

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,10,39,76598,101532084,2023-12-19 08:14:00,2023-12-19 09:14:00
1,17,67,132310,101675324,2023-11-02 01:10:00,2023-11-02 02:10:00
2,27,107,606288,101149608,2023-12-20 09:50:00,2023-12-20 10:50:00
3,58,231,1026178,101890492,2023-12-19 09:09:00,2023-12-19 10:09:00


## 2. The "151" cohort.

### 2.1 Determine the starting point for each patient from the *"151"* cohort.

#### 2.1.1 "151" cohort, CDGR ventilator, start times list.

In [4]:
import pandas as pd
import os

# Folder holding the per-file exports for the "151" cohort
directory = '/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/b-CDGR+AVEAA+SVU(151)_20240205/b-151-Extension/'

# CDGR signals that must all be non-null in a row for it to count as a start point
required_columns = ['CDGR - FiO₂', 'CDGR - Paw', 'CDGR - eVT', 'CDGR - Flow', 'CDGR - iPress Wave']

# One record per file: {'Start Time': ..., 'File Name': ...}
start_times = []

# Files are numbered b_151_ext_001.csv ... b_151_ext_604.csv
for i in range(1, 605):
    filename = f'b_151_ext_{i:03}.csv'
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Stays None when the timestamps cannot be parsed or no row has every required signal
    start_time = None
    try:
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S.%f')
    except Exception as e:
        # Report the offending file and move on; it is still recorded, without a start time
        print(f"Error converting 'Time' column to datetime format in file: {filename}")
        print(e)
    else:
        # True for the rows in which every required signal is present
        complete_rows = df[required_columns].notna().all(axis=1)
        if complete_rows.any():
            # idxmax() on a boolean Series gives the label of its first True
            start_time = df.loc[complete_rows.idxmax(), 'Time']

    start_times.append({'Start Time': start_time, 'File Name': filename})

# Build the table and number each run of `group_size` consecutive files as one group (1-based)
start_times_df = pd.DataFrame(start_times)
group_size = 4
start_times_df['Group'] = (start_times_df.index // group_size) + 1

# Persist the start times for the CDGR ventilator
start_times_df.to_csv('start_times_151_CDGR.csv', index=False)

Error converting 'Time' column to datetime format in file: b_151_ext_602.csv
time data 00:01.1 doesn't match format specified


#### 2.1.2 "151" cohort, AVEAA ventilator, start times list.

In [5]:
import pandas as pd
import os

# Folder holding the per-file exports for the "151" cohort
directory = '/nfs/turbo/med-kayvan-lab/Projects/PARDS/02-Data/02-EVENTS/b-CDGR+AVEAA+SVU(151)_20240205/b-151-Extension/'

# AVEA signals that must all be non-null in a row for it to count as a start point
required_columns = ['AVEA - eVT', 'AVEA - FiO₂', 'AVEA - Paw', 'AVEA - Air Flow Wave']

# One record per file: {'Start Time': ..., 'File Name': ...}
start_times = []

# Files are numbered b_151_ext_001.csv ... b_151_ext_604.csv
for i in range(1, 605):
    filename = f'b_151_ext_{i:03}.csv'
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Stays None when the timestamps cannot be parsed or no row has every required signal
    start_time = None
    try:
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S.%f')
    except Exception as e:
        # Report the offending file and move on; it is still recorded, without a start time
        print(f"Error converting 'Time' column to datetime format in file: {filename}")
        print(e)
    else:
        # True for the rows in which every required signal is present
        complete_rows = df[required_columns].notna().all(axis=1)
        if complete_rows.any():
            # idxmax() on a boolean Series gives the label of its first True
            start_time = df.loc[complete_rows.idxmax(), 'Time']

    start_times.append({'Start Time': start_time, 'File Name': filename})

# Build the table and number each run of `group_size` consecutive files as one group (1-based)
start_times_df = pd.DataFrame(start_times)
group_size = 4
start_times_df['Group'] = (start_times_df.index // group_size) + 1

# Persist the start times for the AVEAA ventilator
start_times_df.to_csv('start_times_151_AVEAA.csv', index=False)

Error converting 'Time' column to datetime format in file: b_151_ext_602.csv
time data 00:01.1 doesn't match format specified


### 2.2 Create the sample_events CSV files based on the start times for the *"151"* cohort.

#### 2.2.1 "151" cohort, CDGR ventilator, 11 patients.

##### 2.2.1.1 "151" cohort, CDGR ventilator, 11 patients, 1st hour sample_events csv file.

In [9]:
import pandas as pd

# Inputs: the per-file event table and the CDGR start times computed for the same files
events_df = pd.read_csv("sample_events_PIDandMRN(151by4)_ext.csv")
start_times_df = pd.read_csv("start_times_151_CDGR.csv")

# Pair the two tables row by row (axis=1 aligns on the row index), keep only the rows
# that have a start time, and reduce every group to its first remaining entry.
# The original 'Time Start' / 'Time Stop' / 'File Name' columns are discarded and the
# detected 'Start Time' takes over the 'Time Start' name.
concatenated_df = (
    pd.concat([events_df, start_times_df], axis=1)
    .dropna(subset=['Start Time'])
    .groupby('Group')
    .first()
    .reset_index()
    .drop(columns=['Time Start', 'Time Stop', 'File Name'])
    .rename(columns={'Start Time': 'Time Start'})
)

# Parse the start as a datetime and close the window exactly one hour later
concatenated_df['Time Start'] = pd.to_datetime(concatenated_df['Time Start'])
concatenated_df['Time Stop'] = concatenated_df['Time Start'] + pd.Timedelta(hours=1)

# Write the first-hour event windows
concatenated_df.to_csv("sample_events_PIDandMRN(151_CDGR_11_1st).csv", index=False)

In [12]:
# Preview the first rows of concatenated_df
concatenated_df.head()

##### 2.2.1.2 "151" cohort, CDGR ventilator, 11 patients, 2nd hour sample_events csv file.

In [14]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(151_CDGR_11_1st).csv")

# Shifted window: opens 1 hour after the stored 'Time Start' and lasts 1 hour
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=1)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the second-hour event windows
df.to_csv("sample_events_PIDandMRN(151_CDGR_11_2nd).csv", index=False)

In [15]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,47,186,68235,101659723,2023-09-22 07:39:08,2023-09-22 08:39:08
1,49,195,84948,41994058,2023-09-14 20:09:43,2023-09-14 21:09:43
2,60,239,124851,101381014,2023-09-18 14:53:46,2023-09-18 15:53:46
3,67,267,157876,101138190,2023-09-13 21:32:27,2023-09-13 22:32:27
4,68,271,161628,100808011,2023-09-16 19:27:24,2023-09-16 20:27:24


##### 2.2.1.3 "151" cohort, CDGR ventilator, 11 patients, 12th hour sample_events csv file.

In [16]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(151_CDGR_11_1st).csv")

# Shifted window: opens 12 hours after the stored 'Time Start' and lasts 1 hour.
# NOTE(review): this yields [start + 12 h, start + 13 h]; if hours are counted like the
# 1st (+0 h) and 2nd (+1 h) windows, the 12th hour would be +11 h -- confirm the intended offset.
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=12)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the "12th" event windows
df.to_csv("sample_events_PIDandMRN(151_CDGR_11_12th).csv", index=False)

In [17]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,47,186,68235,101659723,2023-09-22 18:39:08,2023-09-22 19:39:08
1,49,195,84948,41994058,2023-09-15 07:09:43,2023-09-15 08:09:43
2,60,239,124851,101381014,2023-09-19 01:53:46,2023-09-19 02:53:46
3,67,267,157876,101138190,2023-09-14 08:32:27,2023-09-14 09:32:27
4,68,271,161628,100808011,2023-09-17 06:27:24,2023-09-17 07:27:24


#### 2.2.2 "151" cohort, AVEAA ventilator, 4 patients.

##### 2.2.2.1 "151" cohort, AVEAA ventilator, 4 patients, 1st hour sample_events csv file.

In [5]:
import pandas as pd

# Inputs: the per-file event table and the AVEAA start times computed for the same files
events_df = pd.read_csv("sample_events_PIDandMRN(151by4)_ext.csv")
start_times_df = pd.read_csv("start_times_151_AVEAA.csv")

# Pair the two tables row by row (axis=1 aligns on the row index), keep only the rows
# that have a start time, and reduce every group to its first remaining entry.
# The original 'Time Start' / 'Time Stop' / 'File Name' columns are discarded and the
# detected 'Start Time' takes over the 'Time Start' name.
concatenated_df = (
    pd.concat([events_df, start_times_df], axis=1)
    .dropna(subset=['Start Time'])
    .groupby('Group')
    .first()
    .reset_index()
    .drop(columns=['Time Start', 'Time Stop', 'File Name'])
    .rename(columns={'Start Time': 'Time Start'})
)

# Parse the start as a datetime and close the window exactly one hour later
concatenated_df['Time Start'] = pd.to_datetime(concatenated_df['Time Start'])
concatenated_df['Time Stop'] = concatenated_df['Time Start'] + pd.Timedelta(hours=1)

# Write the first-hour event windows
concatenated_df.to_csv("sample_events_PIDandMRN(151_AVEAA_4_1st).csv", index=False)

In [6]:
# Summarize concatenated_df: row count, column dtypes and non-null counts
concatenated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Group       4 non-null      int64         
 1   #           4 non-null      int64         
 2   Patient ID  4 non-null      int64         
 3   MRN         4 non-null      int64         
 4   Time Start  4 non-null      datetime64[ns]
 5   Time Stop   4 non-null      datetime64[ns]
dtypes: datetime64[ns](2), int64(4)
memory usage: 320.0 bytes


##### 2.2.2.2 "151" cohort, AVEAA ventilator, 4 patients, 2nd hour sample_events csv file.

In [18]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(151_AVEAA_4_1st).csv")

# Shifted window: opens 1 hour after the stored 'Time Start' and lasts 1 hour
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=1)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the second-hour event windows
df.to_csv("sample_events_PIDandMRN(151_AVEAA_4_2nd).csv", index=False)

In [19]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,43,170,58546,101344103,2023-09-20 11:28:07.000,2023-09-20 12:28:07.000
1,83,331,327496,100229346,2023-09-19 00:29:49.000,2023-09-19 01:29:49.000
2,125,499,734887,101775254,2023-09-13 17:03:47.000,2023-09-13 18:03:47.000
3,148,591,890448,101840209,2023-09-13 15:54:56.128,2023-09-13 16:54:56.128


##### 2.2.2.3 "151" cohort, AVEAA ventilator, 4 patients, 12th hour sample_events csv file.

In [21]:
import pandas as pd

# Start from the event windows stored in the "_1st" file
first_hour_events = pd.read_csv("sample_events_PIDandMRN(151_AVEAA_4_1st).csv")

# Shifted window: opens 12 hours after the stored 'Time Start' and lasts 1 hour.
# NOTE(review): this yields [start + 12 h, start + 13 h]; if hours are counted like the
# 1st (+0 h) and 2nd (+1 h) windows, the 12th hour would be +11 h -- confirm the intended offset.
window_start = pd.to_datetime(first_hour_events['Time Start']) + pd.Timedelta(hours=12)
window_stop = window_start + pd.Timedelta(hours=1)

# Swap the stored bounds for the shifted ones; they become the last two columns
df = (
    first_hour_events
    .drop(columns=['Time Start', 'Time Stop'])
    .assign(**{'Time Start': window_start, 'Time Stop': window_stop})
)

# Write the "12th" event windows
df.to_csv("sample_events_PIDandMRN(151_AVEAA_4_12th).csv", index=False)

In [22]:
# Preview the first rows of df
df.head()

Unnamed: 0,Group,#,Patient ID,MRN,Time Start,Time Stop
0,43,170,58546,101344103,2023-09-20 22:28:07.000,2023-09-20 23:28:07.000
1,83,331,327496,100229346,2023-09-19 11:29:49.000,2023-09-19 12:29:49.000
2,125,499,734887,101775254,2023-09-14 04:03:47.000,2023-09-14 05:03:47.000
3,148,591,890448,101840209,2023-09-14 02:54:56.128,2023-09-14 03:54:56.128


#### Debugging Demonstration