## Groupby 2nd Phase
Collect the scrolling-observation-window exports (mean, median, min, max), sort them, and save each statistic to its own separate file.

In [1]:
import pandas as pd
import os
import gc

## Collecting mean data to one file

In [2]:
# Merge every CSV export found in the "mean" directory into one sorted CSV file.

# Directory where the per-file mean exports are located
csv_directory = 'CSV/Exports/groupby/mean/'

# Filtered rows of each input file are collected here
dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the read
# order (and therefore the run) reproducible.
for filename in sorted(os.listdir(csv_directory)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(csv_directory, filename)
    df = pd.read_csv(file_path)
    # Keep only the rows whose 'Count' equals 1
    filtered_rows = df[df['Count'] == 1]
    dfs.append(filtered_rows)

# pd.concat() fails with an opaque "No objects to concatenate" on an empty
# list, so report the actual cause instead.
if not dfs:
    raise ValueError(f"No CSV files found in '{csv_directory}'")

# Concatenate all collected frames into a single DataFrame
combined_data = pd.concat(dfs, ignore_index=True)

# Sort by stay, patient and time zone
combined_data.sort_values(by=['patientunitstayid', 'uniquepid', 'Time_Zone'], inplace=True)

# Remove rows with NaN values in the 'Mean_Chart' column
combined_data = combined_data.dropna(subset=['Mean_Chart'])

# Build the output path with os.path.join: the previous backslash literal
# contained invalid escape sequences (\E, \g, \o) and only resolved to the
# intended directory on Windows.
output_path = os.path.join('CSV', 'Exports', 'groupby', 'o01_eicu_grouped_mean.csv')
combined_data.to_csv(output_path, index=False)

## Collecting median data to one file

In [3]:
# Merge every CSV export found in the "median" directory into one sorted CSV file.

# Directory where the per-file median exports are located
csv_directory = 'CSV/Exports/groupby/median/'

# Filtered rows of each input file are collected here
dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the read
# order (and therefore the run) reproducible.
for filename in sorted(os.listdir(csv_directory)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(csv_directory, filename)
    df = pd.read_csv(file_path)
    # Keep only the rows whose 'Count' equals 1
    filtered_rows = df[df['Count'] == 1]
    dfs.append(filtered_rows)

# pd.concat() fails with an opaque "No objects to concatenate" on an empty
# list, so report the actual cause instead.
if not dfs:
    raise ValueError(f"No CSV files found in '{csv_directory}'")

# Concatenate all collected frames into a single DataFrame
combined_data = pd.concat(dfs, ignore_index=True)

# Sort by stay, patient and time zone
combined_data.sort_values(by=['patientunitstayid', 'uniquepid', 'Time_Zone'], inplace=True)

# Remove rows with NaN values in the 'Median_Chart' column
combined_data = combined_data.dropna(subset=['Median_Chart'])

# Build the output path with os.path.join: the previous backslash literal
# contained invalid escape sequences (\E, \g, \o) and only resolved to the
# intended directory on Windows.
output_path = os.path.join('CSV', 'Exports', 'groupby', 'o02_eicu_grouped_median.csv')
combined_data.to_csv(output_path, index=False)

## Collecting min data to one file

In [4]:
# Merge every CSV export found in the "min" directory into one sorted CSV file.

# Directory where the per-file min exports are located
csv_directory = 'CSV/Exports/groupby/min/'

# Filtered rows of each input file are collected here
dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the read
# order (and therefore the run) reproducible.
for filename in sorted(os.listdir(csv_directory)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(csv_directory, filename)
    df = pd.read_csv(file_path)
    # Keep only the rows whose 'Count' equals 1
    filtered_rows = df[df['Count'] == 1]
    dfs.append(filtered_rows)

# pd.concat() fails with an opaque "No objects to concatenate" on an empty
# list, so report the actual cause instead.
if not dfs:
    raise ValueError(f"No CSV files found in '{csv_directory}'")

# Concatenate all collected frames into a single DataFrame
combined_data = pd.concat(dfs, ignore_index=True)

# Sort by stay, patient and time zone
combined_data.sort_values(by=['patientunitstayid', 'uniquepid', 'Time_Zone'], inplace=True)

# Remove rows with NaN values in the 'Min_Chart' column
combined_data = combined_data.dropna(subset=['Min_Chart'])

# Build the output path with os.path.join: the previous backslash literal
# contained invalid escape sequences (\E, \g, \o) and only resolved to the
# intended directory on Windows.
output_path = os.path.join('CSV', 'Exports', 'groupby', 'o03_eicu_grouped_min.csv')
combined_data.to_csv(output_path, index=False)

## Collecting max data to one file

In [5]:
# Merge every CSV export found in the "max" directory into one sorted CSV file.

# Directory where the per-file max exports are located
csv_directory = 'CSV/Exports/groupby/max/'

# Filtered rows of each input file are collected here
dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the read
# order (and therefore the run) reproducible.
for filename in sorted(os.listdir(csv_directory)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(csv_directory, filename)
    df = pd.read_csv(file_path)
    # Keep only the rows whose 'Count' equals 1
    filtered_rows = df[df['Count'] == 1]
    dfs.append(filtered_rows)

# pd.concat() fails with an opaque "No objects to concatenate" on an empty
# list, so report the actual cause instead.
if not dfs:
    raise ValueError(f"No CSV files found in '{csv_directory}'")

# Concatenate all collected frames into a single DataFrame
combined_data = pd.concat(dfs, ignore_index=True)

# Sort by stay, patient and time zone
combined_data.sort_values(by=['patientunitstayid', 'uniquepid', 'Time_Zone'], inplace=True)

# Remove rows with NaN values in the 'Max_Chart' column
combined_data = combined_data.dropna(subset=['Max_Chart'])

# Build the output path with os.path.join: the previous backslash literal
# contained invalid escape sequences (\E, \g, \o) and only resolved to the
# intended directory on Windows.
output_path = os.path.join('CSV', 'Exports', 'groupby', 'o04_eicu_grouped_max.csv')
combined_data.to_csv(output_path, index=False)