In [29]:
import os
import pandas as pd
import random

# Build the sepsis cohort: list the per-patient CSVs in `folder_path`, draw a
# reproducible random subset for the FIS frame, and put every remaining file
# in the ANFIS frame.

# Path to the folder holding the sepsis CSV files
folder_path = 'dataset/sepsis/'

# Number of files drawn for the FIS subset
SEPSIS_FIS_SAMPLE_SIZE = 250

# Fixed seed so that Restart & Run All reproduces the same split
SEPSIS_SPLIT_SEED = 42


def _load_sepsis_files(folder, files, id_offset, label_fill):
    """Read the given CSV files and stack them into one DataFrame.

    Parameters
    ----------
    folder : str
        Directory that contains the CSV files.
    files : list of str
        File names of the form ``<integer>.csv`` to read from ``folder``.
    id_offset : int
        Added to ``<integer> * 10`` to form the ``patient_id`` column.
    label_fill : int
        Value written into missing ``sepsis_icd`` entries.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files, in the order given by ``files``.

    Raises
    ------
    ValueError
        If a file stem is not an integer, or if ``files`` is empty
        (``pd.concat`` refuses an empty list).
    """
    frames = []
    for name in files:
        patient_no = os.path.splitext(name)[0]  # file name without '.csv'
        df = pd.read_csv(os.path.join(folder, name))
        df['patient_id'] = int(patient_no) * 10 + id_offset
        df['sepsis_icd'] = df['sepsis_icd'].fillna(label_fill)
        frames.append(df)
    return pd.concat(frames)


# Get list of all files in the folder
all_files = os.listdir(folder_path)

# Keep only CSV files. os.listdir returns names in arbitrary order, so sort
# them: otherwise the seeded draw below could still differ between machines.
csv_files = sorted(f for f in all_files if f.endswith('.csv'))

if len(csv_files) < SEPSIS_FIS_SAMPLE_SIZE:
    raise ValueError(
        'Need at least {} CSV files in {!r}, found {}'.format(
            SEPSIS_FIS_SAMPLE_SIZE, folder_path, len(csv_files)
        )
    )

# Draw the FIS files with a private, seeded generator so the global `random`
# state is left untouched; every file not drawn goes to ANFIS.
fis_files = random.Random(SEPSIS_SPLIT_SEED).sample(csv_files, SEPSIS_FIS_SAMPLE_SIZE)
fis_selected = set(fis_files)  # set membership instead of scanning a list
anfis_files = [f for f in csv_files if f not in fis_selected]

# patient_id = file number * 10 + 1 for this folder; missing sepsis_icd
# values are filled with 1.
fis_sepsis_df = _load_sepsis_files(folder_path, fis_files, id_offset=1, label_fill=1)
anfis_sepsis_df = _load_sepsis_files(folder_path, anfis_files, id_offset=1, label_fill=1)


In [30]:
# Build the non-sepsis cohort: list the per-patient CSVs in `folder_path`,
# draw a reproducible random subset for the FIS frame, and put every
# remaining file in the ANFIS frame.
folder_path = 'dataset/no_sepsis/'

# Number of files drawn for the FIS subset
NO_SEPSIS_FIS_SAMPLE_SIZE = 250

# Fixed seed so that Restart & Run All reproduces the same split
NO_SEPSIS_SPLIT_SEED = 42

all_files = os.listdir(folder_path)

# os.listdir returns names in arbitrary order, so sort them: otherwise the
# seeded draw below could still differ between machines.
csv_files = sorted(f for f in all_files if f.endswith('.csv'))

if len(csv_files) < NO_SEPSIS_FIS_SAMPLE_SIZE:
    raise ValueError(
        'Need at least {} CSV files in {!r}, found {}'.format(
            NO_SEPSIS_FIS_SAMPLE_SIZE, folder_path, len(csv_files)
        )
    )

# A private, seeded generator keeps the draw repeatable without touching the
# global `random` state; every file not drawn goes to ANFIS.
fis_files = random.Random(NO_SEPSIS_SPLIT_SEED).sample(csv_files, NO_SEPSIS_FIS_SAMPLE_SIZE)
fis_selected = set(fis_files)  # set membership instead of scanning a list
anfis_files = [f for f in csv_files if f not in fis_selected]

# Load both subsets with the same loop instead of two copy-pasted ones.
no_sepsis_frames = {}
for split_name, split_files in (('fis', fis_files), ('anfis', anfis_files)):
    dataframes = []
    for file in split_files:
        file_path = os.path.join(folder_path, file)
        patient_no = os.path.splitext(file)[0]  # file name without '.csv'
        df = pd.read_csv(file_path)
        # patient_id = file number * 10 for this folder (no offset added)
        df['patient_id'] = int(patient_no) * 10
        # Missing sepsis_icd values are filled with 0 for this folder
        df['sepsis_icd'] = df['sepsis_icd'].fillna(0)
        dataframes.append(df)
    no_sepsis_frames[split_name] = pd.concat(dataframes)

fis_no_sepsis_df = no_sepsis_frames['fis']
anfis_no_sepsis_df = no_sepsis_frames['anfis']

In [31]:
# Stack the sepsis and non-sepsis frames of each split (sepsis rows first).
fis_df, anfis_df = (
    pd.concat(list(cohort_pair))
    for cohort_pair in (
        (fis_sepsis_df, fis_no_sepsis_df),
        (anfis_sepsis_df, anfis_no_sepsis_df),
    )
)

In [32]:
# Distinct values per column of the FIS frame (column-wise, NaN not counted)
fis_df.nunique(axis=0, dropna=True)

heart_rate      122
bp_systolic     163
bp_diastolic    115
map             271
resp             55
temp            292
spo2             39
fio2             19
wbc             278
bun             101
bilirubin        82
creatinine       82
lactate         120
platelets       369
ph               57
pco2             69
po2             334
bicarbonate      36
hemoglobin      107
hematocrit      252
potassium        50
chloride         46
gcs              13
age             455
sirs              2
qsofa             2
sepsis_icd        2
patient_id      500
dtype: int64

In [33]:
# Distinct values per column of the ANFIS frame (column-wise, NaN not counted)
anfis_df.nunique(axis=0, dropna=True)

heart_rate       150
bp_systolic      182
bp_diastolic     135
map              344
resp              64
temp             346
spo2              61
fio2              22
wbc              388
bun              144
bilirubin        138
creatinine       104
lactate          158
platelets        517
ph                77
pco2              94
po2              427
bicarbonate       40
hemoglobin       119
hematocrit       292
potassium         61
chloride          56
gcs               13
age             1253
sirs               2
qsofa              2
sepsis_icd         2
patient_id      1500
dtype: int64

In [34]:
# Write each combined frame to its CSV file, leaving out the row index.
for combined_frame, csv_name in ((fis_df, 'fis.csv'), (anfis_df, 'anfis.csv')):
    combined_frame.to_csv(csv_name, index=False)