In [None]:
import numpy as np
import pandas as pd

# Widen pandas' rendering limits so large/wide frames are shown with less
# truncation. Option names are spelled out in full ("display.…") instead of
# relying on pandas' abbreviated option matching.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", 200)
# Import from the public IPython.display module: pulling `display` from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import HTML, display

# Inject CSS so elements with class "container" span the full page width.
# NOTE(review): this selector targets the classic Notebook layout; it is
# presumably a no-op in other front ends — confirm if that matters.
display(HTML("<style>.container { width:100% !important; }</style>"))

def df_stats(df):
    """Print a per-column summary of ``df`` and return its first rows.

    The shape is printed first, followed by a psql-formatted table with one
    row per column of ``df``: its name, the number of null values, the number
    of distinct values (as reported by ``DataFrame.nunique``) and its dtype.

    Args:
        df: The DataFrame to summarise.

    Returns:
        ``df.head()``.
    """
    # Imported lazily so tabulate is only needed when the summary is printed.
    from tabulate import tabulate

    print("\n***** Shape: ", df.shape, " *****\n")

    # One entry per column of df, all four lists in the same column order.
    summary = pd.DataFrame(
        {
            "Name": df.columns.values.tolist(),
            "Null": df.isnull().sum().values.tolist(),
            "Unique": df.nunique().values.tolist(),
            "Dtypes": df.dtypes.tolist(),
        }
    )
    rendered = tabulate(summary, headers="keys", tablefmt="psql")
    print(rendered)

    return df.head()

In [None]:
import glob
import os
import pandas as pd
from tqdm import tqdm

# Directory scanned (non-recursively) for *.xml files.
path = "/media/data1/muse_ge/deepecg_phase2"
# glob.glob returns matches in arbitrary order; sorting keeps the row order of
# `df` (and everything derived from it) stable across runs and machines.
all_files = sorted(glob.glob(os.path.join(path, "*.xml")))

# One row per file, in a single "path" column. The list is passed directly:
# wrapping it in tqdm only timed pandas iterating an in-memory list, so the
# progress bar carried no information.
df = pd.DataFrame(all_files, columns=["path"])
display(df)

In [None]:
import pandas as pd

import matplotlib.pyplot as plt

# FILEPATH: /volume/DeepECG/DeepECG_Prospective_Get_Files.ipynb
def plot_entries_per_year_month(df, date_col='RestingECG_TestDemographics_AcquisitionDate'):
    """Draw a bar chart of the number of rows per (year, month) of ``date_col``.

    The input frame is left untouched: the date column is parsed into a local
    Series rather than overwritten, and no ``Year``/``Month`` helper columns
    are added to ``df``. Rows whose date parses to NaT are not counted,
    because groupby drops missing keys by default.

    Args:
        df: DataFrame containing ``date_col``.
        date_col: Name of the column holding the dates; its values must be
            parseable by ``pd.to_datetime``. Defaults to the acquisition-date
            column this function was originally hard-wired to.

    Returns:
        None. The figure is shown through ``plt.show()``.

    Raises:
        KeyError: if ``date_col`` is not a column of ``df``.
    """
    # Parse once into a local Series so the caller's column keeps its dtype.
    acquisition_dates = pd.to_datetime(df[date_col])

    # Temporary two-column frame used only for counting; it shares df's index
    # but is never written back to df.
    year_month = pd.DataFrame(
        {
            'Year': acquisition_dates.dt.year,
            'Month': acquisition_dates.dt.month,
        }
    )

    # Series indexed by (Year, Month) holding the row count of each group.
    entries_per_year_month = year_month.groupby(['Year', 'Month']).size()

    entries_per_year_month.plot(kind='bar', figsize=(12, 6))

    plt.xlabel('Year-Month')
    plt.ylabel('Number of Entries')
    plt.title('Number of Entries per Year-Month')
    plt.show()


In [None]:
import CLI_xml2df as xml2df

# Build the converter from the frame of file paths, then run its
# read2flatten() step and keep the result as df_output.
# NOTE(review): presumably out_path is where the converter writes its output
# and read2flatten() returns a DataFrame — confirm in CLI_xml2df.
phase2_converter = xml2df.tinyxml2df(df, out_path="data/ekg_waveforms_output_phase_2/")
df_output = phase2_converter.read2flatten()



In [None]:
import pandas as pd

# Load the earlier parquet export and count its December entries per year.
updated_df_old = pd.read_parquet('/media/data1/muse_ge/ECG_ad202207_1453937_cat_labels_v1.1.parquet')

# Parse the acquisition date once and reuse the parsed Series for the
# converted column and for the Year/Month helper columns.
acquisition_dates = pd.to_datetime(updated_df_old['RestingECG_TestDemographics_AcquisitionDate'])
updated_df_old['RestingECG_TestDemographics_AcquisitionDate'] = acquisition_dates
updated_df_old['Year'] = acquisition_dates.dt.year
updated_df_old['Month'] = acquisition_dates.dt.month

# Row counts per (Year, Month), then only the groups whose Month level is 12.
entries_per_year_month = updated_df_old.groupby(['Year', 'Month']).size()
is_december = entries_per_year_month.index.get_level_values('Month') == 12
december_entries = entries_per_year_month[is_december]
print(december_entries)

# Bars are labelled with the (Year, Month) index tuples; only December groups
# are plotted, so the title says so instead of the copy-pasted
# "per Year-Month" wording.
december_entries.plot(kind='bar', figsize=(12, 6))

plt.xlabel('Year-Month')
plt.ylabel('Number of Entries')
plt.title('Number of December Entries per Year')
plt.show()

### Find the TROU ECGs

In [None]:
# Print the entries found directly under the 2022 folder.
path = "/media/data1/muse_ge/ecg_retrospective_trou/2022"
print(f"Files at path {path}:")
# os.listdir returns names in arbitrary order; sort for a stable, readable
# listing. Entries may be sub-directories as well as files.
for entry in sorted(os.listdir(path)):
    print(entry)


In [None]:
import glob
import os
import pandas as pd
from tqdm import tqdm

path = "/media/data1/muse_ge/ecg_retrospective_trou"
all_files = []

# Walk exactly two directory levels below `path` (<path>/<subdir>/<month_dir>)
# and collect the *.xml files in each second-level directory. os.listdir and
# glob.glob both return names in arbitrary order, so every level is sorted to
# make the row order of `df` reproducible.
for subdir in sorted(os.listdir(path)):
    subdir_path = os.path.join(path, subdir)
    if not os.path.isdir(subdir_path):
        continue  # skip stray files at the top level
    for month_dir in sorted(os.listdir(subdir_path)):
        month_dir_path = os.path.join(subdir_path, month_dir)
        if not os.path.isdir(month_dir_path):
            continue  # skip stray files next to the second-level folders
        print(month_dir_path)
        month_dir_files = sorted(glob.glob(os.path.join(month_dir_path, "*.xml")))
        all_files.extend(month_dir_files)

# One row per discovered file, in a single "path" column.
df = pd.DataFrame(all_files, columns=["path"])
print(f"Found {len(df)} files")


In [None]:
import CLI_xml2df as xml2df

# Build the converter from the frame of file paths, then run its
# read2flatten() step; the result replaces any earlier df_output.
# NOTE(review): presumably out_path is where the converter writes its output
# and read2flatten() returns a DataFrame — confirm in CLI_xml2df.
trou_converter = xml2df.tinyxml2df(df, out_path="data/ekg_waveforms_output_trou/")
df_output = trou_converter.read2flatten()

In [None]:
# Bar chart of entry counts per (year, month) for df_output. The function reads
# the 'RestingECG_TestDemographics_AcquisitionDate' column, so df_output must
# contain it.
plot_entries_per_year_month(df_output)