# In[ ]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm
from matplotlib.backends.backend_pdf import PdfPages

# In[ ]:
def plot_features_from_dataframe(df, subject_id, pdf_pages):
    """Scatter-plot each feature column of one subject and append the page to a PDF.

    One subplot is drawn per feature on a 4x5 grid. Within a subplot the
    x-axis is the positional row index of ``df`` and the y-axis is the
    feature value; points are coloured by the row's ``rhythmLabel``. Rows
    whose label is not one of ``"N"``, ``"A"`` or ``"O"`` are not plotted.

    Args:
        df: DataFrame with a ``rhythmLabel`` column and one column per entry
            of ``features_to_plot`` below.
        subject_id: Identifier shown in the figure title.
        pdf_pages: Open ``PdfPages`` object; the figure is added as one page.

    Raises:
        KeyError: If ``rhythmLabel`` or any of the feature columns is missing.
    """
    features_to_plot = [
        'StoS', 'StoR', 'StoL', 'RtoS', 'RtoR', 'RtoL', 'LtoS', 'LtoR', 'LtoL',
        'std', 'cov', 'range', 'rrInt_var', 'rmean_var', 'rmssd', 'mad', 'iqr',
        'entropy', 'approx_entropy'
    ]

    rhythm_colors = {
        "N": "blue",
        "A": "red",
        "O": "green"
    }

    nrows = 4
    ncols = 5

    window_rhythms = df['rhythmLabel'].tolist()

    # The row positions belonging to each rhythm do not depend on the feature
    # being plotted, so compute them once instead of once per subplot.
    indices_by_rhythm = {
        rhythm: [i for i, r in enumerate(window_rhythms) if r == rhythm]
        for rhythm in rhythm_colors
    }

    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(20, 15))
    try:
        fig.suptitle(f'Subject: {subject_id}', fontsize=16)

        for idx, feature in enumerate(features_to_plot):
            ax = axes[idx // ncols, idx % ncols]
            values = df[feature]
            for rhythm, color in rhythm_colors.items():
                indices = indices_by_rhythm[rhythm]
                ax.scatter(indices, values.iloc[indices], color=color, label=f"{rhythm}", alpha=0.5)

            ax.set_xlabel('Stepping Window')
            ax.set_ylabel('Feature Value')
            ax.set_title(f'Feature: {feature}')
            ax.legend()
            ax.grid(True)

        # The grid has more cells than features; drop the unused trailing axes.
        for unused_ax in axes.flatten()[len(features_to_plot):]:
            fig.delaxes(unused_ax)

        plt.tight_layout()
        # Leave headroom for the suptitle, which tight_layout does not account for.
        plt.subplots_adjust(top=0.95)
        plt.show()
        pdf_pages.savefig(fig)
    finally:
        # Always release the figure: callers catch exceptions and continue with
        # the next subject, so a failure here must not leak an open figure.
        plt.close(fig)

# Root directory holding one sub-directory of per-subject parquet files per size.
feature_stem = Path("../mit-bih-time-features-stepping/")
# Output directory: one multi-page PDF is written per size sub-directory.
pdf_stem = Path("features_over_time/")
# exist_ok avoids the check-then-create race of a separate exists() test.
pdf_stem.mkdir(exist_ok=True)
subdirectories = ["100", "200", "400"]

for size in (bar := tqdm(subdirectories)):
    bar.set_description(f"Processing size {size}")
    feature_directory = feature_stem / size
    pdf_filename = pdf_stem / f"{size}.pdf"
    with PdfPages(pdf_filename) as pdf_pages:
        # Sort so the page order is reproducible; rglob order is filesystem-dependent.
        # Materialising the list also gives tqdm a total for the progress bar.
        parquet_files = sorted(feature_directory.rglob("*.parquet"))
        for filename in (pbar := tqdm(parquet_files, leave=False)):
            subject_id = filename.stem
            pbar.set_description(f"Processing subject {subject_id}")
            try:
                df = pd.read_parquet(filename)
                plot_features_from_dataframe(df, subject_id, pdf_pages)
            except Exception as e:
                # Best-effort batch run: report the failing subject and continue
                # so one bad file does not abort the whole PDF.
                print(f"Error processing {subject_id}. Error: {e}")

    print(f"Plots for size {size} saved to {pdf_filename}")