# 5. Feature Extraction Module Tutorial

This notebook demonstrates how to use the `src/feature_extraction.py` module to compute a comprehensive set of metrics from processed sarcomere data. It loads the sarcomere detection results, the smoothed sarcomere length signal, the cell state analysis results, and the segmentation results (bounding boxes and convex hulls), then calls the `extract_metrics` function to compute the metrics and prints them.

In [1]:
import numpy as np
import pandas as pd
import json

from pathlib import Path

# Import the core function from the project's feature_extraction module
from src.feature_extraction import extract_metrics

# --- Configuration ---
# Everything a reader may need to tune lives in this cell.
INPUT_DIR = Path("data")
OUTPUT_DIR = Path("results")

ND2_FILENAME = "Quartz.2862.D1.Myk.baseline.GFP.m002.nd2" # <<< IMPORTANT: CHANGE THIS TO YOUR ND2 FILE NAME
# Strip only the final extension. A fixed [:-4] slice would silently mangle
# names whose extension is not exactly four characters long (e.g. ".tiff").
OUTPUT_FILENAME = Path(ND2_FILENAME).stem

# Files expected in OUTPUT_DIR; all of them share the OUTPUT_FILENAME prefix.
SARCOMERE_CSV_FILENAME = OUTPUT_DIR / f"{OUTPUT_FILENAME}_detection_results_sarcomeres.csv"
SMOOTHED_DATA_FILENAME = OUTPUT_DIR / f"{OUTPUT_FILENAME}_denoised_sarcomere_length.csv"
ANALYSIS_RESULTS_FILENAME = OUTPUT_DIR / f"{OUTPUT_FILENAME}_cell_state.json"
BOUNDING_BOX_FILENAME = OUTPUT_DIR / f"{OUTPUT_FILENAME}_segmentation_results.npz"

# Define conversion factors
PIXEL_TO_MICRON = 0.1625 # Example value, adjust if your actual pixel size is different
FRAME_RATE = 100.0       # Example: 100 frames per second. Adjust to your video's frame rate.

In [2]:
# Load raw sarcomere data
sarcomere_data_df = pd.read_csv(SARCOMERE_CSV_FILENAME)

# Load smoothed signal data; only the 'length_smoothed' column is passed on
smoothed_df = pd.read_csv(SMOOTHED_DATA_FILENAME)
smoothed_signal_array = smoothed_df['length_smoothed'].values

# Load analysis results (cell states).
# JSON text is UTF-8 by specification, so do not rely on the locale default encoding.
with open(ANALYSIS_RESULTS_FILENAME, 'r', encoding='utf-8') as f:
    analysis_results_dict = json.load(f)

# Load bounding box data
# SECURITY NOTE(review): allow_pickle=True unpickles object arrays, which can
# execute arbitrary code. Only load .npz archives produced by a trusted pipeline.
bounding_box_data = np.load(BOUNDING_BOX_FILENAME, allow_pickle=True)
bounding_boxes_px = bounding_box_data['boxes']
convex_hulls_px = bounding_box_data['hull_points']

In [3]:
# --- Extract Metrics ---
# Collect every keyword argument in one mapping so the inputs can be inspected
# before the call, then unpack the mapping into extract_metrics.
metric_inputs = {
    "sarcomere_data_df": sarcomere_data_df,
    "smoothed_signal_array": smoothed_signal_array,
    "analysis_results_dict": analysis_results_dict,
    "bounding_boxes_px": bounding_boxes_px,
    "convex_hulls_px": convex_hulls_px,
    "PIXEL_TO_MICRON": PIXEL_TO_MICRON,
    "FRAME_RATE": FRAME_RATE,
    # Keep this consistent with analyze.py if it's a shared parameter
    "FRAME_AVG_RANGE": 5,
}
extracted_metrics = extract_metrics(**metric_inputs)
print("Feature extraction complete.")


Feature extraction complete.


In [6]:
# --- Display Extracted Metrics ---
# Wrap the metrics in a Series so each metric name is printed next to its value.
metrics_series = pd.Series(extracted_metrics)
# Print the header that the recorded output of this cell starts with, so a
# fresh Restart & Run All reproduces the saved output.
print("--- Extracted Metrics ---")
print(metrics_series.to_string()) # Use to_string() to ensure all rows are printed


--- Extracted Metrics ---
num_contractions                                 5.000000
contraction_period                               2.558000
contraction_frequency                            0.396281
relaxed_sarcomere_length_mean                    1.839623
relaxed_sarcomere_length_median                  1.856818
relaxed_sarcomere_length_q25                     1.724866
relaxed_sarcomere_length_q75                     1.973438
relaxed_sarcomere_length_std                     0.188806
peak_sarcomere_length_mean                       1.699196
peak_sarcomere_length_median                     1.683541
peak_sarcomere_length_q25                        1.579173
peak_sarcomere_length_q75                        1.804470
peak_sarcomere_length_std                        0.178319
shortening_amplitude                             0.141854
peak_shortening_velocity                        -0.492219
peak_lengthening_velocity                        0.260208
contraction_onset_to_relaxation_end_time     