<a href="https://colab.research.google.com/github/mythogenesys/Andrew-NG-DL-practises/blob/main/ECG_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
#      REFACTORED: Full ECG Denoising Ablation & Validation Study
# ==============================================================================
# This notebook runs the complete ECG study using the new, refactored
# `run_training.py` and `run_validation.py` scripts.
# ==============================================================================
import os
from google.colab import drive

# ---
# [Step 1/4] SETUP: Mount Drive & Clone/Update Repository
# ---
print("--- [Step 1/4] Setting up the environment ---")
drive.mount('/content/drive')

GIT_REPO = "ecg-denoiser-hackathon"
GIT_PATH = "https://github.com/Mohan-CAS-and-hackathons/ecg-denoiser-hackathon.git"
PROJECT_DIR = f"/content/{GIT_REPO}"

if not os.path.exists(PROJECT_DIR):
    print(f"\nCloning repository...")
    !git clone -q {GIT_PATH} {PROJECT_DIR}
else:
    print(f"\nRepository exists. Pulling latest changes...")
    !cd {PROJECT_DIR} && git pull -q

os.chdir(PROJECT_DIR)
print(f"✅ Current directory: {os.getcwd()}")



--- [Step 1/4] Setting up the environment ---
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Cloning repository...
✅ Current directory: /content/ecg-denoiser-hackathon


In [None]:
import os

# Step back out to the Colab workspace root (/content), i.e. the parent
# directory of the repository checkout, and echo the resulting location.
COLAB_WORKSPACE = "/content"
os.chdir(COLAB_WORKSPACE)
print(os.getcwd())


/content


In [None]:
!rm -rf ecg-denoiser-hackathon

In [None]:
!pwd
!ls

/content
drive  sample_data


In [None]:
# ---
# [Step 2/4] ENVIRONMENT: Install dependencies
# ---
print("\n--- [Step 2/4] Installing dependencies ---")
!pip install -q -r requirements.txt
# Ensure wfdb is up-to-date for PhysioNet downloads
# !pip install --upgrade -q wfdb




--- [Step 2/4] Installing dependencies ---


In [None]:
!pip install --upgrade -q wfdb


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.2 which is incompatible.
cudf-cu12 25.6.0 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.2 which is incompatible.
dask-cudf-cu12 25.6.0 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.2 which is incompatible.[0m[31m
[0m

In [None]:
# ==============================================================================
#                      FINAL DIAGNOSTIC CELL
# ==============================================================================
import os
from google.colab import drive

# --- 1. SETUP: Mount Drive & Point to project ---
print("--- Setting up environment ---")
drive.mount('/content/drive')
PROJECT_DIR = "/content/ecg-denoiser-hackathon"
os.chdir(PROJECT_DIR)
print(f"Current directory: {os.getcwd()}")

# --- 2. DEFINE DATA PATH: This must be the correct path from your Drive ---
DATA_DIR_ECG = '/content/drive/MyDrive/ecg_denoiser_hackathon/data/mit-bih-arrhythmia-database-1.0.0'

# --- 3. RUN DIAGNOSTIC: This will test wfdb access directly ---
print("\n--- Running WFDB access check ---")
!python3 src/experiments/check_wfdb_access.py --data_dir "{DATA_DIR_ECG}"

--- Setting up environment ---
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current directory: /content/ecg-denoiser-hackathon

--- Running WFDB access check ---
--- WFDB Access Diagnostic Tool ---
Testing with data directory: '/content/drive/MyDrive/ecg_denoiser_hackathon/data/mit-bih-arrhythmia-database-1.0.0'
Constructed path prefix for testing: '/content/drive/MyDrive/ecg_denoiser_hackathon/data/mit-bih-arrhythmia-database-1.0.0/100'

[Test 1/2]: Attempting to read signal record with wfdb.rdrecord...

[SUCCESS]: Successfully read the record!
  - Signal shape: (650000, 2)
  - Sampling Freq: 360 Hz

[Test 2/2]: Attempting to read annotations with wfdb.rdann...

[SUCCESS]: Successfully read the annotation file!
  - Found 2274 annotations.
  - First few symbols: ['+', 'N', 'N', 'N', 'N']

Diagnostic complete.


In [None]:
print("\n--- [Step 3/4] Starting Ablation Study Training ---")

DRIVE_BASE_PATH = '/content/drive/MyDrive/ecg_denoiser_hackathon/'
DATA_DIR_ECG = os.path.join(DRIVE_BASE_PATH, 'data/mit-bih-arrhythmia-database-1.0.0')
DATA_DIR_NOISE = os.path.join(DRIVE_BASE_PATH, 'data/mit-bih-noise-stress-test-database-1.0.0')
MODEL_DIR = os.path.join(DRIVE_BASE_PATH, 'models2')
os.makedirs(MODEL_DIR, exist_ok=True)

MODEL_PATH_L1_ONLY = os.path.join(MODEL_DIR, 'denoiser_l1_only.pth')
MODEL_PATH_L1_GRAD = os.path.join(MODEL_DIR, 'denoiser_l1_grad.pth')
MODEL_PATH_STPC_FULL = os.path.join(MODEL_DIR, 'denoiser_stpc_full.pth')
MODEL_PATH_CLASSIFIER = os.path.join(MODEL_DIR, 'ecg_classifier_model.pth')

print("\n--- Training the ECG Classifier ---")
!python3 src/experiments/run_training.py ecg_classifier \
    --data_dir "/content/drive/MyDrive/ecg_denoiser_hackathon/data/mit-bih-arrhythmia-database-1.0.0" \
    --save_path "{MODEL_PATH_CLASSIFIER}" \
    --epochs 5

print("\n--- [Run 1/3] Training Denoiser with L1 Loss Only ---")
!python3 src/experiments/run_training.py ecg_denoiser \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --save_path "{MODEL_PATH_L1_ONLY}" \
    --epochs 5 --no-gradient-loss --no-fft-loss

print("\n--- [Run 2/3] Training Denoiser with L1 + Gradient Loss ---")
!python3 src/experiments/run_training.py ecg_denoiser \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --save_path "{MODEL_PATH_L1_GRAD}" \
    --epochs 5 --no-fft-loss

print("\n--- [Run 3/3] Training Denoiser with Full STPC Loss ---")
!python3 src/experiments/run_training.py ecg_denoiser \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --save_path "{MODEL_PATH_STPC_FULL}" \
    --epochs 5

print("\n✅ Ablation study training complete!")


--- [Step 3/4] Starting Ablation Study Training ---

--- Training the ECG Classifier ---
--- Training ECG Classifier ---
Extracting all annotated heartbeats from the dataset...
100% 48/48 [00:04<00:00, 11.36it/s]
Extracted a total of 108618 beats.
Label distribution: Counter({0: 90608, 2: 7235, 4: 7192, 1: 2781, 3: 802})
Epoch 1/5: 100% 679/679 [00:04<00:00, 156.91it/s, loss=0.16]
Validation Accuracy: 96.14%
Epoch 2/5: 100% 679/679 [00:03<00:00, 212.63it/s, loss=0.194]
Validation Accuracy: 96.99%
Epoch 3/5: 100% 679/679 [00:03<00:00, 212.59it/s, loss=0.157]
Validation Accuracy: 97.71%
Epoch 4/5: 100% 679/679 [00:05<00:00, 128.14it/s, loss=0.069]
Validation Accuracy: 97.85%
Epoch 5/5: 100% 679/679 [00:03<00:00, 208.21it/s, loss=0.0769]
Validation Accuracy: 97.91%
Classifier model saved to /content/drive/MyDrive/ecg_denoiser_hackathon/models2/ecg_classifier_model.pth

--- [Run 1/3] Training Denoiser with L1 Loss Only ---
--- Training ECG Denoiser ---
Config: L1 Loss Enabled, Gradient Lo

In [None]:
print("\n--- [Step 4/4] Starting End-to-End Validation for All Models ---")

RESULTS_DIR = os.path.join(DRIVE_BASE_PATH, 'results2')
os.makedirs(RESULTS_DIR, exist_ok=True)

OUTPUT_PREFIX_L1_ONLY = os.path.join(RESULTS_DIR, 'l1_only_final')
OUTPUT_PREFIX_L1_GRAD = os.path.join(RESULTS_DIR, 'l1_grad_final')
OUTPUT_PREFIX_STPC_FULL = os.path.join(RESULTS_DIR, 'stpc_full_final')

print("\n--- [Run 1/3] Validating L1 Only Model ---")
!python3 src/experiments/run_validation.py ecg_downstream \
    --denoiser_path "{MODEL_PATH_L1_ONLY}" \
    --classifier_path "{MODEL_PATH_CLASSIFIER}" \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --output_prefix "{OUTPUT_PREFIX_L1_ONLY}"

print("\n--- [Run 2/3] Validating L1 + Gradient Model ---")
!python3 src/experiments/run_validation.py ecg_downstream \
    --denoiser_path "{MODEL_PATH_L1_GRAD}" \
    --classifier_path "{MODEL_PATH_CLASSIFIER}" \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --output_prefix "{OUTPUT_PREFIX_L1_GRAD}"

print("\n--- [Run 3/3] Validating Full STPC Model ---")
!python3 src/experiments/run_validation.py ecg_downstream \
    --denoiser_path "{MODEL_PATH_STPC_FULL}" \
    --classifier_path "{MODEL_PATH_CLASSIFIER}" \
    --data_dir "{DATA_DIR_ECG}" \
    --noise_dir "{DATA_DIR_NOISE}" \
    --output_prefix "{OUTPUT_PREFIX_STPC_FULL}"

print("\n\n✅✅✅ Full ECG Study Complete! Check your Google Drive for results. ✅✅✅")


--- [Step 4/4] Starting End-to-End Validation for All Models ---

--- [Run 1/3] Validating L1 Only Model ---
--- Running End-to-End ECG Downstream Validation ---
Loading models...
Loading record: 201 from /content/drive/MyDrive/ecg_denoiser_hackathon/data/mit-bih-arrhythmia-database-1.0.0
Denoising: 100% 221/221 [00:01<00:00, 210.74it/s]

--- PERFORMANCE ON NOISY SIGNAL ---
              precision    recall  f1-score   support

           N       0.92      0.98      0.95      1635
           S       0.50      0.04      0.07       128
           V       0.98      0.72      0.83       198
           F       0.00      0.00      0.00         2
           Q       0.00      0.00      0.00         0

    accuracy                           0.89      1963
   macro avg       0.48      0.35      0.37      1963
weighted avg       0.89      0.89      0.88      1963


--- PERFORMANCE ON DENOISED SIGNAL ---
              precision    recall  f1-score   support

           N       0.95      1.00     