In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [None]:
# Competition metadata: the sample submission (parquet) plus the test and
# train index tables (CSV). The three reads are independent of each other.
submission = pd.read_parquet(
    "/kaggle/input/physionet-ecg-image-digitization/sample_submission.parquet"
)
test = pd.read_csv(
    '/kaggle/input/physionet-ecg-image-digitization/test.csv'
)
train = pd.read_csv(
    '/kaggle/input/physionet-ecg-image-digitization/train.csv'
)

In [None]:
# Report (rows, columns) of the train table, then show its first five rows.
print('Train shape', train.shape, sep=' ')
train.head(n=5)

In [None]:
# Report (rows, columns) of the test table, then show its first five rows.
print('Test shape', test.shape, sep=' ')
test.head(n=5)

In [None]:
# Report (rows, columns) of the sample submission, then show its first five rows.
print('Sample shape', submission.shape, sep=' ')
submission.head(n=5)

In [None]:
import warnings

# Suppress every warning category from this point on.
warnings.filterwarnings('ignore')

TRAIN_DIR = '/kaggle/input/physionet-ecg-image-digitization/train/'

# Pick one training record by its index label and echo its id.
idx = 0
name = str(train.id[idx])
print(train.id[idx])

# NOTE: despite its name, `df_with_id0` holds the *path* of the record's CSV
# (<TRAIN_DIR><id>/<id>.csv), not a DataFrame.
df_with_id0 = f"{TRAIN_DIR}{name}/{name}.csv"

df = pd.read_csv(df_with_id0)
df.head(n=5)

In [None]:
import os
from scipy.signal import butter, filtfilt

# Re-establish the inputs this cell relies on so it can run on its own:
# the training-record directory and the train/test index tables.
TRAIN_DIR = '/kaggle/input/physionet-ecg-image-digitization/train/'
test = pd.read_csv(
    '/kaggle/input/physionet-ecg-image-digitization/test.csv'
)
train = pd.read_csv(
    '/kaggle/input/physionet-ecg-image-digitization/train.csv'
)

def create_submission(predictions, name=None, test_df=None):
    """Flatten per-lead predictions into the long submission table and save it as CSV.

    Parameters
    ----------
    predictions : dict
        Maps ``(id, lead)`` to a 1-D sequence of predicted sample values.
    name : str, optional
        Suffix of the output file. A falsy value writes ``submission.csv``;
        otherwise ``submission_{name}.csv`` is written.
    test_df : pandas.DataFrame, optional
        Table with ``id``, ``lead`` and ``number_of_rows`` columns that
        defines which signals are emitted and in which order. Defaults to the
        notebook-level ``test`` frame.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``id`` (``"{id}_{row}_{lead}"``) and
        ``value``.

    Raises
    ------
    KeyError
        If an ``(id, lead)`` pair of the test table has no prediction.
    ValueError
        If a prediction does not contain exactly ``number_of_rows`` samples.
    """
    rows = test if test_df is None else test_df

    submission_data = []
    # Iterate the columns directly: unlike iterrows(), this never coerces the
    # id / row-count values to a common row dtype.
    for base_id, lead, n_rows in zip(rows['id'], rows['lead'], rows['number_of_rows']):
        signal = predictions[(base_id, lead)]

        # The expected row count used to be read and then ignored, so a
        # prediction of the wrong length was written out without any notice.
        if len(signal) != n_rows:
            raise ValueError(
                f"Prediction for id={base_id}, lead={lead} has {len(signal)} "
                f"samples, expected {n_rows}"
            )

        submission_data.extend(
            {'id': f"{base_id}_{row_id}_{lead}", 'value': value}
            for row_id, value in enumerate(signal)
        )

    # Explicit columns keep the schema stable even when there are no rows.
    submission_df = pd.DataFrame(submission_data, columns=['id', 'value'])

    filename = f'submission_{name}.csv' if name else 'submission.csv'
    submission_df.to_csv(filename, index=False)
    return submission_df

print("Sine Wave ECG-like Signal")

# Baseline 1: a purely synthetic waveform built from a few sinusoids.
# Per-lead scale and DC offset of the synthetic waveform.
predictions_sine = {}
ecg_params = {
    'I': {'amplitude': 0.5, 'offset': 0.1}, 'II': {'amplitude': 0.8, 'offset': 0.2},
    'III': {'amplitude': 0.4, 'offset': 0.1}, 'aVR': {'amplitude': -0.3, 'offset': -0.1},
    'aVL': {'amplitude': 0.2, 'offset': 0.05}, 'aVF': {'amplitude': 0.3, 'offset': 0.1},
    'V1': {'amplitude': 0.3, 'offset': 0.0}, 'V2': {'amplitude': 0.4, 'offset': 0.05},
    'V3': {'amplitude': 0.5, 'offset': 0.1}, 'V4': {'amplitude': 0.6, 'offset': 0.15},
    'V5': {'amplitude': 0.5, 'offset': 0.1}, 'V6': {'amplitude': 0.4, 'offset': 0.05}
}

for _, test_row in test.iterrows():
    base_id = test_row['id']
    fs = test_row['fs']
    n_rows = test_row['number_of_rows']
    lead = test_row['lead']

    # Sample instants in seconds, spaced exactly 1/fs apart. The previous
    # endpoint-inclusive linspace over a fixed 10 s / 2.5 s window ignored
    # `fs` and produced a spacing of duration / (n_rows - 1) instead.
    t = np.arange(n_rows) / fs

    # Leads without an entry fall back to a generic amplitude/offset.
    params = ecg_params.get(lead, {'amplitude': 0.3, 'offset': 0.1})

    heart_rate = 1.0  # frequency (Hz) of the main sinusoid
    main_rhythm = params['amplitude'] * np.sin(2 * np.pi * heart_rate * t)
    # Small 5 Hz component at 10% of the lead amplitude.
    p_wave = 0.1 * params['amplitude'] * np.sin(2 * np.pi * 5 * t + 0.5)
    # 15 Hz component whose phase restarts every 1 / heart_rate seconds.
    qrs_complex = 0.3 * params['amplitude'] * np.sin(2 * np.pi * 15 * (t % (1/heart_rate)))

    ecg_signal = params['offset'] + main_rhythm + p_wave + qrs_complex
    noise = np.random.normal(0, 0.02, n_rows)

    predictions_sine[(base_id, lead)] = ecg_signal + noise

print("Statistical Model")

# Baseline 2: per-lead Gaussian noise centred on the training median.

# Gather every non-missing training sample per CSV column. Whole arrays are
# collected and concatenated once, instead of extending a Python list one
# scalar at a time, which keeps memory use proportional to the raw data.
lead_chunks = {}
unreadable_records = []

for record_id in train['id']:
    ecg_path = f"{TRAIN_DIR}{record_id}/{record_id}.csv"
    if not os.path.exists(ecg_path):
        continue
    try:
        ecg_data = pd.read_csv(ecg_path)
    except Exception as exc:
        # Best effort: keep going, but remember what was skipped so the
        # failure is visible instead of silently shrinking the sample pool.
        unreadable_records.append((ecg_path, exc))
        continue

    for lead in ecg_data.columns:
        chunks = lead_chunks.setdefault(lead, [])
        values = ecg_data[lead].dropna().values
        if len(values) > 0:
            chunks.append(values)

if unreadable_records:
    print(f"Skipped {len(unreadable_records)} unreadable training CSV file(s); "
          f"first: {unreadable_records[0][0]}")

all_ecg_stats = {
    lead: np.concatenate(chunks) if chunks else np.array([])
    for lead, chunks in lead_chunks.items()
}
stats_available = any(len(values) > 0 for values in all_ecg_stats.values())

# Summary statistics per column that had at least one sample.
global_stats = {}
if stats_available:
    for lead, values in all_ecg_stats.items():
        if len(values) > 0:
            values = np.asarray(values)
            global_stats[lead] = {
                'mean': np.mean(values),
                # A single sample has no spread; use a small default instead.
                'std': np.std(values) if len(values) > 1 else 0.1,
                'median': np.median(values),
                'min': np.min(values),
                'max': np.max(values)
            }

predictions_stats = {}
for _, test_row in test.iterrows():
    base_id = test_row['id']
    n_rows = test_row['number_of_rows']
    lead = test_row['lead']

    if lead in global_stats:
        base_value = global_stats[lead]['median']
        amplitude = global_stats[lead]['std'] * 0.5
    else:
        # No training statistics for this lead: zero baseline, fixed spread.
        base_value = 0
        amplitude = 0.1

    signal = base_value + np.random.normal(0, amplitude, n_rows)

    predictions_stats[(base_id, lead)] = signal


print("Piecewise Approximation Model")

# Baseline 3: repeat a beat made of three half/full sine bumps.
# Each wave of one beat: (onset after beat start [s], length [s],
# gain relative to the lead amplitude, final phase of the sine segment).
beat_waves = [
    (0.0, 0.1, 0.1, np.pi),        # P wave: 100 ms half sine
    (0.2, 0.08, 1.0, 2 * np.pi),   # QRS complex: 80 ms full sine cycle
    (0.4, 0.2, 0.3, np.pi),        # T wave: 200 ms half sine
]

predictions_piecewise = {}
for _, test_row in test.iterrows():
    base_id = test_row['id']
    fs = test_row['fs']
    n_rows = test_row['number_of_rows']
    lead = test_row['lead']

    duration = 10.0 if lead == 'II' else 2.5

    params = ecg_params.get(lead, {'amplitude': 0.3, 'offset': 0.1})
    stats = global_stats.get(lead, {'median': 0, 'std': 0.1})

    signal = np.zeros(n_rows)

    heart_period = 0.8  # seconds between consecutive beat starts
    for i in range(int(duration / heart_period) + 1):
        start_idx = int(i * heart_period * fs)
        if start_idx >= n_rows:
            break

        for onset_s, length_s, gain, end_phase in beat_waves:
            wave_start = start_idx + int(onset_s * fs)
            wave_len = int(length_s * fs)
            # Slice ends are exclusive, so a wave that finishes exactly on the
            # last sample still fits; the former `<` test dropped it.
            if wave_start + wave_len <= n_rows:
                signal[wave_start:wave_start + wave_len] += (
                    gain * params['amplitude'] * np.sin(np.linspace(0, end_phase, wave_len))
                )

    # Shift to the lead's training median and add a little Gaussian noise.
    signal = stats['median'] + signal + np.random.normal(0, stats['std'] * 0.05, n_rows)
    predictions_piecewise[(base_id, lead)] = signal

In [None]:
leads = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
template_len = 500

# Build one average waveform ("template") per lead from the training signals.
# Every training CSV is read exactly once and its columns are fanned out to
# the per-lead buckets; looping over leads on the outside re-read each file
# once per lead. Records are still visited in `train` order, so each bucket
# holds the same signals in the same order as before.
signals_by_lead = {lead: [] for lead in leads}
skipped_signals = []
resample_grid = np.linspace(0, 1, template_len)

for record_id in train['id']:
    csv_path = os.path.join(TRAIN_DIR, str(record_id), f"{record_id}.csv")

    if not os.path.exists(csv_path):
        continue

    try:
        record = pd.read_csv(csv_path)
    except Exception as exc:
        # Best effort, but not a bare `except`: Ctrl-C must still interrupt.
        skipped_signals.append((csv_path, None, exc))
        continue

    for lead in leads:
        if lead not in record.columns:
            continue

        try:
            s = record[lead].dropna().values.astype(np.float32)
            # Too short to carry a usable shape.
            if len(s) < 50:
                continue

            # Z-score, then stretch/compress onto the common template length.
            s_norm = (s - s.mean()) / (s.std() + 1e-8)
            s_resamp = np.interp(
                resample_grid,
                np.linspace(0, 1, len(s_norm)),
                s_norm
            )
            signals_by_lead[lead].append(s_resamp)
        except Exception as exc:
            skipped_signals.append((csv_path, lead, exc))
            continue

if skipped_signals:
    print(f"Skipped {len(skipped_signals)} unreadable training signal(s); "
          f"first: {skipped_signals[0][0]}")

lead_templates = {}
for lead in leads:
    signals = signals_by_lead[lead]
    if signals:
        lead_templates[lead] = np.mean(signals, axis=0)
    else:
        # No usable training signal for this lead: fall back to one sine cycle.
        t = np.linspace(0, 1, template_len)
        lead_templates[lead] = np.sin(2 * np.pi * t)

In [None]:
# Turn each lead template into a prediction for every test row: resample to
# the requested length, low-pass filter, then rescale into [min_val, max_val].
predictions = {}
min_val, max_val = 0.0, 0.07

for _, row in test.iterrows():
    base_id, lead, n_rows = row['id'], row['lead'], row['number_of_rows']
    fs = row.get('fs', 500)  # sampling rate; 500 if the column is absent

    # Leads without their own template borrow the lead II template.
    template = lead_templates.get(lead, lead_templates['II']).copy()

    signal = template
    if len(template) != n_rows:
        # Linear interpolation onto a grid with n_rows points.
        signal = np.interp(
            np.linspace(0, 1, n_rows),
            np.linspace(0, 1, len(template)),
            template
        )

    if len(signal) > 10:
        # 2nd-order Butterworth low-pass at 15 Hz (normalised cutoff capped at
        # 0.99), applied forward and backward with filtfilt.
        nyq = 0.5 * fs
        normal_cutoff = min(15.0 / nyq, 0.99)
        b, a = butter(2, normal_cutoff, btype='low')
        signal = filtfilt(b, a, signal)

    s_min, s_max = signal.min(), signal.max()

    if s_max - s_min >= 1e-8:
        # Min-max normalise to [0, 1], then map onto [min_val, max_val].
        signal = (signal - s_min) / (s_max - s_min)
        signal = min_val + signal * (max_val - min_val)
    else:
        # Flat signal: emit the midpoint of the target range.
        signal = np.full(n_rows, (min_val + max_val) / 2)

    predictions[(base_id, lead)] = signal.astype(np.float32)

In [None]:
# Flatten the per-(id, lead) predictions into one row per sample, keyed as
# "<id>_<sample index>_<lead>", and write the result to submission.csv.
submission_data = []
for _, row in test.iterrows():
    base_id, lead, n_rows = row['id'], row['lead'], row['number_of_rows']
    signal = predictions[(base_id, lead)]

    submission_data.extend(
        {'id': f"{base_id}_{i}_{lead}", 'value': float(signal[i])}
        for i in range(n_rows)
    )

submission = pd.DataFrame(submission_data)
submission.to_csv('submission.csv', index=False)
submission.head()