# In [None]:
import os
import glob
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Kaggle input locations for the competition data.
TRAIN_DIR = '/kaggle/input/physionet-ecg-image-digitization/train'
TEST_DIR = '/kaggle/input/physionet-ecg-image-digitization/test'
TRAIN_CSV = '/kaggle/input/physionet-ecg-image-digitization/train.csv'
SAMPLE_SUB = '/kaggle/input/physionet-ecg-image-digitization/sample_submission.parquet'

# Target size (pixels) every image is resized to before reaching the model.
IMG_WIDTH, IMG_HEIGHT = 224, 224

# Top-level training table.
train_df = pd.read_csv(TRAIN_CSV)

# glob yields paths in arbitrary order; sorting (as is already done for the
# test images) makes the dictionaries' insertion order, and therefore the
# subset of samples picked for training, reproducible between runs.
train_patient_csvs = sorted(glob.glob(os.path.join(TRAIN_DIR, '*/*.csv')))
train_data_dict = {}

# Map "<csv file name up to the first dot>" -> signal table.
for csv_path in train_patient_csvs:
    pid = os.path.basename(csv_path).split('.')[0]
    train_data_dict[pid] = pd.read_csv(csv_path)

train_images = sorted(glob.glob(os.path.join(TRAIN_DIR, '*/*.png')))
train_images_dict = {}

# Map "<parent directory name>" -> list of PNG paths found in that directory.
for img_path in train_images:
    pid = os.path.basename(os.path.dirname(img_path))
    train_images_dict.setdefault(pid, []).append(img_path)

def preprocess_signal(signal, length=500):
    """Return *signal* as a NumPy array holding exactly *length* samples.

    Shorter inputs are zero-padded at the end; longer inputs are truncated
    to their first *length* samples.

    Args:
        signal: One-dimensional sequence of samples (list, array, Series...).
        length: Number of samples in the result. Must be non-negative.

    Returns:
        A ``numpy.ndarray`` with ``length`` entries, whichever branch is
        taken (a truncated array input is returned as a view, not a copy).

    Raises:
        ValueError: If *length* is negative.
    """
    if length < 0:
        # A negative length would otherwise silently slice from the end.
        raise ValueError(f"length must be non-negative, got {length}")

    # Coerce up front so both branches return the same type; without this a
    # list or Series input came back unchanged in type when truncated.
    signal = np.asarray(signal)

    missing = length - len(signal)
    if missing > 0:
        return np.pad(signal, (0, missing), 'constant')
    return signal[:length]

def preprocess_image(img_path):
    """Load an image file and turn it into a single-channel model input.

    The image is read with OpenCV, converted to grayscale, resized to
    ``IMG_WIDTH`` x ``IMG_HEIGHT`` and divided by 255.

    Args:
        img_path: Path of the image file to load.

    Returns:
        Floating-point array of shape ``(IMG_HEIGHT, IMG_WIDTH, 1)``.

    Raises:
        OSError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise OSError(f"Could not read image: {img_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.resize takes the target size as (width, height); the resulting
    # array is indexed (row, column), i.e. (IMG_HEIGHT, IMG_WIDTH).
    gray = cv2.resize(gray, (IMG_WIDTH, IMG_HEIGHT))
    scaled = gray / 255.0
    # Append the channel axis expected by the convolutional layers.
    return np.expand_dims(scaled, axis=-1)

# Assemble at most `num_samples` (image, lead-I signal) training pairs,
# walking the records in the order they appear in train_images_dict.
num_samples = 200
X_images, y_signals = [], []
count = 0

for pid, image_paths in train_images_dict.items():
    # Every image of a record shares the same fixed-length 'I' column target.
    lead_target = preprocess_signal(train_data_dict[pid]['I'].values, length=500)

    # Only take as many images from this record as the budget still allows.
    budget = num_samples - count
    for img_path in image_paths[:budget]:
        X_images.append(preprocess_image(img_path))
        y_signals.append(lead_target)
        count += 1

    if count >= num_samples:
        break

# Stack the collected samples into float32 arrays for training.
X_images = np.array(X_images, dtype=np.float32)
y_signals = np.array(y_signals, dtype=np.float32)

# Small CNN regressor: grayscale image in, 500 signal samples out.
#
# Keras image tensors are channels-last, (height, width, channels), and the
# arrays produced by preprocess_image are laid out (IMG_HEIGHT, IMG_WIDTH, 1),
# so height has to come first here. The previous (IMG_WIDTH, IMG_HEIGHT, 1)
# only worked because both constants are currently equal.
input_img = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))

# Two conv + max-pool stages, each pool halving the spatial resolution.
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)

# Dense head; the 500 linear outputs match the length the training signals
# are padded/truncated to.
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
output = Dense(500, activation='linear')(x)

model = Model(inputs=input_img, outputs=output)
model.compile(optimizer=Adam(0.001), loss='mse')
model.fit(X_images, y_signals, epochs=5, batch_size=16, verbose=1)

# Persist the trained network to the Kaggle working directory.
model.save('/kaggle/working/ecg_digitizer_model.h5')


# Read the sample submission and keep a separate working copy of it.
sample_sub = pd.read_parquet(SAMPLE_SUB)
submission = sample_sub.copy()

# Collect the PNG files sitting directly in TEST_DIR, in sorted path order.
test_images = glob.glob(os.path.join(TEST_DIR, '*.png'))
test_images.sort()

def load_test_images(img_list):
    """Preprocess every image in *img_list* and derive an id for each.

    Args:
        img_list: Iterable of image file paths.

    Returns:
        A ``(images, ids)`` tuple: ``images`` is a float32 array stacking the
        preprocessed images, and ``ids`` is a list holding, for each path,
        the file name up to its first dot.
    """
    # One pass over the input: preprocess first, then derive the id.
    loaded = [
        (preprocess_image(image_file),
         os.path.basename(image_file).partition('.')[0])
        for image_file in img_list
    ]
    frames = [frame for frame, _ in loaded]
    stems = [stem for _, stem in loaded]
    return np.array(frames, dtype=np.float32), stems

# Load and preprocess every test image, remembering the id of each one.
X_test, test_ids = load_test_images(test_images)

# Run the trained network: one predicted signal vector per test image.
predictions = model.predict(X_test, verbose=1)

# Flatten the predictions into long format: one row per (image id, sample
# index) pair carrying the predicted value.
sub_rows = [
    {"Id": pid, "Sample": i, "Value": val}
    for pid, pred in zip(test_ids, predictions)
    for i, val in enumerate(pred)
]

# NOTE(review): SAMPLE_SUB is a parquet file, whereas this writes a CSV with
# Id/Sample/Value columns — confirm this matches the expected submission format.
submission = pd.DataFrame(sub_rows)
submission.to_csv("/kaggle/working/submission.csv", index=False)
print("submission.csv created successfully")
