In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install opencv-python-headless numpy scipy matplotlib pandas tensorflow


In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models


In [None]:
# Root folder of the competition dataset.
base_dir = "/kaggle/input/physionet-ecg-image-digitization"

# Image folders and CSV manifests all sit directly under the root.
train_dir, test_dir = (
    os.path.join(base_dir, folder) for folder in ("train", "test")
)
train_csv, test_csv = (
    os.path.join(base_dir, manifest) for manifest in ("train.csv", "test.csv")
)

print("âœ… Paths set:")
for image_dir in (train_dir, test_dir):
    print(image_dir)


In [None]:
# Read the training manifest, then show its column names and first rows.
train_df = pd.read_csv(train_csv)
column_names = train_df.columns.tolist()
print("Columns:", column_names)
preview = train_df.head()
print(preview)


In [None]:
def get_image_paths(ecg_id, root=None):
    """Return the PNG files stored in one ECG record's folder.

    Args:
        ecg_id: Record identifier; its string form is used as the folder name.
        root: Directory that contains the per-record folders. Defaults to the
            module-level ``train_dir``.

    Returns:
        A sorted list of full paths to the ``.png`` files found directly in
        ``<root>/<ecg_id>``, or an empty list when that folder does not exist.
    """
    if root is None:
        root = train_dir
    folder = os.path.join(root, str(ecg_id))
    # isdir rather than exists: a regular file that happens to be named like
    # the id would otherwise reach os.listdir and raise.
    if not os.path.isdir(folder):
        return []
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # stable between runs so seeded downstream steps stay reproducible.
    return sorted(
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.endswith(".png")
    )

# Build one record per training image found on disk.
# The "id" column is iterated directly instead of using DataFrame.iterrows:
# iterrows hands back each row as a Series with a single dtype, so an integer
# id can be upcast to float when other columns are floats, and str() of that
# float would no longer match the folder name.
train_images = [
    {
        "id": ecg_id,
        "image_path": img_path,
        "label": "unknown"  # dummy label
    }
    for ecg_id in train_df["id"]
    for img_path in get_image_paths(ecg_id)
]

# Naming the columns explicitly keeps the expected schema even when no images
# were found, so later column lookups do not fail with a KeyError.
train_data = pd.DataFrame(train_images, columns=["id", "image_path", "label"])
print("âœ… Total train images found:", len(train_data))
print(train_data.head())


In [None]:
# Collect every recorded image path that does not resolve on disk and report the count.
missing = list(
    filter(lambda candidate: not os.path.exists(candidate), train_data["image_path"])
)
print(f"Missing {len(missing)} / {len(train_data)} images.")


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the image records for validation; the fixed random_state
# makes the split repeatable for the same input frame.
split_options = {"test_size": 0.2, "random_state": 42}
train_df_split, val_df_split = train_test_split(train_data, **split_options)
for caption, part in (("Train size:", train_df_split), ("Val size:", val_df_split)):
    print(caption, len(part))


In [None]:
# Target image resolution and number of images per batch.
img_height, img_width = 224, 224
batch_size = 32

# Training images are scaled by 1/255 and lightly augmented;
# validation images are only scaled.
augmentation = dict(rotation_range=10, zoom_range=0.1, horizontal_flip=True)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read paths and labels from the same columns with the same settings.
flow_options = dict(
    x_col="image_path",
    y_col="label",
    target_size=(img_height, img_width),
    class_mode="categorical",
    batch_size=batch_size,
)

train_generator = train_datagen.flow_from_dataframe(train_df_split, **flow_options)
val_generator = val_datagen.flow_from_dataframe(val_df_split, **flow_options)


In [None]:
# Two convolution + max-pooling stages applied to the RGB input.
feature_layers = [
    layers.Input(shape=(img_height, img_width, 3)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
]

# Flattened features feed a small dense layer and a single sigmoid output unit.
head_layers = [
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
]

model = models.Sequential(feature_layers + head_layers)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Train for a single epoch, evaluating on the validation generator after it.
fit_options = dict(validation_data=val_generator, epochs=1, verbose=1)
history = model.fit(train_generator, **fit_options)


In [None]:
import pandas as pd


# Persist the trained network next to the notebook.
model.save("ecg_cnn_classifier.h5")
print("âœ… Model saved as ecg_cnn_classifier.h5")

# One row per epoch and one column per tracked metric, plus a 1-based epoch index.
history_df = pd.DataFrame(history.history)
history_df["epoch"] = range(1, len(history_df) + 1)

# Training metrics are not a submission: write them to the file the log message
# announces rather than to a look-alike of the submission file.
history_path = "ecg_training_history.csv"
history_df.to_csv(history_path, index=False)
print("âœ… Training history saved as ecg_training_history.csv")

print("\nðŸ“Š Final few training results:")
print(history_df.tail())


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Load the test manifest. `test_csv` and `test_dir` are the paths defined in the
# path-setup cell, so the dataset location lives in one place only.
test_df = pd.read_csv(test_csv)

# Assumes each test image is stored flat as <test_dir>/<id>.png — TODO confirm
# against the dataset layout (training images sit in per-id folders).
test_df["image_path"] = (
    test_df["id"]
    .astype(str)
    .map(lambda ecg_id: os.path.join(test_dir, ecg_id + ".png"))
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Reuse the training-time image size and batch size so test preprocessing cannot
# drift from the model's input shape. shuffle=False keeps prediction order
# aligned with the rows of test_df.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="image_path",
    y_col=None,
    target_size=(img_height, img_width),
    class_mode=None,
    batch_size=batch_size,
    shuffle=False
)

print("âœ… Test data generator ready.")


In [None]:
import pandas as pd
import numpy as np


# Score every test image; the generator was built with shuffle=False, so the
# rows of `preds` follow the generator's file order.
preds = model.predict(test_generator)

# The network ends in a single sigmoid unit, so `preds` has one column and
# argmax over axis=1 would be 0 for every row regardless of the input.
# Decode the probability with a 0.5 threshold instead.
# NOTE(review): the training labels in this notebook are a single dummy class,
# so these predictions carry no signal until real labels are wired in.
pred_classes = (np.asarray(preds).ravel() > 0.5).astype(int)

submission = pd.DataFrame({
    "id": test_df["id"],
    "label": pred_classes
})

submission.to_csv("/kaggle/working/submission.csv", index=False)

print("âœ… submission.csv file created successfully!")
submission.head()


In [None]:
import os
import pandas as pd

# Remove the stray file whose name starts with a space (" submission.csv"),
# if present; `-f` keeps this silent when the file does not exist.
!rm -f "/kaggle/working/ submission.csv"

# Write the submission frame to the correctly named file (no leading space).
submission_path = "/kaggle/working/submission.csv"
submission.to_csv(submission_path, index=False)

# Report the path and list the working directory to confirm what was written.
print("âœ… Final submission file created:", submission_path)
!ls -lh /kaggle/working/
