# In [None]:
# =============================================================================
# 1. Gerekli Kütüphanelerin Yüklenmesi
# =============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import logging
import cv2
import time
from tqdm.notebook import tqdm
from scipy.signal import resample
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Print the TensorFlow version that was imported above.
print("TensorFlow Sürümü:", tf.__version__)

# =============================================================================
# 2. Configuration and Logging
# =============================================================================
# Each record shows timestamp, level, source file/line and the message.
log_format = '%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO)

class Config:
    """Hyper-parameters and run-size settings shared by the whole script."""

    # Set to True for a quick run with fewer epochs and fewer samples.
    DEBUG_MODE = False

    # Target size (rows, columns) that every lead crop is resized to.
    IMG_HEIGHT, IMG_WIDTH = 128, 512

    # Number of samples in each resampled / predicted lead signal.
    SEQ_LENGTH = 512

    BATCH_SIZE = 32
    INITIAL_LEARNING_RATE = 5e-4

    # Run-size values: the smaller figure applies when DEBUG_MODE is on.
    EPOCHS = 2 if DEBUG_MODE else 10
    TRAIN_SAMPLES = 50 if DEBUG_MODE else 2500
    VAL_SAMPLES = 10 if DEBUG_MODE else 400

CONFIG = Config()
LEAD_NAMES = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
logging.info(f"Hızlı mod (DEBUG_MODE) {'AÇIK' if CONFIG.DEBUG_MODE else 'KAPALI'}.")

# --- PATHS OF THE FILES TO BE BLENDED ---
EFFICIENTNET_WEIGHTS_PATH = '/kaggle/input/keras-applications-models/EfficientNetB0.h5'
EXTERNAL_SUBMISSION_PATH = '/kaggle/input/0-0900/submission - 2025-10-22T170405.663.csv'

# Verify that the required input files exist. The checks run in the listed
# order, so the first missing file decides which error is raised.
for _required_path, _missing_message in (
    (EFFICIENTNET_WEIGHTS_PATH, "Keras Applications Models veri setini eklemediniz!"),
    (EXTERNAL_SUBMISSION_PATH, "Harmanlanacak harici submission.csv dosyasını eklemediniz!"),
):
    if not os.path.exists(_required_path):
        raise FileNotFoundError(_missing_message)

# =============================================================================
# 3. Data Loading and Splitting
# =============================================================================
logging.info("Yarışma verileri yükleniyor...")
data_dir = "/kaggle/input/physionet-ecg-image-digitization"
full_train_df = pd.read_csv(os.path.join(data_dir, "train.csv"))
test_df = pd.read_csv(os.path.join(data_dir, "test.csv"))

# Split on the unique ids (85 % / 15 %) so that all rows sharing an id end up
# in the same subset.
unique_ids = full_train_df['id'].unique()
train_ids, val_ids = train_test_split(unique_ids, test_size=0.15, random_state=42)
train_df, val_df = (
    full_train_df[full_train_df['id'].isin(subset_ids)].copy()
    for subset_ids in (train_ids, val_ids)
)

# In debug mode both subsets are reduced to a fixed-size random sample.
if CONFIG.DEBUG_MODE:
    train_df = train_df.sample(
        n=CONFIG.TRAIN_SAMPLES, random_state=42
    ).reset_index(drop=True)
    val_df = val_df.sample(
        n=CONFIG.VAL_SAMPLES, random_state=42
    ).reset_index(drop=True)

# =============================================================================
# 4. Veri Hattı Fonksiyonları
# =============================================================================
def get_lead_images_from_file(image_path):
    """Read an ECG image and cut it into twelve binarised crops.

    The image is loaded as grayscale and binarised with an inverted Otsu
    threshold. Eleven fixed pixel boxes are cropped from the result and one
    wider strip (rows 400-480, 120 px in from the left and right edges) is
    inserted at index 1, giving twelve crops in total.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A list of twelve 2-D arrays, or ``None`` when the image cannot be
        read or processed (the failure is logged). Crops can be empty when
        the image is smaller than the hard-coded boxes; callers must handle
        zero-sized arrays.
    """
    # (row_start, row_end, col_start, col_end) for the eleven fixed crops.
    # NOTE(review): presumably the crop order follows LEAD_NAMES with the
    # wide strip standing in for lead II - confirm against the image layout.
    lead_boxes = (
        (140, 205, 120, 620), (270, 335, 120, 620), (140, 205, 620, 1120),
        (205, 270, 620, 1120), (270, 335, 620, 1120), (140, 205, 1120, 1620),
        (205, 270, 1120, 1620), (270, 335, 1120, 1620), (140, 205, 1620, 2120),
        (205, 270, 1620, 2120), (270, 335, 1620, 2120),
    )
    try:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            logging.warning("Could not read image: %s", image_path)
            return None
        _, thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        width = thresh.shape[1]
        rhythm_strip = thresh[400:480, 120:width - 120]
        lead_images = [thresh[y1:y2, x1:x2] for y1, y2, x1, x2 in lead_boxes]
        lead_images.insert(1, rhythm_strip)
        return lead_images
    except Exception:
        # Stay best-effort (callers expect None on failure) but leave a trace
        # instead of discarding the error silently.
        logging.exception("Failed to extract lead crops from %s", image_path)
        return None

def process_path(image_path, signal_path=None):
    """Load one record's lead images and, optionally, its target signals.

    Written to be called through ``tf.py_function``: both arguments are
    expected to be eager string tensors and are decoded with
    ``.numpy().decode('utf-8')``.

    Args:
        image_path: Scalar string tensor holding the image location.
        signal_path: Optional scalar string tensor holding the location of
            the ground-truth CSV. When omitted, only images are returned.

    Returns:
        When ``signal_path`` is ``None``: an image tensor of shape
        ``(12, IMG_HEIGHT, IMG_WIDTH, 3)``.
        Otherwise: ``(images, signals)`` where ``signals`` is a float32
        tensor of shape ``(12, SEQ_LENGTH)``.
        If the image cannot be processed, all-zero tensors of the same
        shapes are returned; the number of outputs always matches the call
        mode so ``tf.py_function`` callers declaring a single output do not
        fail on arity.
    """
    num_leads = len(LEAD_NAMES)
    height, width = CONFIG.IMG_HEIGHT, CONFIG.IMG_WIDTH

    lead_images_raw = get_lead_images_from_file(image_path.numpy().decode('utf-8'))
    if lead_images_raw is None:
        empty_images = tf.zeros((num_leads, height, width, 3))
        if signal_path is None:
            return empty_images
        return empty_images, tf.zeros((num_leads, CONFIG.SEQ_LENGTH))

    processed_images = []
    for img in lead_images_raw:
        # A crop that fell outside the image is empty; use a blank canvas.
        if img.shape[0] == 0 or img.shape[1] == 0:
            img = np.zeros((height, width), dtype=np.uint8)
        img_resized = tf.image.resize(img[..., tf.newaxis], [height, width])
        processed_images.append(tf.image.grayscale_to_rgb(img_resized))
    images_tensor = tf.stack(processed_images)

    if signal_path is None:
        return images_tensor

    true_ts_df = pd.read_csv(signal_path.numpy().decode('utf-8'))
    # Fill gaps from neighbouring samples; a column that is entirely NaN is
    # untouched by ffill/bfill, so zero it to keep NaN out of the targets.
    true_ts_df = true_ts_df.ffill().bfill().fillna(0.0)
    signals = np.stack([
        resample(true_ts_df[lead].values.astype(np.float32), CONFIG.SEQ_LENGTH)
        for lead in LEAD_NAMES
    ])
    # Pin the dtype explicitly: callers declare float32 outputs.
    signals_tensor = tf.convert_to_tensor(signals.astype(np.float32))
    return images_tensor, signals_tensor

def create_dataset(df, is_train=True):
    """Build the batched ``tf.data`` pipeline of (lead image, lead signal) pairs.

    Every id in ``df`` is mapped to its image and CSV under
    ``{data_dir}/train``. ``process_path`` turns each record into 12 lead
    images and 12 signals, which are then unbatched into individual samples
    and re-batched with ``CONFIG.BATCH_SIZE``.

    Args:
        df: DataFrame with an ``id`` column.
        is_train: When true, the data is shuffled: first the record paths
            across the whole dataset, then the individual lead samples.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, signals)`` batches with
        per-sample shapes ``(IMG_HEIGHT, IMG_WIDTH, 3)`` and ``(SEQ_LENGTH,)``.
    """
    leads_per_record = 12
    record_ids = list(df['id'])
    image_paths = [f"{data_dir}/train/{rec_id}/{rec_id}-0001.png" for rec_id in record_ids]
    signal_paths = [f"{data_dir}/train/{rec_id}/{rec_id}.csv" for rec_id in record_ids]
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, signal_paths))

    if is_train:
        # Shuffling the path strings is cheap, so the buffer can span the
        # whole dataset and give a full record-level shuffle every epoch.
        dataset = dataset.shuffle(buffer_size=max(len(image_paths), 1))

    def py_func_wrapper(img_p, sig_p):
        images, signals = tf.py_function(process_path, [img_p, sig_p], [tf.float32, tf.float32])
        # py_function drops static shapes; restore them for downstream ops.
        images.set_shape([leads_per_record, CONFIG.IMG_HEIGHT, CONFIG.IMG_WIDTH, 3])
        signals.set_shape([leads_per_record, CONFIG.SEQ_LENGTH])
        return images, signals

    dataset = dataset.map(py_func_wrapper, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.unbatch()
    if is_train:
        # Shuffle after unbatching so the 12 leads of one record are not
        # emitted contiguously and in a fixed order inside each batch.
        dataset = dataset.shuffle(buffer_size=leads_per_record * CONFIG.BATCH_SIZE)
    return dataset.batch(CONFIG.BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Build the input pipelines: shuffling is enabled for training only.
train_dataset = create_dataset(train_df, is_train=True)
val_dataset = create_dataset(val_df, is_train=False)

# =============================================================================
# 5. Model Mimarisi ve SNR Callback'i
# =============================================================================
def align_and_get_powers(true_signal, pred_signal, fs):
    """Align a prediction to its reference and return signal/error power.

    Both signals are mean-centred. The prediction is then shifted by the lag
    that maximises the cross-correlation *within* a window of
    ``int(0.2 * fs)`` samples on either side of zero lag, and the powers are
    computed on the overlapping part.

    Args:
        true_signal: 1-D reference samples.
        pred_signal: 1-D predicted samples.
        fs: Sampling frequency; ``int(0.2 * fs)`` is the maximum shift.

    Returns:
        ``(signal_power, error_power)`` as floats. ``error_power`` is floored
        at ``1e-9``. The fallback ``(0.0, 1.0)`` is returned when the inputs
        are empty, non-numeric, contain non-finite values, or do not overlap
        after alignment.
    """
    fallback = (0.0, 1.0)
    try:
        true_arr = np.asarray(true_signal, dtype=np.float64).ravel()
        pred_arr = np.asarray(pred_signal, dtype=np.float64).ravel()
        max_shift = max(int(0.2 * fs), 0)
    except (TypeError, ValueError, OverflowError):
        return fallback
    if true_arr.size == 0 or pred_arr.size == 0:
        return fallback
    if not (np.isfinite(true_arr).all() and np.isfinite(pred_arr).all()):
        return fallback

    true_centered = true_arr - true_arr.mean()
    pred_centered = pred_arr - pred_arr.mean()

    # In 'full' mode, index k of the correlation corresponds to the lag
    # k - (len(true) - 1); a positive lag means the prediction is delayed.
    corr = np.correlate(pred_centered, true_centered, mode='full')
    zero_lag = true_centered.size - 1
    lo = max(zero_lag - max_shift, 0)
    hi = min(zero_lag + max_shift, corr.size - 1)
    # Search only inside the allowed window. Clipping the global argmax
    # instead would pick a window edge whenever the global peak lies outside,
    # even if a better shift exists inside the window.
    best_shift = lo + int(np.argmax(corr[lo:hi + 1])) - zero_lag

    if best_shift >= 0:
        aligned_pred, aligned_true = pred_centered[best_shift:], true_centered
    else:
        aligned_pred, aligned_true = pred_centered, true_centered[-best_shift:]

    min_len = min(len(aligned_pred), len(aligned_true))
    if min_len == 0:
        return fallback
    aligned_true, aligned_pred = aligned_true[:min_len], aligned_pred[:min_len]

    signal_power = float(np.sum(aligned_true ** 2))
    error_power = max(float(np.sum((aligned_true - aligned_pred) ** 2)), 1e-9)
    return signal_power, error_power

class SNREvaluationCallback(keras.callbacks.Callback):
    """Keras callback that prints a mean validation SNR after every epoch.

    For each unique id in ``val_df`` the record image is processed, the
    model predicts the 12 lead sequences, and the signal/error powers of all
    leads without NaNs are accumulated into one SNR value (in dB). The mean
    over records is stored in ``logs['val_snr']`` and printed.
    """

    def __init__(self, val_df, full_df):
        """Store the validation frame and the frame used to look up ``fs``."""
        super().__init__()
        self.val_df = val_df
        self.full_df = full_df
        self.best_snr = -np.inf

    def on_epoch_end(self, epoch, logs=None):
        """Compute the mean SNR over the validation records and report it."""
        logs = logs or {}
        cv_scores = []

        for rec_id in self.val_df['id'].unique():
            image_path = f"{data_dir}/train/{rec_id}/{rec_id}-0001.png"
            lead_images_tensor = tf.py_function(process_path, [image_path], [tf.float32])[0]
            # Skip records whose image tensor does not contain 12 leads.
            if tf.shape(lead_images_tensor)[0] != 12:
                continue

            predicted_sequences = self.model.predict(lead_images_tensor, verbose=0)
            true_ts_df = pd.read_csv(f"{data_dir}/train/{rec_id}/{rec_id}.csv")
            record_rows = self.full_df[self.full_df['id'] == rec_id]
            fs = record_rows.iloc[0]['fs']

            total_signal_power = 0.0
            total_error_power = 0.0
            for i, lead_name in enumerate(LEAD_NAMES):
                true_signal = true_ts_df[lead_name].values
                # Leads containing any NaN are left out of the score.
                if np.isnan(true_signal).any():
                    continue
                # Bring the prediction to the length of the reference.
                pred_resampled = resample(predicted_sequences[i], len(true_signal))
                signal_power, error_power = align_and_get_powers(true_signal, pred_resampled, fs)
                total_signal_power += signal_power
                total_error_power += error_power

            # No lead contributed when the error power is still zero.
            if total_error_power > 0:
                cv_scores.append(10 * np.log10(total_signal_power / total_error_power))

        avg_snr = np.mean(cv_scores) if cv_scores else -np.inf
        logs['val_snr'] = avg_snr

        if avg_snr > self.best_snr:
            self.best_snr = avg_snr
            print(f" - val_snr: {avg_snr:.4f} (Yeni en iyi skor!)")
        else:
            print(f" - val_snr: {avg_snr:.4f}")

def build_ecg_model(input_shape, seq_length):
    """Create and compile the image-to-sequence regression model.

    A frozen EfficientNetB0 backbone (weights read from
    ``EFFICIENTNET_WEIGHTS_PATH``) is followed by global average pooling and
    a dense head with dropout; the final linear layer emits ``seq_length``
    values. The model is compiled with Adam and MAE loss.

    Args:
        input_shape: Shape of one input image, e.g. ``(height, width, 3)``.
        seq_length: Number of output values per image.

    Returns:
        The compiled ``Model``.
    """
    backbone = EfficientNetB0(
        include_top=False,
        weights=EFFICIENTNET_WEIGHTS_PATH,
        input_shape=input_shape,
    )
    backbone.trainable = False

    inputs = Input(shape=input_shape)
    # training=False keeps the backbone in inference mode.
    features = backbone(inputs, training=False)
    features = GlobalAveragePooling2D()(features)
    features = Dropout(0.3)(features)

    # Two identical 1024-unit blocks, each followed by dropout.
    for _ in range(2):
        features = Dense(1024, activation='relu')(features)
        features = Dropout(0.3)(features)

    features = Dense(512, activation='relu')(features)
    outputs = Dense(seq_length, activation='linear')(features)

    model = Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=CONFIG.INITIAL_LEARNING_RATE),
        loss='mae',
    )
    return model

# Build the model for 3-channel lead images and print its layer summary.
ecg_digitizer_model = build_ecg_model(
    input_shape=(CONFIG.IMG_HEIGHT, CONFIG.IMG_WIDTH, 3),
    seq_length=CONFIG.SEQ_LENGTH,
)
ecg_digitizer_model.summary()

# =============================================================================
# 6. Model Training
# =============================================================================
logging.info("Model eğitimi başlıyor...")

# Callbacks: per-epoch SNR report, and halving of the learning rate when
# val_loss has not improved for 2 epochs (never below 1e-6).
snr_callback = SNREvaluationCallback(val_df, full_train_df)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

# Time the whole fit call.
start_time = time.time()
history = ecg_digitizer_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=CONFIG.EPOCHS,
    callbacks=[snr_callback, lr_scheduler],
    verbose=1,
)
end_time = time.time()
logging.info(f"Model eğitimi {end_time - start_time:.2f} saniyede tamamlandı.")

# =============================================================================
# 7. Generating the Notebook Predictions
# =============================================================================
logging.info("Notebook'un test seti tahminleri üretiliyor...")
notebook_predictions = []
for base_id, group in tqdm(test_df.groupby('id'), desc="Notebook Tahminleri"):
    image_path = f"{data_dir}/test/{base_id}.png"
    lead_images_tensor = tf.py_function(process_path, [image_path], [tf.float32])[0]

    if tf.shape(lead_images_tensor)[0] != 12:
        # Without 12 lead images, emit 0.0 for every requested sample.
        notebook_predictions.extend(
            {'id': f"{base_id}_{i}_{row['lead']}", 'value': 0.0}
            for _, row in group.iterrows()
            for i in range(row['number_of_rows'])
        )
        continue

    predicted_sequences = ecg_digitizer_model.predict(lead_images_tensor, verbose=0)
    for _, row in group.iterrows():
        lead_name = row['lead']
        num_rows_expected = row['number_of_rows']
        # Pick the prediction of this lead and resample it to the number of
        # rows requested for it.
        lead_index = LEAD_NAMES.index(lead_name)
        final_signal = resample(predicted_sequences[lead_index], num_rows_expected)
        notebook_predictions.extend(
            {'id': f"{base_id}_{i}_{row['lead']}", 'value': float(value)}
            for i, value in enumerate(final_signal)
        )

notebook_submission_df = pd.DataFrame(notebook_predictions)
logging.info("Notebook tahminleri başarıyla üretildi.")

# =============================================================================
# 8. Blending and Writing the Final Submission File
# =============================================================================
logging.info("Harmanlama süreci başlıyor...")

# Load the external submission file.
logging.info(f"Harici sunum dosyası yükleniyor: {EXTERNAL_SUBMISSION_PATH}")
external_sub_df = pd.read_csv(EXTERNAL_SUBMISSION_PATH)

# Left-join on the notebook ids so that every id the notebook predicted stays
# in the final file, even when the external file does not contain it.
# Duplicate external ids are dropped (first one wins) so that the join cannot
# multiply rows.
merged_df = pd.merge(
    notebook_submission_df,
    external_sub_df.drop_duplicates(subset='id'),
    on='id',
    how='left',
    suffixes=('_notebook', '_external'),
)

# Report ids that have no usable external value instead of dropping them.
missing_external = merged_df['value_external'].isna()
if missing_external.any():
    logging.warning(
        "%d of %d ids have no value in the external submission; "
        "the notebook prediction is used on its own for them.",
        int(missing_external.sum()), len(merged_df),
    )

# Apply the weighted average. Where the external value is missing, the
# notebook value stands in for it, so the blend reduces to the notebook value.
logging.info("Ağırlıklı ortalama uygulanıyor (80% harici, 20% notebook)...")
external_values = merged_df['value_external'].fillna(merged_df['value_notebook'])
merged_df['value'] = 0.80 * external_values + 0.20 * merged_df['value_notebook']

# Build the final submission frame.
final_submission_df = merged_df[['id', 'value']]

# Save the final file.
final_submission_df.to_csv('submission.csv', index=False)

logging.info("Harmanlanmış sunum dosyası 'submission.csv' başarıyla oluşturuldu!")
print("\nHarmanlanmış Sunum Dosyasından Örnekler:")
try:
    display(final_submission_df.head(15))
except NameError:
    # `display` is only injected by IPython/Jupyter; fall back to print.
    print(final_submission_df.head(15))