In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image
import cv2
import warnings

warnings.filterwarnings('ignore')

import os
import torch
import torch.nn as nn
import albumentations as A
import torch.optim as optim
from torchvision import datasets, transforms, models
from scipy import signal as scipy_signal
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from tqdm import tqdm
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda") if cuda_available else torch.device("cpu")
device

# Dataset loaded

In [None]:
# Root folder of the competition data and the sub-folder with the training records
DATA_PATH = Path('/kaggle/input/physionet-ecg-image-digitization')
TRAIN_PATH = DATA_PATH.joinpath('train')

# Read the metadata table (one row per training record) and report its size
metadata_csv = DATA_PATH.joinpath('train.csv')
train_df = pd.read_csv(metadata_csv)
print(f"Loaded {len(train_df)} training samples")

# Exploratory Data Analysis
## Analyze Image Dimensions and Properties

In [None]:
# Sample the first few records and collect basic properties of their 0001 image
sample_ids = train_df['id'].head(10).tolist()
image_properties = []

for sample_id in sample_ids:
    # Check the original image (0001)
    img_path = TRAIN_PATH / str(sample_id) / f"{sample_id}-0001.png"

    if not img_path.exists():
        # Report skipped records instead of dropping them silently
        print(f"Missing image, skipped: {img_path}")
        continue

    # The context manager closes the file handle once the pixels have been copied
    with Image.open(img_path) as img:
        img_array = np.array(img)
        width, height = img.size
        mode = img.mode

    image_properties.append({
        'id': sample_id,
        'width': width,
        'height': height,
        'mode': mode,
        # 2-D arrays (single-channel images) have no channel axis
        'channels': img_array.shape[2] if img_array.ndim == 3 else 1,
        'dtype': img_array.dtype,
        'min_val': img_array.min(),
        'max_val': img_array.max(),
        'mean_val': img_array.mean()
    })

props_df = pd.DataFrame(image_properties)
print("\nImage Properties Summary:")
print(props_df)

if props_df.empty:
    # Without this guard the column selection below raises KeyError on an empty frame
    print("\nNo sample images were found - check TRAIN_PATH.")
else:
    print("\nImage Dimensions Distribution:")
    print(props_df[['width', 'height']].describe())

## Visualize Image Structure with Annotations

In [None]:
# Pick the first training record and locate its 0001 image
sample_id = str(train_df['id'].iloc[0])
sample_dir = TRAIN_PATH / sample_id
img_path = sample_dir / f"{sample_id}-0001.png"

# Read the file with PIL and with OpenCV; the OpenCV copy is converted
# from BGR to RGB for display and to a single-channel grayscale image
img_pil = Image.open(img_path)
img_cv = cv2.imread(str(img_path))
img_rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
img_gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)

print(f"Sample ID: {sample_id}")
print(f"Image shape (RGB): {img_rgb.shape}")
print(f"Image shape (Gray): {img_gray.shape}")

# Side-by-side view: colour on the left, grayscale on the right
fig, axes = plt.subplots(1, 2, figsize=(18, 8))
panels = (
    (img_rgb, 'Original ECG Image (RGB)', None),
    (img_gray, 'Grayscale ECG Image', 'gray'),
)
for ax, (pixels, title, cmap) in zip(axes, panels):
    ax.imshow(pixels, cmap=cmap)
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

## Analyze Horizontal and Vertical Projections

In [None]:
# Row-wise and column-wise intensity sums; they can help identify grid lines and lead regions
horizontal_projection = img_gray.sum(axis=1)
vertical_projection = img_gray.sum(axis=0)

fig, axes = plt.subplots(2, 2, figsize=(18, 10))
(ax_image, ax_rows), (ax_cols, ax_combined) = axes

# Top-left: the grayscale page
ax_image.imshow(img_gray, cmap='gray')
ax_image.set_title('ECG Image')
ax_image.axis('off')

# Top-right: one sum per image row, plotted sideways with row 0 at the top
# so the curve lines up with the image next to it
row_indices = range(len(horizontal_projection))
ax_rows.plot(horizontal_projection, row_indices)
ax_rows.set_ylim(len(horizontal_projection), 0)
ax_rows.set_title('Horizontal Projection')
ax_rows.set_xlabel('Sum of pixel intensities')
ax_rows.set_ylabel('Row index')
ax_rows.grid(True, alpha=0.3)

# Bottom-left: one sum per image column
ax_cols.plot(vertical_projection)
ax_cols.set_title('Vertical Projection')
ax_cols.set_xlabel('Column index')
ax_cols.set_ylabel('Sum of pixel intensities')
ax_cols.grid(True, alpha=0.3)

# Bottom-right: the image again, this time with its pixel axes visible
ax_combined.imshow(img_gray, cmap='gray')
ax_combined.set_title('ECG Image')
ax_combined.set_xlabel('Column index')
ax_combined.set_ylabel('Row index')

plt.tight_layout()
plt.show()

## Detect Grid Lines

In [None]:
# Try to detect horizontal and vertical grid lines using edge detection
edges = cv2.Canny(img_gray, 50, 150)

# Detect line segments using the probabilistic Hough transform
lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)

# Colours are RGB tuples because the segments are drawn on a copy of img_rgb
HORIZONTAL_COLOR = (255, 0, 0)  # red
VERTICAL_COLOR = (0, 255, 0)    # green

# Draw detected lines; both lists exist even when nothing is detected
img_with_lines = img_rgb.copy()
horizontal_lines = []
vertical_lines = []

if lines is None:
    print("No line segments detected")
else:
    print(f"Detected {len(lines)} line segments")

    for line in lines:
        # Plain ints keep cv2.line happy regardless of the NumPy integer type
        x1, y1, x2, y2 = (int(v) for v in line[0])
        # Absolute segment direction in degrees, in the range [0, 180]
        angle = abs(np.degrees(np.arctan2(y2 - y1, x2 - x1)))

        if angle < 10 or angle > 170:  # Horizontal
            horizontal_lines.append(line)
            cv2.line(img_with_lines, (x1, y1), (x2, y2), HORIZONTAL_COLOR, 1)
        elif 80 < angle < 100:  # Vertical
            vertical_lines.append(line)
            cv2.line(img_with_lines, (x1, y1), (x2, y2), VERTICAL_COLOR, 1)

    print(f"Horizontal lines: {len(horizontal_lines)}")
    print(f"Vertical lines: {len(vertical_lines)}")

fig, axes = plt.subplots(1, 3, figsize=(20, 7))

axes[0].imshow(img_gray, cmap='gray')
axes[0].set_title('Original Image')
axes[0].axis('off')

axes[1].imshow(edges, cmap='gray')
axes[1].set_title('Edge Detection (Canny)')
axes[1].axis('off')

axes[2].imshow(img_with_lines)
# The legend must match the colours actually drawn above (red, not blue)
axes[2].set_title('Detected Lines (H: red, V: green)')
axes[2].axis('off')

plt.tight_layout()
plt.show()

## Analyze Color Channels

In [None]:
# Split the RGB image into its three planes - ECG signals might be in specific channels
r_channel, g_channel, b_channel = (img_rgb[:, :, plane] for plane in range(3))

fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# One column per channel: (label, pixels, image colormap, histogram colour)
channel_panels = (
    ('Red', r_channel, 'Reds', 'red'),
    ('Green', g_channel, 'Greens', 'green'),
    ('Blue', b_channel, 'Blues', 'blue'),
)

for column, (label, pixels, cmap_name, hist_color) in enumerate(channel_panels):
    # Top row: the channel rendered with a matching colormap
    image_ax = axes[0, column]
    image_ax.imshow(pixels, cmap=cmap_name)
    image_ax.set_title(f'{label} Channel')
    image_ax.axis('off')

    # Bottom row: distribution of that channel's pixel values
    hist_ax = axes[1, column]
    hist_ax.hist(pixels.flatten(), bins=50, color=hist_color, alpha=0.7)
    hist_ax.set_title(f'{label} Channel Histogram')
    hist_ax.set_xlabel('Pixel Value')
    hist_ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

## Compare Multiple Image Types

In [None]:
# Compare different renditions of the same record (original, scanned, photographed, damaged)
#
# Every suffix that is plotted has a label: the list is derived from the dict,
# so the two cannot drift apart.
# NOTE(review): '0001' is the image this notebook treats as the original.
# The other labels follow the competition's data description as I understand
# it - confirm them against the Kaggle data page.
image_type_names = {
    '0001': 'Original',
    '0004': 'B&W Scan',
    '0005': 'Mobile Photo',
    '0006': 'Screen Photo',
    '0010': 'Damaged'
}
image_types_to_check = list(image_type_names)

fig, axes = plt.subplots(len(image_types_to_check), 2, figsize=(18, 4*len(image_types_to_check)))

for idx, img_type in enumerate(image_types_to_check):
    ax_image, ax_hist = axes[idx]
    label = image_type_names.get(img_type, img_type)

    # Loop-local names only: img_rgb / img_gray still hold the sample image
    # loaded earlier, which later cells rely on.
    variant_path = sample_dir / f"{sample_id}-{img_type}.png"
    variant_bgr = cv2.imread(str(variant_path)) if variant_path.exists() else None

    if variant_bgr is None:
        # Missing or unreadable file: keep the grid layout, show a placeholder
        for ax in (ax_image, ax_hist):
            ax.text(0.5, 0.5, 'Image not found', ha='center', va='center')
            ax.axis('off')
        continue

    variant_rgb = cv2.cvtColor(variant_bgr, cv2.COLOR_BGR2RGB)
    variant_gray = cv2.cvtColor(variant_bgr, cv2.COLOR_BGR2GRAY)

    # Show RGB
    ax_image.imshow(variant_rgb)
    ax_image.set_title(f'{label} - RGB')
    ax_image.axis('off')

    # Show grayscale histogram
    ax_hist.hist(variant_gray.flatten(), bins=50, alpha=0.7)
    ax_hist.set_title(f'{label} - Intensity Histogram')
    ax_hist.set_xlabel('Pixel Value')
    ax_hist.set_ylabel('Frequency')
    ax_hist.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


## Cropped Region Analysis

In [None]:
# Take a closer look at a small region to see grid and signal details.
# The crop is centred on the image and never larger than the image itself;
# without the clamp, a small image would give negative start offsets and a
# wrong (or empty) slice.
MAX_CROP_HEIGHT = 400
MAX_CROP_WIDTH = 800

img_height, img_width = img_gray.shape[:2]
crop_height = min(MAX_CROP_HEIGHT, img_height)
crop_width = min(MAX_CROP_WIDTH, img_width)
start_y = (img_height - crop_height) // 2
start_x = (img_width - crop_width) // 2

cropped = img_rgb[start_y:start_y+crop_height, start_x:start_x+crop_width]
cropped_gray = img_gray[start_y:start_y+crop_height, start_x:start_x+crop_width]

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Show crop location
axes[0, 0].imshow(img_rgb)
axes[0, 0].add_patch(plt.Rectangle((start_x, start_y), crop_width, crop_height,
                                   fill=False, edgecolor='red', linewidth=2))
axes[0, 0].set_title('Full Image with Crop Region')
axes[0, 0].axis('off')

# Show cropped region
axes[0, 1].imshow(cropped)
axes[0, 1].set_title('Cropped Region (Color)')
axes[0, 1].axis('off')

# Show cropped grayscale
axes[1, 0].imshow(cropped_gray, cmap='gray')
axes[1, 0].set_title('Cropped Region (Grayscale)')
axes[1, 0].axis('off')

# Show inverted intensities (dark pixels become bright and vice versa)
axes[1, 1].imshow(255 - cropped_gray, cmap='gray')
axes[1, 1].set_title('Cropped Region (Inverted)')
axes[1, 1].axis('off')

plt.tight_layout()
plt.show()

# ECG image preprocessor

In [None]:
class Config:
    """Shared settings read by the dataset, model and training cells."""

    # Learning rate handed to the optimizer
    lr = 0.0003
    # Number of data-loading worker processes
    num_workers = 4
    # Image size the pipeline resizes to
    img_size = (1024, 1024)
    # Number of samples in every signal vector (targets and model output)
    target_length = 5_000


config = Config()

In [None]:
class ECGImageProcessor:
    """Classical (non-learned) helpers that turn an ECG page image into a rough 1-D trace.

    ``lead_positions`` maps each of the 12 lead names to an
    ``(x_ratio, y_ratio)`` pair: the centre of the region searched for that
    lead, as a fraction of the image width and height.
    NOTE(review): the ratios are fixed layout guesses; nothing in this
    notebook checks them against the actual images.
    """

    def __init__(self):
        self.lead_positions = {
            'I': (0.1, 0.15), 'II': (0.1, 0.3), 'III': (0.1, 0.45),
            'aVR': (0.1, 0.6), 'aVL': (0.1, 0.75), 'aVF': (0.1, 0.9),
            'V1': (0.55, 0.15), 'V2': (0.55, 0.3), 'V3': (0.55, 0.45),
            'V4': (0.55, 0.6), 'V5': (0.55, 0.75), 'V6': (0.55, 0.9)
        }

    def preprocess_image(self, image):
        """Convert an image to a binarised, contrast-enhanced grayscale image.

        Args:
            image: 2-D grayscale array or 3-D array in RGB channel order.

        Returns:
            The Otsu-thresholded image produced by ``cv2.threshold``.
        """
        # Convert to grayscale
        if image.ndim == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            gray = image

        # Enhance local contrast
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Morphological opening with a 2x2 kernel.
        # NOTE(review): opening suppresses small *bright* details; on a light
        # page it does not remove dark grid lines. Confirm whether closing
        # (or a larger kernel) was intended.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        opened = cv2.morphologyEx(enhanced, cv2.MORPH_OPEN, kernel)

        # Binarize with an automatically chosen (Otsu) threshold
        _, binary = cv2.threshold(opened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        return binary

    def extract_lead_signal(self, image, lead_name, fs=500, target_length=None):
        """Trace one lead by following the dark pixels inside its region of interest.

        For every ROI column the mean row index of the pixels darker than 128
        is taken as the trace position (the ROI centre when the column has no
        dark pixel). The trace is flipped so that "up" is positive,
        z-normalised, and resampled to ``target_length`` samples.

        Args:
            image: image array; intended for the output of ``preprocess_image``.
            lead_name: key of ``self.lead_positions``.
            fs: unused; kept so existing callers keep working.
            target_length: number of output samples. Defaults to
                ``config.target_length``.

        Returns:
            ``np.float32`` array of length ``target_length``. Any failure
            (unknown lead, empty ROI, unexpected input) is reported with
            ``print`` and yields an all-zero array of the same dtype and length.
        """
        if target_length is None:
            target_length = config.target_length

        # Same dtype as the success path, so callers never see mixed precisions
        flat_line = np.zeros(target_length, dtype=np.float32)

        try:
            # Centre of the lead's region in pixel coordinates
            h, w = image.shape[:2]
            x_ratio, y_ratio = self.lead_positions[lead_name]
            lead_x = int(w * x_ratio)
            lead_y = int(h * y_ratio)

            # ROI: 40% of the width and 8% of the height around that centre,
            # clipped at the top/left image border
            roi_width = int(w * 0.4)
            roi_height = int(h * 0.08)
            roi_x = max(0, lead_x - roi_width // 2)
            roi_y = max(0, lead_y - roi_height // 2)

            roi = image[roi_y:roi_y + roi_height, roi_x:roi_x + roi_width]
            if roi.size == 0:
                return flat_line

            # Number of dark samples per (row, column); extra axes (colour
            # channels) are folded in so each dark sample counts once.
            dark = roi < 128
            if dark.ndim > 2:
                dark_counts = dark.reshape(dark.shape[0], dark.shape[1], -1).sum(axis=2)
            else:
                dark_counts = dark.astype(np.int64)

            # Mean row index of the dark samples in each column, computed for
            # all columns at once instead of a Python loop
            per_column = dark_counts.sum(axis=0)
            row_index = np.arange(dark_counts.shape[0])
            weighted_rows = (dark_counts * row_index[:, None]).sum(axis=0)
            signal_y = np.where(
                per_column > 0,
                weighted_rows / np.maximum(per_column, 1),  # maximum() only avoids 0/0
                roi.shape[0] / 2,
            )

            # Image rows grow downwards, so flip before normalising
            ecg_signal = roi_height - signal_y
            ecg_signal = (ecg_signal - ecg_signal.mean()) / (ecg_signal.std() + 1e-8)

            # Resample to the requested length
            ecg_signal = scipy_signal.resample(ecg_signal, target_length)

            return ecg_signal.astype(np.float32)

        except Exception as e:
            # Best-effort by design: one bad image must not abort a data-loading run
            print(f"Error extracting {lead_name}: {e}")
            return flat_line

## ECG Image Dataset

In [None]:
class ECGDataset(Dataset):
    """Dataset that yields ECG page images and, for training, the lead-II target.

    Training items are ``(image_tensor, extracted_signal, target_signal, base_id)``;
    test items are ``(image_tensor, base_id)``.

    Args:
        df: frame with an ``id`` column; one item per row.
        image_dir: folder with ``<id>/<id>-<segment>.png`` and ``<id>/<id>.csv``
            (training) or ``<id>.png`` (test).
        transform: optional callable invoked as ``transform(image=rgb)['image']``.
        is_train: selects the training or test item layout described above.
    """

    # Image suffixes tried in order for a training record
    TRAIN_SEGMENTS = ('0001', '0003', '0004', '0005')

    def __init__(self, df, image_dir, transform=None, is_train=True):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        self.is_train = is_train
        self.processor = ECGImageProcessor()
        self.leads = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']

    def __len__(self):
        return len(self.df)

    def _load_image(self, base_id):
        """Return the BGR image for ``base_id``, or a white dummy image if none loads."""
        if self.is_train:
            record_dir = os.path.join(self.image_dir, str(base_id))
            candidates = [os.path.join(record_dir, f"{base_id}-{seg}.png")
                          for seg in self.TRAIN_SEGMENTS]
        else:
            candidates = [os.path.join(self.image_dir, f"{base_id}.png")]

        for img_path in candidates:
            image = cv2.imread(img_path)
            if image is not None:
                return image

        # Last resort: a blank page sized from the config, not a hard-coded 1024
        print(f"Could not load image for {base_id}")
        height, width = config.img_size
        return np.full((height, width, 3), 255, dtype=np.uint8)

    def _load_target(self, base_id):
        """Return the lead-II ground truth as float32 of length ``config.target_length``.

        The signal is truncated or zero-padded to the target length and
        z-normalised when its standard deviation is positive. If the CSV
        cannot be read, a synthetic sum of sines is returned instead.
        """
        csv_path = os.path.join(self.image_dir, str(base_id), f"{base_id}.csv")
        try:
            signals_df = pd.read_csv(csv_path)
            # Use lead II as target for training
            target_signal = signals_df['II'].values.astype(np.float32)

            # Truncate or zero-pad to the target length
            missing = config.target_length - len(target_signal)
            if missing < 0:
                target_signal = target_signal[:config.target_length]
            else:
                target_signal = np.pad(target_signal, (0, missing), mode='constant')

            # Normalize (skipped for constant signals to avoid dividing by zero)
            if target_signal.std() > 0:
                target_signal = (target_signal - target_signal.mean()) / target_signal.std()

        except Exception as e:
            print(f"Error loading CSV for {base_id}: {e}")
            # Create synthetic ECG as fallback
            t = np.linspace(0, 10, config.target_length)
            target_signal = (np.sin(2 * np.pi * 1 * t) +
                             0.5 * np.sin(2 * np.pi * 2 * t) +
                             0.2 * np.sin(2 * np.pi * 0.5 * t))
            target_signal = target_signal.astype(np.float32)

        return target_signal

    def _to_tensor(self, image_rgb):
        """Apply ``self.transform`` or, without one, resize and scale to [0, 1] CHW."""
        if self.transform:
            return self.transform(image=image_rgb)['image']

        # config.img_size is (height, width); cv2.resize expects (width, height)
        height, width = config.img_size
        resized = cv2.resize(image_rgb, (width, height))
        return torch.from_numpy(resized.transpose(2, 0, 1)).float() / 255.0

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        base_id = row['id']

        image_rgb = cv2.cvtColor(self._load_image(base_id), cv2.COLOR_BGR2RGB)

        if not self.is_train:
            # For test - just return image
            return self._to_tensor(image_rgb), base_id

        target_signal = self._load_target(base_id)

        # Classical trace of lead II, extracted from the untransformed image
        processed_img = self.processor.preprocess_image(image_rgb)
        extracted_signal = self.processor.extract_lead_signal(processed_img, 'II')

        image_tensor = self._to_tensor(image_rgb)

        return image_tensor, torch.FloatTensor(extracted_signal), torch.FloatTensor(target_signal), base_id

# Custom CNN model (Image to Signal Regression)

In [None]:
# convolution block with BatchNormalization
def ConvBlock(in_channels, out_channels, pool=False):
    """Return a 3x3 conv (padding 1) -> batch norm -> ReLU stack.

    With ``pool=True`` a ``MaxPool2d(4)`` is appended as a fourth stage.
    """
    stages = (
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )
    pooling = (nn.MaxPool2d(4),) if pool else ()
    return nn.Sequential(*stages, *pooling)

In [None]:
# resnet architecture 
class ECGNet(nn.Module):
    """Small residual CNN that regresses a fixed-length signal vector from an image batch.

    Layout: conv (3->64), pooled conv (64->128), a residual pair at 128
    channels, pooled convs (128->256, 256->512), a residual pair at 512
    channels, then global average pooling and one linear layer.

    Args:
        output_length: size of the regression output. Defaults to
            ``config.target_length``, so existing ``ECGNet()`` calls are
            unchanged.
    """

    def __init__(self, output_length=None):
        super().__init__()
        if output_length is None:
            output_length = config.target_length

        self.conv1 = ConvBlock(3, 64)
        self.conv2 = ConvBlock(64, 128, pool=True)
        self.res1 = nn.Sequential(ConvBlock(128, 128), ConvBlock(128, 128))

        self.conv3 = ConvBlock(128, 256, pool=True)
        self.conv4 = ConvBlock(256, 512, pool=True)
        self.res2 = nn.Sequential(ConvBlock(512, 512), ConvBlock(512, 512))

        # Global average pooling makes the head independent of the spatial
        # size that is left after the pooled stages.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512, output_length),
        )

    def forward(self, x):
        """Map an image batch ``x`` to a ``(batch, output_length)`` tensor."""
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.res1(out) + out  # skip connection around the first residual pair
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out  # skip connection around the second residual pair
        return self.classifier(out)

## ECG loss 

In [None]:
class ECGLoss(nn.Module):
    """Weighted sum of time-domain, spectral and shape terms for signal regression.

    ``total = 0.4*MSE + 0.2*MAE + 0.2*spectral + 0.1*(1 - cosine) + 0.1*characteristic``
    """

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.mae = nn.L1Loss()

    def forward(self, pred, target):
        """Return the scalar loss for ``pred`` against ``target``.

        Both tensors are expected to have the signal axis at ``dim=1``.
        """
        mse_loss = self.mse(pred, target)
        mae_loss = self.mae(pred, target)

        # Frequency domain. The inputs are real, so rfft covers the whole
        # spectrum. norm="ortho" keeps the term on the scale of the signal;
        # without it the magnitudes grow with the signal length and this term
        # swamps the others.
        pred_fft = torch.fft.rfft(pred, dim=1, norm="ortho")
        target_fft = torch.fft.rfft(target, dim=1, norm="ortho")
        freq_loss = torch.mean(torch.abs(pred_fft - target_fft))

        # Shape agreement: 1 - cosine similarity along the signal axis
        correlation_loss = 1 - torch.cosine_similarity(pred, target, dim=1).mean()

        # Loss for characteristic ECG points
        peak_loss = self._ecg_characteristic_loss(pred, target)

        return (0.4 * mse_loss + 0.2 * mae_loss +
                0.2 * freq_loss + 0.1 * correlation_loss + 0.1 * peak_loss)

    def _ecg_characteristic_loss(self, pred, target):
        """Placeholder for a QRS-focused term.

        Currently this is plain MAE over the whole signal, so it only raises
        the effective MAE weight; no peak or QRS detection is performed.
        """
        return self.mae(pred, target)

In [None]:
# Re-read the metadata so this cell gives the same result every time it is run
train_df = pd.read_csv(DATA_PATH / 'train.csv')
test_df = pd.read_csv(DATA_PATH / 'test.csv')

N_TRAIN_SAMPLES = 1000  # higher -> better results
train_df = train_df.head(N_TRAIN_SAMPLES)

H, W = config.img_size  # unpack
# No HorizontalFlip: mirroring the page reverses the time axis of the trace
# while the target signal stays unflipped, so flipped samples would carry a
# wrong label.
# NOTE(review): ShiftScaleRotate and GridDistortion also move the trace a
# little relative to the unchanged target; the limits are small, but revisit
# them if sample-level alignment matters.
train_transform = A.Compose([
    A.Resize(height=H, width=W),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=5, p=0.3),
    A.GridDistortion(p=0.1),  # imitation of paper distortion
    A.GaussNoise(p=0.2),
    A.RandomBrightnessContrast(p=0.3),
    A.MotionBlur(p=0.1),      # simulate blurring when photographing
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Dataset
train_dataset = ECGDataset(
    train_df,
    str(TRAIN_PATH),
    transform=train_transform,
    is_train=True
)

train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=config.num_workers  # single source of truth instead of a literal
)

# Model
resnet_model = ECGNet().to(device)

# Loss and optimizer
criterion = ECGLoss()
optimizer = optim.AdamW(resnet_model.parameters(), lr=config.lr, weight_decay=1e-4)
# Halve the learning rate when the monitored loss has not improved for 6 epochs
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, factor=0.5)

## Model training

In [None]:
# Train for a fixed number of epochs and keep the weights of the best epoch.
# NOTE(review): "best" is judged on the training loss - there is no
# validation split in this notebook - so it does not measure generalisation.
best_loss = float('inf')
epochs = 20
for epoch in range(epochs):
    resnet_model.train()
    running_loss = 0.0

    # tqdm wraps the loader itself so it can read len(train_loader) and show a
    # total and ETA; wrapping enumerate(...) hides the length.
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for images, _extracted, targets, _base_ids in progress:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = resnet_model(images)
        loss = criterion(outputs, targets)
        loss.backward()

        # Cap the gradient norm before the update
        torch.nn.utils.clip_grad_norm_(resnet_model.parameters(), 1.0)
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses; also drives the plateau scheduler
    epoch_loss = running_loss / len(train_loader)
    scheduler.step(epoch_loss)

    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(resnet_model.state_dict(), 'best_ecg_model.pth')

# Predict, create submission and check results

In [None]:
resnet_model.load_state_dict(torch.load('best_ecg_model.pth', map_location=device))  # Load best weights
test_df = pd.read_csv('/kaggle/input/physionet-ecg-image-digitization/test.csv')

# Test transform: deterministic resize plus the normalisation used for training
test_transform = A.Compose([
    A.Resize(*config.img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

test_dataset = ECGDataset(
    test_df,
    '/kaggle/input/physionet-ecg-image-digitization/test',
    transform=test_transform,
    is_train=False
)

# shuffle=False keeps loader order identical to test_df row order, which the
# row counter below relies on.
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

CHEST_LEADS = {'V1', 'V2', 'V3', 'V4', 'V5', 'V6'}

submission_data = []

resnet_model.eval()

# One prediction per test_df row. A batch holds several rows, so metadata is
# looked up with a running row counter rather than the batch index; indexing
# by batch pairs predictions with the wrong rows and skips half the table.
row_idx = 0
with torch.no_grad():
    for images, _ in test_loader:
        batch_predictions = resnet_model(images.to(device)).cpu().numpy()

        for prediction in batch_predictions:
            test_row = test_df.iloc[row_idx]
            row_idx += 1

            base_id = test_row['id']
            lead = test_row['lead']
            num_rows = int(test_row['number_of_rows'])

            # Adjust length to required number of rows
            if len(prediction) > num_rows:
                prediction = prediction[:num_rows]
            elif len(prediction) < num_rows:
                prediction = np.pad(prediction, (0, num_rows - len(prediction)), mode='edge')

            # Chest leads are attenuated as before. The random noise that used
            # to be added here is gone: it made submissions non-reproducible
            # and can only lower the agreement with the reference signal.
            # NOTE(review): the 0.8 factor is an unverified heuristic.
            if lead in CHEST_LEADS:
                prediction = prediction * 0.8

            submission_data.extend(
                {'id': f"{base_id}_{row_id}_{lead}", 'value': float(prediction[row_id])}
                for row_id in range(num_rows)
            )

assert row_idx == len(test_df), "every test row must receive a prediction"
    


In [None]:
# Turn the collected records into a table, write it out, and preview the first rows
submission_path = 'submission.csv'
submission_df = pd.DataFrame(data=submission_data)
submission_df.to_csv(submission_path, index=False)
submission_df.head(n=30)