In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.utils import resample
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, auc
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Concatenate, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import Precision, Recall, AUC
import h5py
import random
import base64
import io
from PIL import Image
import math

2024-08-22 19:20:05.427381: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-22 19:20:05.427487: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-22 19:20:05.560532: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [3]:
# File paths
train_metadata_path = '/kaggle/input/isic-2024-challenge/train-metadata.csv'
train_image_hdf5_path = '/kaggle/input/isic-2024-challenge/train-image.hdf5'
test_metadata_path = '/kaggle/input/isic-2024-challenge/test-metadata.csv'
test_image_hdf5_path = '/kaggle/input/isic-2024-challenge/test-image.hdf5'

In [4]:
def load_image_from_hdf5(hdf5_path, image_id):
    with h5py.File(hdf5_path, 'r') as hdf:
        # Load the raw data
        image_data = hdf[image_id][()]
        
    # Convert the data to a numpy array
    image_array = np.frombuffer(image_data, dtype=np.uint8)
    
    # Decode the image using OpenCV
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    
    # Convert BGR to RGB (OpenCV loads images in BGR format)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    return image

In [5]:
def preprocess_image(image, target_size=(224, 224)):
    # Resize the image
    image_resized = cv2.resize(image, target_size)
    
    # Normalize the image
    image_normalized = image_resized.astype(np.float32) / 255.0
    
    return image_normalized

In [6]:
def load_and_preprocess_data(metadata_path, hdf5_path, is_train=True, train_columns=None, train_encoders=None):
    # Load metadata
    data = pd.read_csv(metadata_path, low_memory=False)
    
    # Drop unnecessary columns
    columns_to_drop = ['patient_id', 'copyright_license', 'attribution', 'image_type', 
                       'tbp_tile_type', 'lesion_id']
    data = data.drop(columns=columns_to_drop, errors='ignore')
    
    # Handle missing values in numeric columns
    numeric_columns = ['age_approx', 'clin_size_long_diam_mm', 'tbp_lv_A', 'tbp_lv_Aext', 'tbp_lv_B', 'tbp_lv_Bext', 
                       'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H', 'tbp_lv_Hext', 'tbp_lv_L', 'tbp_lv_Lext', 
                       'tbp_lv_areaMM2', 'tbp_lv_area_perim_ratio', 'tbp_lv_color_std_mean', 'tbp_lv_deltaA', 
                       'tbp_lv_deltaB', 'tbp_lv_deltaL', 'tbp_lv_deltaLBnorm', 'tbp_lv_eccentricity', 
                       'tbp_lv_minorAxisMM', 'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 'tbp_lv_norm_color', 
                       'tbp_lv_perimeterMM', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL', 'tbp_lv_stdLExt', 
                       'tbp_lv_symm_2axis', 'tbp_lv_x', 'tbp_lv_y', 'tbp_lv_z']
    
    if is_train:
        imputer = SimpleImputer(strategy='median')
        data[numeric_columns] = imputer.fit_transform(data[numeric_columns])
    else:
        # Use the imputer fitted on training data
        data[numeric_columns] = train_encoders['imputer'].transform(data[numeric_columns])
    
    # Encode categorical variables
    categorical_columns = ['sex', 'anatom_site_general', 'tbp_lv_location', 'tbp_lv_location_simple']
    if is_train:
        categorical_columns.extend(['iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5'])
        label_encoders = {}
        for col in categorical_columns:
            if col in data.columns:
                le = LabelEncoder()
                data[col] = data[col].fillna('Unknown')
                data[col] = le.fit_transform(data[col].astype(str))
                label_encoders[col] = le
    else:
        # Use the label encoders fitted on training data
        for col in categorical_columns:
            if col in data.columns:
                data[col] = data[col].fillna('Unknown')
                data[col] = train_encoders['label_encoders'][col].transform(data[col].astype(str))
    
    # One-hot encode relevant categorical variables
    categorical_columns_to_onehot = ['sex', 'anatom_site_general', 'tbp_lv_location', 'tbp_lv_location_simple']
    if is_train:
        data = pd.get_dummies(data, columns=categorical_columns_to_onehot)
        train_columns = data.columns
    else:
        # For test data, add missing columns
        for col in train_columns:
            if col not in data.columns:
                data[col] = 0
        # Ensure test data has the same columns as train data
        data = data[train_columns]
    
    # Scale numerical features
    if is_train:
        scaler = StandardScaler()
        data[numeric_columns] = scaler.fit_transform(data[numeric_columns])
    else:
        # Use the scaler fitted on training data
        data[numeric_columns] = train_encoders['scaler'].transform(data[numeric_columns])
    
    if is_train:
        # Handle 'mel_mitotic_index' if present
        if 'mel_mitotic_index' in data.columns:
            mitotic_index_mapping = {
                '<1/mm^2': 0, '0/mm^2': 0, '1/mm^2': 1, '2/mm^2': 2, 
                '3/mm^2': 3, '4/mm^2': 4, '>4/mm^2': 5
            }
            data['mel_mitotic_index'] = data['mel_mitotic_index'].map(mitotic_index_mapping).fillna(-1)
        
        # Handle 'mel_thick_mm' if present
        if 'mel_thick_mm' in data.columns:
            data['mel_thick_mm'] = pd.to_numeric(data['mel_thick_mm'], errors='coerce').fillna(-1)
    
    # Reset index
    data = data.reset_index(drop=True)
    
    # Print column names for debugging
    print(f"{'Train' if is_train else 'Test'} columns:", data.columns)
    
    if is_train:
        train_encoders = {
            'imputer': imputer,
            'label_encoders': label_encoders,
            'scaler': scaler
        }
        return data, hdf5_path, train_columns, train_encoders
    else:
        return data, hdf5_path

In [7]:
class HDF5DataGenerator:
    def __init__(self, data, hdf5_path, batch_size=32, dim=(224, 224), n_channels=3, shuffle=True, is_test=False):
        self.data = data
        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.is_test = is_test
        self.feature_columns = [col for col in data.columns if col not in ['isic_id', 'target']]

    def __call__(self):
        indices = list(range(len(self.data)))
        if self.shuffle:
            np.random.shuffle(indices)
    
        for i in indices:
            row = self.data.iloc[i]
            img = load_image_from_hdf5(self.hdf5_path, row['isic_id'])
            img_processed = preprocess_image(img, self.dim)
        
            tab_data = row[self.feature_columns].values
        
            if self.is_test:
                yield img_processed, tab_data
            else:
                yield (img_processed, tab_data), row['target']

In [8]:
def create_dataset(generator, data_size, batch_size, is_test=False):
    feature_shape = (data_size.shape[1] - 2,) if 'target' in data_size.columns else (data_size.shape[1] - 1,)
    
    if is_test:
        output_signature = (
            tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
            tf.TensorSpec(shape=feature_shape, dtype=tf.float32)
        )
    else:
        output_signature = (
            (tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
             tf.TensorSpec(shape=feature_shape, dtype=tf.float32)),
            tf.TensorSpec(shape=(), dtype=tf.int32)
        )
    
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=output_signature
    )
    
    if not is_test:
        dataset = dataset.shuffle(buffer_size=len(data_size)).repeat()
    
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [9]:
# Focal Loss implementation
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1 + K.epsilon())) - K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))
    return focal_loss_fixed

In [10]:
# Function to balance dataset
def balance_dataset(data, undersample_ratio=0.5):
    majority_class = data[data['target'] == 0]
    minority_class = data[data['target'] == 1]
    
    # Undersample majority class
    n_majority = int(len(minority_class) / (1 - undersample_ratio))
    majority_undersampled = resample(majority_class, 
                                     n_samples=n_majority, 
                                     random_state=42)
    
    # Combine minority class with undersampled majority class
    balanced_data = pd.concat([majority_undersampled, minority_class])
    
    return balanced_data.reset_index(drop=True)

In [11]:
def create_model(img_shape, tab_shape):
    # Image input branch
    img_input = Input(shape=img_shape)
    x = Conv2D(32, (3, 3), activation='relu')(img_input)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)

    # Tabular input branch
    tab_input = Input(shape=(tab_shape,))
    y = Dense(64, activation='relu')(tab_input)
    y = BatchNormalization()(y)
    y = Dropout(0.3)(y)

    # Combine branches
    combined = Concatenate()([x, y])
    z = Dense(32, activation='relu')(combined)
    z = BatchNormalization()(z)
    z = Dropout(0.3)(z)
    output = Dense(1, activation='sigmoid')(z)

    model = Model(inputs=[img_input, tab_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss=focal_loss(alpha=.25, gamma=2),
                  metrics=['accuracy', AUC(name='auc'), Precision(name='precision'), Recall(name='recall')])
    
    return model

In [12]:
def train_model(train_data, train_hdf5_path, val_data=None, val_hdf5_path=None, n_splits=2, epochs=30, batch_size=32):
    if val_data is None:
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        best_model = None
        best_auc = 0
        
        for fold, (train_idx, val_idx) in enumerate(skf.split(train_data, train_data['target'])):
            print(f"Training fold {fold + 1}")
            
            train_data_fold = train_data.iloc[train_idx].reset_index(drop=True)
            val_data_fold = train_data.iloc[val_idx].reset_index(drop=True)
            
            train_gen = HDF5DataGenerator(train_data_fold, train_hdf5_path, batch_size=batch_size)
            val_gen = HDF5DataGenerator(val_data_fold, train_hdf5_path, batch_size=batch_size)
            
            train_dataset = create_dataset(train_gen, train_data_fold, batch_size)
            val_dataset = create_dataset(val_gen, val_data_fold, batch_size)
            
            model = create_model((224, 224, 3), train_data.shape[1] - 2)
            
            # Define callbacks
            model_checkpoint = ModelCheckpoint(
                f'best_model_fold_{fold+1}.keras',
                monitor='val_auc',
                mode='max',
                save_best_only=True,
                verbose=1
            )
            callbacks = [
                EarlyStopping(patience=10, restore_best_weights=True),
                ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6),
                model_checkpoint
            ]
            
            # Calculate steps per epoch
            steps_per_epoch = math.ceil(len(train_data_fold) / batch_size)
            validation_steps = math.ceil(len(val_data_fold) / batch_size)
            
            # Train model
            history = model.fit(
                train_dataset,
                validation_data=val_dataset,
                epochs=epochs,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps,
                callbacks=callbacks
            )
            
            # Evaluate model
            val_loss, val_accuracy, val_auc, val_precision, val_recall = model.evaluate(val_dataset, steps=validation_steps)
            print(f"Fold {fold + 1} - Validation Loss: {val_loss:.4f}, "
                  f"Accuracy: {val_accuracy:.4f}, AUC: {val_auc:.4f}, "
                  f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}")
            
            # Calculate F1-score
            f1_score = 2 * (val_precision * val_recall) / (val_precision + val_recall + K.epsilon())
            print(f"F1-score: {f1_score:.4f}")
            
            # Keep track of the best model
            if val_auc > best_auc:
                best_auc = val_auc
                best_model = model
    
    else:
        train_gen = HDF5DataGenerator(train_data, train_hdf5_path, batch_size=batch_size)
        val_gen = HDF5DataGenerator(val_data, val_hdf5_path, batch_size=batch_size)
        
        train_dataset = create_dataset(train_gen, train_data, batch_size)
        val_dataset = create_dataset(val_gen, val_data, batch_size)
        
        model = create_model((224, 224, 3), train_data.shape[1] - 2)
        
        # Define callbacks
        model_checkpoint = ModelCheckpoint(
            'best_model.keras',
            monitor='val_auc',
            mode='max',
            save_best_only=True,
            verbose=1
        )
        callbacks = [
            EarlyStopping(patience=10, restore_best_weights=True),
            ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6),
            model_checkpoint
        ]
        
        # Calculate steps per epoch
        steps_per_epoch = math.ceil(len(train_data) / batch_size)
        validation_steps = math.ceil(len(val_data) / batch_size)
        
        # Train model
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            validation_steps=validation_steps,
            callbacks=callbacks
        )
        
        best_model = model
    
    # Save the overall best model
    best_model.save('best_model_overall.keras')
    print(f"Best model saved with validation AUC: {best_auc:.4f}")
    
    return best_model

In [13]:
def check_class_distribution(data):
    class_counts = data['target'].value_counts()
    class_percentages = class_counts / len(data) * 100
    
    print("Class Distribution:")
    for class_label, count in class_counts.items():
        percentage = class_percentages[class_label]
        print(f"Class {class_label}: {count} samples ({percentage:.2f}%)")
    
    imbalance_ratio = class_counts.max() / class_counts.min()
    print(f"\nImbalance Ratio: {imbalance_ratio:.2f}")

In [14]:
def evaluate_on_test_set(model, test_gen, test_metadata):
    all_predictions = []
    for i in range(len(test_gen)):
        try:
            batch = test_gen[i]
            # Print batch information for debugging
            print(f"Batch {i} shapes - Image: {batch[0]['image_input'].shape}, Tabular: {batch[0]['tabular_input'].shape}")
            predictions = model.predict(batch[0], verbose=0)
            all_predictions.append(predictions)
            print(f"Successfully predicted batch {i} with shape {predictions.shape}")
        except Exception as e:
            print(f"Error predicting batch {i}: {str(e)}")
            # Print more detailed error information
            import traceback
            print(traceback.format_exc())
    
    print(f"Total batches processed: {len(test_gen)}")
    print(f"Number of successful predictions: {len(all_predictions)}")
    
    if not all_predictions:
        raise ValueError("No predictions were made successfully. Check the error messages above for more details.")
    
    predictions = np.concatenate(all_predictions).flatten()
    
    print(f"Final predictions shape: {predictions.shape}")
    
    # Create submission DataFrame
    submission = pd.DataFrame({
        'isic_id': test_metadata['isic_id'],
        'target': predictions
    })
    
    # Save submission file
    submission.to_csv('submission.csv', index=False)
    print("Predictions saved to 'submission.csv'")
    
    return predictions

In [15]:
if __name__ == "__main__":
    
    # When processing train data
    train_data, train_hdf5_path, train_columns, train_encoders = load_and_preprocess_data(train_metadata_path, train_image_hdf5_path, is_train=True)

    print("Original data distribution:")
    check_class_distribution(train_data)
    
    # Balance the dataset (if needed)
    balanced_data = balance_dataset(train_data)
    
    print("\nBalanced data distribution:")
    check_class_distribution(balanced_data)
    
    # Train model
    best_model = train_model(balanced_data, train_hdf5_path)
    
    # When processing test data
    test_data, test_hdf5_path = load_and_preprocess_data(test_metadata_path, test_image_hdf5_path, is_train=False, train_columns=train_columns, train_encoders=train_encoders)

    print("\nTest data shape:", test_data.shape)
    print("Test data columns:", test_data.columns)
    
    # Create test generator and dataset
    test_gen = HDF5DataGenerator(test_data, test_hdf5_path, is_test=True)
    test_dataset = create_dataset(test_gen, test_data, batch_size=32, is_test=True)

    # Print shapes before prediction
    for batch in test_dataset.take(1):
        print("Test batch shape:", batch[0].shape, batch[1].shape)

    # Make predictions on test set
    predictions = []
    for batch in test_dataset:
        batch_predictions = best_model.predict(batch)
        predictions.append(batch_predictions)

    predictions = np.concatenate(predictions).flatten()

    # Create submission DataFrame
    submission = pd.DataFrame({
        'isic_id': test_data['isic_id'],
        'target': predictions
    })

    # Save submission file
    submission.to_csv('submission.csv', index=False)
    print("Predictions saved to 'submission.csv'")

Train columns: Index(['isic_id', 'target', 'age_approx', 'clin_size_long_diam_mm', 'tbp_lv_A',
       'tbp_lv_Aext', 'tbp_lv_B', 'tbp_lv_Bext', 'tbp_lv_C', 'tbp_lv_Cext',
       'tbp_lv_H', 'tbp_lv_Hext', 'tbp_lv_L', 'tbp_lv_Lext', 'tbp_lv_areaMM2',
       'tbp_lv_area_perim_ratio', 'tbp_lv_color_std_mean', 'tbp_lv_deltaA',
       'tbp_lv_deltaB', 'tbp_lv_deltaL', 'tbp_lv_deltaLB',
       'tbp_lv_deltaLBnorm', 'tbp_lv_eccentricity', 'tbp_lv_minorAxisMM',
       'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 'tbp_lv_norm_color',
       'tbp_lv_perimeterMM', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL',
       'tbp_lv_stdLExt', 'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle',
       'tbp_lv_x', 'tbp_lv_y', 'tbp_lv_z', 'iddx_full', 'iddx_1', 'iddx_2',
       'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index', 'mel_thick_mm',
       'tbp_lv_dnn_lesion_confidence', 'sex_0', 'sex_1', 'sex_2',
       'anatom_site_general_0', 'anatom_site_general_1',
       'anatom_site_general_2', 'anatom_site

2024-08-22 19:21:21.812042: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49284: 7.84353, expected 6.92193
2024-08-22 19:21:21.812101: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49286: 7.76792, expected 6.84632
2024-08-22 19:21:21.812111: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49287: 6.15631, expected 5.23472
2024-08-22 19:21:21.812119: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49288: 6.71859, expected 5.797
2024-08-22 19:21:21.812127: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49289: 7.43913, expected 6.51753
2024-08-22 19:21:21.812135: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49290: 7.1783, expected 6.25671
2024-08-22 19:21:21.812143: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 49291: 6.06838, expected 5.14679
2024-08-22 19:21:21.812151: E 

[1m 2/19[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 79ms/step - accuracy: 0.5391 - auc: 0.4981 - loss: 0.3362 - precision: 0.3721 - recall: 0.5000  

I0000 00:00:1724354489.411591      71 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5598 - auc: 0.5573 - loss: 0.3019 - precision: 0.3969 - recall: 0.5332
Epoch 1: val_auc improved from -inf to 0.61836, saving model to best_model_fold_1.keras
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 7s/step - accuracy: 0.5603 - auc: 0.5585 - loss: 0.3006 - precision: 0.3966 - recall: 0.5339 - val_accuracy: 0.7582 - val_auc: 0.6184 - val_loss: 0.0776 - val_precision: 0.9831 - val_recall: 0.2843 - learning_rate: 1.0000e-04
Epoch 2/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5905 - auc: 0.5946 - loss: 0.3143 - precision: 0.4227 - recall: 0.5162
Epoch 2: val_auc did not improve from 0.61836
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 5s/step - accuracy: 0.5905 - auc: 0.5958 - loss: 0.3118 - precision: 0.4224 - recall: 0.5169 - val_accuracy: 0.6678 - val_auc: 0.5803 - val_loss: 0.0636 - val_precision: 1.0000 - val_r

2024-08-22 19:53:00.322232: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98570: 7.29926, expected 6.39021
2024-08-22 19:53:00.322304: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98571: 6.54057, expected 5.63152
2024-08-22 19:53:00.322325: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98572: 6.60398, expected 5.69494
2024-08-22 19:53:00.322353: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98573: 6.72281, expected 5.81376
2024-08-22 19:53:00.322368: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98574: 6.22525, expected 5.31621
2024-08-22 19:53:00.322381: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98575: 5.68646, expected 4.77741
2024-08-22 19:53:00.322394: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 98576: 5.92668, expected 5.01763
2024-08-22 19:53:00.322406:

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 989ms/step
Predictions saved to 'submission.csv'
