In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Optional RAPIDS imports. CUML_AVAILABLE records whether they all succeeded and is
# consulted later in this file to choose between cuDF and pandas code paths.
# NOTE(review): of these names only `cudf` is referenced in the visible code; `cuml`,
# `cuMLStandardScaler` and `cuml_train_test_split` appear unused — confirm before removing.
try:
    import cudf
    import cuml
    from cuml.preprocessing import StandardScaler as cuMLStandardScaler
    from cuml.model_selection import train_test_split as cuml_train_test_split
    CUML_AVAILABLE = True
    print("cuML available - GPU acceleration enabled")
except ImportError:
    # Only ImportError is handled; any other exception raised while importing propagates.
    CUML_AVAILABLE = False
    print("cuML not available - using CPU preprocessing")

# Set the global Keras dtype policy to 'mixed_float16' (applies to layers created afterwards).
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# GPU memory-growth configuration is intentionally disabled: per the original author's
# note it conflicts with cuML. The previous code is kept below for reference only.
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# if len(physical_devices) > 0:
#     for gpu in physical_devices:
#         tf.config.experimental.set_memory_growth(gpu, True)
#     print(f"GPU configuration complete: {len(physical_devices)} GPU(s) found")
# else:
#     print("No GPU found - using CPU")


# Constants for the model (used as defaults by build_multimodal_cnn and by process_sequences)
MAX_SEQUENCE_LENGTH = 50  # Sequences are truncated or padded to this many time steps
NUM_IMU_FEATURES = 7      # acc_x, acc_y, acc_z, rot_w, rot_x, rot_y, rot_z
NUM_THERMOPILE = 5        # thm_1 through thm_5
NUM_TOF_SENSORS = 5       # Time of Flight sensors
TOF_PIXELS_PER_SENSOR = 64  # 8x8 grid per ToF sensor
NUM_DEMOGRAPHIC_FEATURES = 7  # adult_child, age, sex, handedness, height, shoulder_to_wrist, elbow_to_wrist

def _read_csv(path):
    """Read a CSV with cuDF when RAPIDS was imported successfully, otherwise with pandas."""
    return cudf.read_csv(path) if CUML_AVAILABLE else pd.read_csv(path)


def _pad_to_length(values, length):
    """Pad a 2-D array along axis 0 by repeating its last row until it has `length` rows."""
    missing = length - len(values)
    if missing <= 0:
        return values
    return np.concatenate([values, np.repeat(values[-1:], missing, axis=0)], axis=0)


def load_and_preprocess_data(train_file, train_demographics_file, test_file=None, test_demographics_file=None):
    """
    Load and preprocess the BFRB dataset with sensor and demographic data

    Args:
        train_file: Path to training CSV file
        train_demographics_file: Path to training demographics CSV file
        test_file: Optional path to test CSV file
        test_demographics_file: Optional path to test demographics CSV file

    Returns:
        Dictionary with 'X_train', 'y_train', 'X_val', 'y_val' and 'gesture_classes'
        (gesture names indexed by encoded class id). When both test paths are given it
        also contains 'X_test' and 'sequence_ids'.
    """
    print("Loading data...")
    train_df = _read_csv(train_file)
    train_demo_df = _read_csv(train_demographics_file)

    # Process training sequences
    print("Processing training sequences...")
    train_sequences = process_sequences(train_df, train_demo_df)

    # Prepare training data for model. `preprocessors` is filled with the scalers and
    # label encoder fitted here so the test set can be transformed with the same ones.
    print("Preparing training data for model...")
    preprocessors = {}
    X_train, y_train = prepare_model_data(train_sequences, preprocessors=preprocessors)

    # Split by index so every array in the input/target dicts is subset consistently.
    # prepare_model_data always returns NumPy arrays, so one code path serves both the
    # cuDF and pandas loaders.
    # NOTE(review): the scalers above are fitted before this split, so validation rows
    # contribute to the scaling statistics.
    train_indices, val_indices = train_test_split(
        range(len(X_train['imu_input'])), test_size=0.2, random_state=42
    )

    def _take(arrays, indices):
        return {key: value[indices] for key, value in arrays.items()}

    data_dict = {
        'X_train': _take(X_train, train_indices),
        'y_train': _take(y_train, train_indices),
        'X_val': _take(X_train, val_indices),
        'y_val': _take(y_train, val_indices),
        # Class id -> gesture name, so predictions can be decoded later.
        'gesture_classes': list(preprocessors['label_encoder'].classes_),
    }

    # Load and process test data if provided
    if test_file and test_demographics_file:
        print("Loading test data...")
        test_df = _read_csv(test_file)
        test_demo_df = _read_csv(test_demographics_file)

        print("Processing test sequences...")
        test_sequences = process_sequences(test_df, test_demo_df, is_train=False)

        print("Preparing test data for model...")
        # Reuse the training-fitted scalers; re-fitting on the test set would put test
        # features on a different scale from what the model was trained on.
        X_test, sequence_ids = prepare_model_data(
            test_sequences, is_train=False, preprocessors=preprocessors
        )

        data_dict['X_test'] = X_test
        data_dict['sequence_ids'] = sequence_ids

    return data_dict

def process_sequences(df, demo_df, is_train=True):
    """
    Process raw dataframe into sequence data with demographic information

    Args:
        df: Sensor rows (pandas or cuDF DataFrame) with 'sequence_id' and 'subject'
            columns plus 'acc_*'/'rot_*', 'thm_*' and 'tof_*' feature columns
        demo_df: Demographics (pandas or cuDF DataFrame) keyed by 'subject'
        is_train: When True, 'sequence_type' and 'gesture' are read to build targets

    Returns:
        List of dicts, one per sequence, in order of first appearance. Each sequence is
        truncated or padded (by repeating its last row) to MAX_SEQUENCE_LENGTH rows.
        Sequences whose subject has no demographics row are skipped with a warning.
    """
    # Convert to pandas once up front instead of once per sequence.
    if hasattr(df, 'to_pandas'):
        df = df.to_pandas()
    if hasattr(demo_df, 'to_pandas'):
        demo_df = demo_df.to_pandas()

    demo_columns = [
        'adult_child', 'age', 'sex', 'handedness',
        'height_cm', 'shoulder_to_wrist_cm', 'elbow_to_wrist_cm',
    ]
    # First demographics row per subject, with missing values replaced by 0.
    demo_by_subject = (
        demo_df.drop_duplicates(subset='subject')
        .set_index('subject')[demo_columns]
        .fillna(0)
    )

    # Feature columns are the same for every sequence, so resolve them once.
    imu_cols = [col for col in df.columns if col.startswith('acc_') or col.startswith('rot_')]
    thm_cols = [col for col in df.columns if col.startswith('thm_')]
    tof_cols = [col for col in df.columns if col.startswith('tof_')]

    sequences = []

    # A single groupby pass replaces a full-frame boolean filter per sequence id;
    # sort=False keeps sequences in order of first appearance.
    for seq_id, seq_data in df.groupby('sequence_id', sort=False):
        subject_id = seq_data['subject'].iloc[0]

        if subject_id not in demo_by_subject.index:
            print(f"Warning: No demographic data found for subject {subject_id}")
            continue

        demo_features = demo_by_subject.loc[subject_id].tolist()

        # Truncate, then pad the extracted arrays (no repeated DataFrame concat).
        window = seq_data.iloc[:MAX_SEQUENCE_LENGTH]
        imu_data = _pad_to_length(window[imu_cols].fillna(0).to_numpy(), MAX_SEQUENCE_LENGTH)
        thm_data = _pad_to_length(window[thm_cols].fillna(0).to_numpy(), MAX_SEQUENCE_LENGTH)
        tof_raw = _pad_to_length(
            window[tof_cols].to_numpy(dtype=np.float64), MAX_SEQUENCE_LENGTH
        )

        # A ToF pixel is valid only if it is present (not NaN) and not the -1 sentinel.
        # Previously NaN was filled with 0 and then marked as valid in the mask.
        tof_valid = ~np.isnan(tof_raw) & (tof_raw != -1)
        tof_mask = tof_valid.astype(np.float32)
        tof_data = np.where(tof_valid, tof_raw, 0.0)

        sequence = {
            'sequence_id': seq_id,
            'imu_data': imu_data,
            'thm_data': thm_data,
            'tof_data': tof_data,
            'tof_mask': tof_mask,
            'demographic': demo_features,
        }

        # Add target values for training data
        if is_train:
            sequence_type = seq_data['sequence_type'].iloc[0]
            sequence['binary_target'] = 1 if sequence_type == 'target' else 0
            sequence['gesture'] = seq_data['gesture'].iloc[0]

        sequences.append(sequence)

    return sequences

def prepare_model_data(sequences, is_train=True, preprocessors=None):
    """
    Prepare sequences for model input

    Args:
        sequences: List of processed sequence dictionaries
        is_train: Boolean indicating if this is training data
        preprocessors: Optional dict used to share fitted preprocessing objects between
            calls. Entries found under 'imu_scaler', 'thm_scaler', 'demo_scaler' and
            'label_encoder' are reused as-is; missing ones are fitted on `sequences`
            and stored back into the dict. With None (the default) everything is
            fitted on `sequences` and discarded.

    Returns:
        Model inputs and targets (for training) or sequence IDs (for testing)

    Raises:
        ValueError: If `sequences` is empty.
    """
    if not sequences:
        raise ValueError("prepare_model_data received no sequences")

    fitted = preprocessors if preprocessors is not None else {}

    def to_host(item):
        """Return a NumPy copy of a CuPy array; recurse into lists; pass others through."""
        if hasattr(item, 'get'):  # CuPy array
            return item.get()
        if isinstance(item, list):
            return [to_host(element) for element in item]
        return item

    def stack(key):
        return np.array([to_host(seq[key]) for seq in sequences])

    imu_data = stack('imu_data')
    thm_data = stack('thm_data')
    tof_data = stack('tof_data')
    tof_mask = stack('tof_mask')
    demo_data = stack('demographic')
    sequence_ids = [seq['sequence_id'] for seq in sequences]

    def scale(name, values):
        """Standardise `values`, fitting and remembering the scaler if none is stored."""
        scaler = fitted.get(name)
        if scaler is None:
            scaler = StandardScaler().fit(values)
            fitted[name] = scaler
        return scaler.transform(values)

    # Scalers work on 2-D data: flatten (sequence, time) into rows, then restore.
    imu_shape = imu_data.shape
    thm_shape = thm_data.shape
    imu_data = scale('imu_scaler', imu_data.reshape(-1, imu_shape[2])).reshape(imu_shape)
    thm_data = scale('thm_scaler', thm_data.reshape(-1, thm_shape[2])).reshape(thm_shape)
    demo_data = scale('demo_scaler', demo_data)

    # Normalize ToF data (valid values range from 0-254)
    # Only normalize non-masked values
    tof_data = np.where(tof_mask == 1, tof_data / 254.0, 0)

    # Create model inputs dictionary
    X = {
        'imu_input': imu_data,
        'thm_input': thm_data,
        'tof_input': tof_data,
        'tof_mask': tof_mask,
        'demo_input': demo_data
    }

    if not is_train:
        return X, sequence_ids

    binary_targets = np.array(to_host([seq['binary_target'] for seq in sequences]))
    gestures = to_host([seq['gesture'] for seq in sequences])

    encoder = fitted.get('label_encoder')
    if encoder is None:
        encoder = LabelEncoder().fit(gestures)
        fitted['label_encoder'] = encoder
    gesture_targets = np.array(encoder.transform(gestures))

    y = {
        'binary_output': binary_targets,
        'multiclass_output': gesture_targets
    }

    return X, y

def build_multimodal_cnn(
    sequence_length=MAX_SEQUENCE_LENGTH,
    num_imu_features=NUM_IMU_FEATURES,
    num_thermopile=NUM_THERMOPILE,
    num_tof_sensors=NUM_TOF_SENSORS,
    tof_pixels=TOF_PIXELS_PER_SENSOR,
    num_demographic=NUM_DEMOGRAPHIC_FEATURES,
    num_gestures=19  # 8 BFRB + 10 non-BFRB + 1 non-target
):
    """
    Build multimodal CNN for BFRB detection

    Args:
        sequence_length: Length of input sequences
        num_imu_features: Number of IMU features
        num_thermopile: Number of thermopile sensors
        num_tof_sensors: Number of ToF sensors
        tof_pixels: Pixels per ToF sensor; must be a perfect square (square grid)
        num_demographic: Number of demographic features
        num_gestures: Number of gesture classes

    Returns:
        Uncompiled Keras model with inputs 'imu_input', 'thm_input', 'tof_input',
        'tof_mask', 'demo_input' and outputs 'binary_output', 'multiclass_output'.
        The caller is responsible for calling `compile`.

    Raises:
        ValueError: If `tof_pixels` is not a perfect square.
    """
    # The ToF grid side is derived from tof_pixels instead of being fixed at 8.
    grid_side = int(round(tof_pixels ** 0.5))
    if grid_side * grid_side != tof_pixels:
        raise ValueError(f"tof_pixels must be a perfect square, got {tof_pixels}")

    # Input layers
    imu_input = Input(shape=(sequence_length, num_imu_features), name='imu_input')
    thm_input = Input(shape=(sequence_length, num_thermopile), name='thm_input')
    tof_input = Input(shape=(sequence_length, num_tof_sensors * tof_pixels), name='tof_input')
    tof_mask = Input(shape=(sequence_length, num_tof_sensors * tof_pixels), name='tof_mask')
    demo_input = Input(shape=(num_demographic,), name='demo_input')

    # IMU branch - 1D CNN
    x_imu = layers.Conv1D(32, 3, activation='relu', padding='same')(imu_input)
    x_imu = layers.BatchNormalization()(x_imu)
    x_imu = layers.MaxPooling1D(2)(x_imu)

    x_imu = layers.Conv1D(64, 3, activation='relu', padding='same')(x_imu)
    x_imu = layers.BatchNormalization()(x_imu)
    x_imu = layers.MaxPooling1D(2)(x_imu)

    x_imu = layers.Conv1D(128, 3, activation='relu', padding='same')(x_imu)
    x_imu = layers.BatchNormalization()(x_imu)
    x_imu = layers.GlobalAveragePooling1D()(x_imu)
    x_imu = layers.Dropout(0.3)(x_imu)

    # Thermopile branch - 1D CNN
    x_thm = layers.Conv1D(16, 3, activation='relu', padding='same')(thm_input)
    x_thm = layers.BatchNormalization()(x_thm)
    x_thm = layers.MaxPooling1D(2)(x_thm)

    x_thm = layers.Conv1D(32, 3, activation='relu', padding='same')(x_thm)
    x_thm = layers.BatchNormalization()(x_thm)
    x_thm = layers.GlobalAveragePooling1D()(x_thm)
    x_thm = layers.Dropout(0.3)(x_thm)

    # ToF branch - zero out masked pixels, then reshape straight to
    # (time, sensors, side, side, 1); the trailing 1 is the Conv3D channel axis.
    x_tof = layers.Multiply()([tof_input, tof_mask])
    x_tof = layers.Reshape(
        (sequence_length, num_tof_sensors, grid_side, grid_side, 1)
    )(x_tof)

    # TimeDistributed applies the wrapped layer to each time step; the (1, 3, 3)
    # kernel convolves within each sensor's grid without mixing sensors.
    x_tof = layers.TimeDistributed(
        layers.Conv3D(16, (1, 3, 3), activation='relu', padding='same')
    )(x_tof)
    x_tof = layers.TimeDistributed(layers.BatchNormalization())(x_tof)
    x_tof = layers.TimeDistributed(layers.MaxPooling3D(pool_size=(1, 2, 2)))(x_tof)

    x_tof = layers.TimeDistributed(
        layers.Conv3D(32, (1, 3, 3), activation='relu', padding='same')
    )(x_tof)
    x_tof = layers.TimeDistributed(layers.BatchNormalization())(x_tof)

    # Flatten each time step and apply 1D convolution across time
    x_tof = layers.TimeDistributed(
        layers.Reshape((-1,))
    )(x_tof)

    x_tof = layers.Conv1D(64, 3, activation='relu', padding='same')(x_tof)
    x_tof = layers.BatchNormalization()(x_tof)
    x_tof = layers.GlobalAveragePooling1D()(x_tof)
    x_tof = layers.Dropout(0.3)(x_tof)

    # Demographic branch - Dense network
    x_demo = layers.Dense(32, activation='relu')(demo_input)
    x_demo = layers.BatchNormalization()(x_demo)
    x_demo = layers.Dropout(0.3)(x_demo)

    # Weighted fusion based on ToF availability `a` (fraction of valid mask entries).
    # Unnormalised weights are IMU=1, thermopile=1, ToF=a; dividing by their sum
    # (2 + a) makes the three weights sum to 1.
    tof_availability = layers.Lambda(
        lambda mask: tf.reduce_mean(mask, axis=[1, 2])
    )(tof_mask)
    tof_availability = layers.Reshape((1,))(tof_availability)  # (batch, 1) for broadcasting

    static_weight = layers.Lambda(lambda a: 1.0 / (2.0 + a))(tof_availability)
    tof_weight = layers.Lambda(lambda a: a / (2.0 + a))(tof_availability)

    # IMU and thermopile share the same weight value.
    x_imu_weighted = layers.Multiply()([x_imu, static_weight])
    x_thm_weighted = layers.Multiply()([x_thm, static_weight])
    x_tof_weighted = layers.Multiply()([x_tof, tof_weight])

    # Concatenate all features
    merged = layers.Concatenate()([x_imu_weighted, x_thm_weighted, x_tof_weighted, x_demo])

    # Common layers
    shared = layers.Dense(128, activation='relu')(merged)
    shared = layers.BatchNormalization()(shared)
    shared = layers.Dropout(0.5)(shared)

    # Output layers. dtype='float32' keeps the sigmoid/softmax outputs (and therefore
    # the losses and metrics) in full precision under the global mixed_float16 policy.
    binary_output = layers.Dense(
        1, activation='sigmoid', dtype='float32', name='binary_output'
    )(shared)
    multiclass_output = layers.Dense(
        num_gestures, activation='softmax', dtype='float32', name='multiclass_output'
    )(shared)

    # Create model
    model = Model(
        inputs=[imu_input, thm_input, tof_input, tof_mask, demo_input],
        outputs=[binary_output, multiclass_output]
    )

    return model

# Custom F1 Score implementation for TensorFlow
class F1Score(tf.keras.metrics.Metric):
    """Binary F1 score computed from wrapped Precision and Recall metrics.

    Predictions are thresholded at 0.5 (inclusive) before being passed to the
    wrapped metrics.
    """

    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate precision/recall statistics for one batch."""
        # Threshold predictions to get binary values
        y_pred_binary = tf.cast(tf.greater_equal(y_pred, 0.5), tf.float32)

        self.precision.update_state(y_true, y_pred_binary, sample_weight)
        self.recall.update_state(y_true, y_pred_binary, sample_weight)

    def result(self):
        """Return 2PR / (P + R), with epsilon guarding the zero-denominator case."""
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_state(self):
        """Clear the accumulated statistics of both wrapped metrics."""
        self.precision.reset_state()
        self.recall.reset_state()

    def reset_states(self):
        """Deprecated spelling kept for existing callers; delegates to reset_state."""
        self.reset_state()

# Custom Macro F1 for multi-class classification
class MacroF1Score(tf.keras.metrics.Metric):
    """Macro-averaged F1 over `num_classes` classes.

    Keeps per-class true-positive, false-positive and false-negative counts and
    averages the per-class F1 with equal weight. `sample_weight` is accepted for
    API compatibility but not used.
    """

    def __init__(self, num_classes, name='macro_f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes

        self.true_positives = self.add_weight(
            shape=(num_classes,),
            name='true_positives',
            initializer='zeros'
        )
        self.false_positives = self.add_weight(
            shape=(num_classes,),
            name='false_positives',
            initializer='zeros'
        )
        self.false_negatives = self.add_weight(
            shape=(num_classes,),
            name='false_negatives',
            initializer='zeros'
        )

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate per-class counts for one batch.

        `y_true` may be sparse class ids of shape (batch,) or (batch, 1), or an
        already one-hot array of shape (batch, num_classes).
        """
        # Convert predictions to one-hot
        y_pred_classes = tf.argmax(y_pred, axis=1)
        y_pred_one_hot = tf.one_hot(y_pred_classes, self.num_classes)

        if len(y_true.shape) == 1 or y_true.shape[-1] == 1:
            # Flatten first: one-hot of a (batch, 1) tensor is rank 3 and would
            # broadcast against the rank-2 predictions into (batch, batch, classes).
            sparse_labels = tf.reshape(tf.cast(y_true, tf.int32), [-1])
            y_true_one_hot = tf.one_hot(sparse_labels, self.num_classes)
        else:
            y_true_one_hot = tf.cast(y_true, y_pred_one_hot.dtype)

        # Calculate true positives, false positives, false negatives
        tp = tf.reduce_sum(y_true_one_hot * y_pred_one_hot, axis=0)
        fp = tf.reduce_sum((1 - y_true_one_hot) * y_pred_one_hot, axis=0)
        fn = tf.reduce_sum(y_true_one_hot * (1 - y_pred_one_hot), axis=0)

        self.true_positives.assign_add(tp)
        self.false_positives.assign_add(fp)
        self.false_negatives.assign_add(fn)

    def result(self):
        """Return the unweighted mean of per-class F1 scores."""
        eps = tf.keras.backend.epsilon()
        precision = self.true_positives / (self.true_positives + self.false_positives + eps)
        recall = self.true_positives / (self.true_positives + self.false_negatives + eps)

        f1_per_class = 2 * ((precision * recall) / (precision + recall + eps))

        return tf.reduce_mean(f1_per_class)

    def reset_state(self):
        """Zero all accumulated counts."""
        self.true_positives.assign(tf.zeros_like(self.true_positives))
        self.false_positives.assign(tf.zeros_like(self.false_positives))
        self.false_negatives.assign(tf.zeros_like(self.false_negatives))

    def reset_states(self):
        """Deprecated spelling kept for existing callers; delegates to reset_state."""
        self.reset_state()

    def get_config(self):
        """Include `num_classes` so the metric can be rebuilt from its config."""
        config = super().get_config()
        config['num_classes'] = self.num_classes
        return config

def train_model(model, data, epochs=50, batch_size=32, output_dir='models'):
    """
    Fit the BFRB detection model and persist the results.

    Args:
        model: Compiled Keras model
        data: Dictionary with 'X_train', 'y_train', 'X_val' and 'y_val' entries
        epochs: Number of training epochs
        batch_size: Batch size for training
        output_dir: Directory receiving the best checkpoint, the final model
            and the training history CSV (created if missing)

    Returns:
        Tuple of (trained model, Keras History object)
    """
    os.makedirs(output_dir, exist_ok=True)

    # All three callbacks watch the validation loss.
    monitored = 'val_loss'

    # Stop when validation loss stalls and roll back to the best weights.
    stop_early = EarlyStopping(
        monitor=monitored, patience=10, restore_best_weights=True, verbose=1
    )
    # Halve the learning rate on plateau, down to a floor of 1e-6.
    shrink_lr = ReduceLROnPlateau(
        monitor=monitored, factor=0.5, patience=5, min_lr=1e-6, verbose=1
    )
    # Keep only the checkpoint with the best validation loss.
    keep_best = ModelCheckpoint(
        filepath=os.path.join(output_dir, 'best_model.keras'),
        monitor=monitored,
        save_best_only=True,
        verbose=1,
    )

    print("Starting model training...")
    validation_pair = (data['X_val'], data['y_val'])
    history = model.fit(
        x=data['X_train'],
        y=data['y_train'],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=validation_pair,
        callbacks=[stop_early, shrink_lr, keep_best],
        verbose=1,
    )

    # Persist the model as it stands after training, then the per-epoch history.
    final_model_path = os.path.join(output_dir, 'final_model.keras')
    model.save(final_model_path)

    history_path = os.path.join(output_dir, 'training_history.csv')
    history_frame = pd.DataFrame(history.history)
    history_frame.to_csv(history_path, index=False)

    print(f"Model training complete. Models saved to {output_dir}")

    return model, history

def evaluate_model(model, data):
    """
    Evaluate the BFRB detection model on the validation split

    Args:
        model: Trained Keras model
        data: Dictionary containing 'X_val' and 'y_val'

    Returns:
        Dictionary mapping metric name to its value as a Python float
    """
    print("Evaluating model...")

    # return_dict=True lets Keras pair each value with its own name. Zipping
    # model.metrics_names with the positional results list can mislabel values
    # when the two do not line up (e.g. multi-output models).
    results = model.evaluate(
        data['X_val'],
        data['y_val'],
        verbose=1,
        return_dict=True
    )

    metrics = {name: float(value) for name, value in results.items()}

    print("Evaluation metrics:")
    for name, value in metrics.items():
        print(f"{name}: {value:.4f}")

    return metrics

def predict_and_save(model, data, output_file):
    """
    Generate predictions and save to file for competition submission

    Args:
        model: Trained Keras model whose `predict` returns
            [binary probabilities, multiclass probabilities]
        data: Dictionary containing 'X_test' and 'sequence_ids'. If it also has
            'gesture_classes' (gesture names indexed by class id), those names are
            written to the 'gesture' column; otherwise 'gesture_<id>' is used.
        output_file: Path to save predictions
    """
    print("Generating predictions...")

    predictions = model.predict(data['X_test'])

    binary_preds = predictions[0]
    multiclass_preds = predictions[1]

    # Convert to binary predictions (0 or 1)
    binary_classes = (np.asarray(binary_preds) > 0.5).astype(int).flatten()

    # Get class with highest probability for multiclass
    multiclass_classes = np.argmax(multiclass_preds, axis=1)

    # Class ids come from a LabelEncoder, so id 0 is simply the first label in sorted
    # order, not a "non_target" class. Decode with the real class names when available.
    gesture_classes = data.get('gesture_classes')

    def gesture_name(class_id):
        if gesture_classes is not None and 0 <= class_id < len(gesture_classes):
            return gesture_classes[class_id]
        return f"gesture_{class_id}"

    output_df = pd.DataFrame({
        'sequence_id': data['sequence_ids'],
        'is_target': binary_classes,
        'gesture_class': multiclass_classes,
        'gesture': [gesture_name(cls) for cls in multiclass_classes]
    })

    output_df.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")

def save_model(model, model_dir):
    """
    Write the trained model to `model_dir` in several formats.

    Produces, in this order: the native Keras archive, an exported SavedModel
    directory, a legacy H5 file, the architecture as JSON, and a weights-only file.
    The directory is created when it does not exist.
    """
    os.makedirs(model_dir, exist_ok=True)

    def target(filename):
        # Resolve an artifact name inside the output directory.
        return os.path.join(model_dir, filename)

    # Native Keras format (recommended)
    model.save(target('model.keras'))

    # SavedModel export (for TFLite/TFServing)
    model.export(target('savedmodel'))

    # Legacy H5 for compatibility
    model.save(target('model.h5'))

    # Architecture only, as JSON
    architecture_json = model.to_json()
    with open(target('model_architecture.json'), 'w') as handle:
        handle.write(architecture_json)

    # Weights only
    model.save_weights(target('model.weights.h5'))

    print(f"Model saved to {model_dir} in multiple formats")

def load_model(model_dir):
    """
    Load a trained model from a directory

    Tries, in order: the native Keras archive ('model.keras'), the H5 file
    ('model.h5'), and finally architecture JSON plus separate weights, in which case
    the model is recompiled here. The reason for each failed attempt is printed
    before falling back.

    Args:
        model_dir: Directory previously written by save_model

    Returns:
        The loaded Keras model
    """
    # The saved compile configuration references these custom metric classes.
    custom_objects = {'F1Score': F1Score, 'MacroF1Score': MacroF1Score}

    # `except Exception` (rather than a bare except) lets KeyboardInterrupt and
    # SystemExit propagate while still falling back on any load failure.
    try:
        model = tf.keras.models.load_model(
            os.path.join(model_dir, 'model.keras'), custom_objects=custom_objects
        )
        print(f"Loaded model from {model_dir} (Keras format)")
        return model
    except Exception as keras_error:
        print(f"Could not load Keras format: {keras_error}")

    try:
        model = tf.keras.models.load_model(
            os.path.join(model_dir, 'model.h5'), custom_objects=custom_objects
        )
        print(f"Loaded model from {model_dir} (H5 format)")
        return model
    except Exception as h5_error:
        print(f"Could not load H5 format: {h5_error}")

    # Last resort: load architecture and weights separately. Errors here propagate.
    with open(os.path.join(model_dir, 'model_architecture.json'), 'r') as f:
        model_json = f.read()

    model = tf.keras.models.model_from_json(model_json)
    model.load_weights(os.path.join(model_dir, 'model.weights.h5'))

    # Recompile model (add your custom metrics here)
    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss={
            'binary_output': BinaryCrossentropy(),
            'multiclass_output': SparseCategoricalCrossentropy()
        },
        metrics={
            'binary_output': [F1Score()],
            'multiclass_output': ['accuracy']
        }
    )

    print(f"Loaded model from {model_dir} (architecture + weights)")

    return model

def main():
    """
    Main execution function

    Loads and preprocesses the data, builds and compiles the model, trains and
    evaluates it, writes test predictions when test data is present, and saves the
    model to 'bfrb_model'.
    """
    # GPU and mixed precision configuration is done at the top of the script

    # Define paths to data files
    train_file = 'train.csv'
    train_demo_file = 'train_demographics.csv'
    test_file = 'test.csv'
    test_demo_file = 'test_demographics.csv'

    # Load and preprocess data
    data = load_and_preprocess_data(
        train_file,
        train_demo_file,
        test_file,
        test_demo_file
    )

    # Size the multiclass head and the macro-F1 metric from the encoded labels rather
    # than a hard-coded count: a class that never occurs always scores F1 = 0 and
    # drags the macro average down.
    gesture_labels = np.concatenate([
        np.ravel(data['y_train']['multiclass_output']),
        np.ravel(data['y_val']['multiclass_output']),
    ])
    num_gestures = int(gesture_labels.max()) + 1

    # Build model
    model = build_multimodal_cnn(num_gestures=num_gestures)

    # Compile model with custom metrics
    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss={
            'binary_output': BinaryCrossentropy(),
            'multiclass_output': SparseCategoricalCrossentropy()
        },
        metrics={
            'binary_output': [F1Score()],
            'multiclass_output': ['accuracy', MacroF1Score(num_classes=num_gestures)]
        },
        # Equal weight for both tasks (binary and multiclass)
        loss_weights={
            'binary_output': 0.5,
            'multiclass_output': 0.5
        }
    )

    # Print model summary
    model.summary()

    # Train model if we have training data
    if 'X_train' in data and 'y_train' in data:
        model, history = train_model(
            model,
            data,
            epochs=50,
            batch_size=32,
            output_dir='bfrb_model'
        )

        # Evaluate model
        if 'X_val' in data and 'y_val' in data:
            evaluate_model(model, data)

    # Generate predictions if we have test data
    if 'X_test' in data:
        predict_and_save(model, data, 'bfrb_predictions.csv')

    # Save model
    save_model(model, 'bfrb_model')

    print("BFRB detection pipeline complete")

if __name__ == "__main__":
    main()

cuML available - GPU acceleration enabled
Loading data...


FileNotFoundError: ['train.csv'] could not be resolved to any files

In [1]:
# Extract every member of the competition archive into the current working directory.
# NOTE(review): the pipeline cell earlier in this notebook failed with FileNotFoundError
# for 'train.csv'; presumably this archive provides that file, in which case this cell
# has to run before the pipeline cell — confirm and reorder.
import zipfile
with zipfile.ZipFile("cmi-detect-behavior-with-sensor-data.zip", 'r') as zip_ref:
    zip_ref.extractall("./")