In [10]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, f1_score
import warnings
warnings.filterwarnings('ignore')

# Try to import cuML for GPU acceleration
try:
    import cudf
    import cuml
    from cuml.preprocessing import StandardScaler as cuMLStandardScaler
    from cuml.model_selection import train_test_split as cuml_train_test_split
    CUML_AVAILABLE = True
    print("cuML available - GPU acceleration enabled")
except ImportError:
    CUML_AVAILABLE = False
    print("cuML not available - using CPU preprocessing")

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Constants for the model
MAX_SEQUENCE_LENGTH = 50
NUM_IMU_FEATURES = 7
NUM_THERMOPILE = 5
NUM_TOF_SENSORS = 5
TOF_PIXELS_PER_SENSOR = 64
NUM_DEMOGRAPHIC_FEATURES = 7

# Global variables for class weights
BINARY_CLASS_WEIGHTS = None
MULTICLASS_CLASS_WEIGHTS = None

class AsymmetricFocalLoss(nn.Module):
    def __init__(self, alpha_pos=0.25, alpha_neg=0.75, gamma_pos=1.0, gamma_neg=4.0):
        """
        Asymmetric Focal Loss for binary classification to improve precision

        Args:
            alpha_pos: Weight for positive samples (lower to reduce false positives)
            alpha_neg: Weight for negative samples (higher to emphasize true negatives)
            gamma_pos: Focusing parameter for positive samples (lower focusing)
            gamma_neg: Focusing parameter for negative samples (higher focusing on hard negatives)
        """
        super(AsymmetricFocalLoss, self).__init__()
        self.alpha_pos = alpha_pos
        self.alpha_neg = alpha_neg
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg

    def forward(self, y_pred, y_true):
        y_true = y_true.float()
        y_pred = torch.clamp(y_pred, min=1e-8, max=1.0 - 1e-8)

        # Calculate asymmetric focal loss components
        # For positive samples (y_true = 1)
        pos_loss = -self.alpha_pos * torch.pow(1 - y_pred, self.gamma_pos) * torch.log(y_pred)

        # For negative samples (y_true = 0)
        neg_loss = -self.alpha_neg * torch.pow(y_pred, self.gamma_neg) * torch.log(1 - y_pred)

        # Apply based on true labels
        loss = torch.where(y_true == 1, pos_loss, neg_loss)

        return torch.mean(loss)

class SparseCategoricalFocalLoss(nn.Module):
    def __init__(self, alpha=2.0, gamma=2.5, num_classes=18):
        super(SparseCategoricalFocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.num_classes = num_classes

    def forward(self, y_pred, y_true):
        y_true = y_true.long()
        y_pred = F.softmax(y_pred, dim=1)
        y_pred = torch.clamp(y_pred, min=1e-8, max=1.0 - 1e-8)

        # Convert sparse labels to one-hot
        y_true_one_hot = F.one_hot(y_true, num_classes=self.num_classes).float()

        # Calculate focal loss with class-aware weighting
        ce_loss = -torch.sum(y_true_one_hot * torch.log(y_pred), dim=1)
        p_t = torch.sum(y_true_one_hot * y_pred, dim=1)
        focal_weight = self.alpha * torch.pow(1 - p_t, self.gamma)

        focal_loss = focal_weight * ce_loss
        return torch.mean(focal_loss)

# Custom Metrics
class WeightedF1Score:
    def __init__(self):
        self.reset()

    def reset(self):
        self.tp = 0
        self.fp = 0
        self.fn = 0

    def update(self, y_pred, y_true):
        y_pred_binary = (y_pred >= 0.5).float()
        self.tp += torch.sum((y_pred_binary == 1) & (y_true == 1)).item()
        self.fp += torch.sum((y_pred_binary == 1) & (y_true == 0)).item()
        self.fn += torch.sum((y_pred_binary == 0) & (y_true == 1)).item()

    def compute(self):
        precision = self.tp / (self.tp + self.fp + 1e-8)
        recall = self.tp / (self.tp + self.fn + 1e-8)
        f1 = 2 * (precision * recall) / (precision + recall + 1e-8)
        return f1

class MacroF1Score:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.reset()

    def reset(self):
        self.tp = torch.zeros(self.num_classes)
        self.fp = torch.zeros(self.num_classes)
        self.fn = torch.zeros(self.num_classes)

    def update(self, y_pred, y_true):
        y_pred_classes = torch.argmax(y_pred, dim=1)

        for i in range(self.num_classes):
            self.tp[i] += torch.sum((y_pred_classes == i) & (y_true == i)).item()
            self.fp[i] += torch.sum((y_pred_classes == i) & (y_true != i)).item()
            self.fn[i] += torch.sum((y_pred_classes != i) & (y_true == i)).item()

    def compute(self):
        precision = self.tp / (self.tp + self.fp + 1e-8)
        recall = self.tp / (self.tp + self.fn + 1e-8)
        f1_per_class = 2 * (precision * recall) / (precision + recall + 1e-8)
        return torch.mean(f1_per_class).item()

class PrecisionAtRecall:
    def __init__(self, target_recall=0.95):
        self.target_recall = target_recall
        self.predictions = []
        self.targets = []

    def reset(self):
        self.predictions = []
        self.targets = []

    def update(self, y_pred, y_true):
        self.predictions.extend(y_pred.cpu().detach().numpy())
        self.targets.extend(y_true.cpu().detach().numpy())

    def compute(self):
        if len(self.predictions) == 0:
            return 0.0

        precision, recall, thresholds = precision_recall_curve(
            self.targets, self.predictions
        )

        # Find precision at target recall
        valid_indices = recall >= self.target_recall
        if np.any(valid_indices):
            return np.max(precision[valid_indices])
        return 0.0

# Time Series Data Augmentation Functions
def time_series_augmentation(data, augment_prob=0.5):
    """Apply various time series augmentation techniques"""
    if np.random.random() < augment_prob:
        augmentation_type = np.random.choice(['jitter', 'scale', 'time_warp', 'magnitude_warp'])

        if augmentation_type == 'jitter':
            # Add random noise
            noise = np.random.normal(0, 0.03, data.shape)
            return data + noise

        elif augmentation_type == 'scale':
            # Scale the magnitude
            scale_factor = np.random.uniform(0.8, 1.2)
            return data * scale_factor

        elif augmentation_type == 'time_warp':
            # Time warping by random sampling
            seq_len = data.shape[0]
            indices = np.sort(np.random.choice(seq_len, int(seq_len * 0.9), replace=False))
            warped_data = np.zeros_like(data)
            warped_indices = np.linspace(0, seq_len-1, len(indices)).astype(int)
            warped_data[warped_indices] = data[indices]
            return warped_data

        elif augmentation_type == 'magnitude_warp':
            # Smooth magnitude warping
            warp = np.random.normal(1.0, 0.1, data.shape[1])
            return data * warp

    return data

def load_and_preprocess_data(train_file, train_demographics_file, test_file=None, test_demographics_file=None):
    """Enhanced data loading with proper class balance analysis"""
    print("Loading data...")

    if CUML_AVAILABLE:
        train_df = cudf.read_csv(train_file)
        train_demo_df = cudf.read_csv(train_demographics_file)
    else:
        train_df = pd.read_csv(train_file)
        train_demo_df = pd.read_csv(train_demographics_file)

    print("Processing training sequences...")
    train_sequences = process_sequences(train_df, train_demo_df)

    print("Preparing training data for model...")
    X_train, y_train = prepare_model_data(train_sequences)

    # Analyze class distribution with proper handling
    binary_counts = np.bincount(y_train['binary_output'])
    multiclass_counts = np.bincount(y_train['multiclass_output'])

    print(f"Binary class distribution: {binary_counts}")
    print(f"Multiclass class distribution: {multiclass_counts}")
    print(f"Number of unique multiclass labels: {len(multiclass_counts)}")
    print(f"Class imbalance ratio: {binary_counts[0]/binary_counts[1] if len(binary_counts) > 1 else 'No positive samples'}")

    # Calculate improved class weights for imbalanced data
    global BINARY_CLASS_WEIGHTS, MULTICLASS_CLASS_WEIGHTS

    if len(binary_counts) > 1:
        # More aggressive rebalancing for binary classification
        pos_weight = binary_counts[0] / binary_counts[1]
        BINARY_CLASS_WEIGHTS = np.array([1.0, pos_weight * 1.5])  # Extra weight for positive class
        print(f"Enhanced binary class weights: {BINARY_CLASS_WEIGHTS}")
    else:
        BINARY_CLASS_WEIGHTS = np.array([1.0, 1.0])

    # Enhanced multiclass class weights with stronger minority class support
    unique_classes = np.unique(y_train['multiclass_output'])
    n_samples = len(y_train['multiclass_output'])
    n_classes = len(unique_classes)

    # Calculate balanced weights with extra boost for very rare classes
    class_weights = []
    for class_id in unique_classes:
        class_count = np.sum(y_train['multiclass_output'] == class_id)
        # Standard balanced weight with additional minority boost
        base_weight = n_samples / (n_classes * class_count)
        # Extra boost for classes with very few samples (< 5% of average)
        avg_samples = n_samples / n_classes
        if class_count < 0.05 * avg_samples:
            minority_boost = 2.0
        elif class_count < 0.2 * avg_samples:
            minority_boost = 1.5
        else:
            minority_boost = 1.0

        final_weight = base_weight * minority_boost
        class_weights.append(final_weight)

    MULTICLASS_CLASS_WEIGHTS = np.array(class_weights)
    print(f"Enhanced multiclass class weights shape: {MULTICLASS_CLASS_WEIGHTS.shape}")
    print(f"Multiclass weights range: {MULTICLASS_CLASS_WEIGHTS.min():.3f} - {MULTICLASS_CLASS_WEIGHTS.max():.3f}")

    # Create train/validation split with stratification on both tasks
    # Create combined stratification key
    stratify_key = [f"{binary}_{multi}" for binary, multi in zip(y_train['binary_output'], y_train['multiclass_output'])]

    train_indices, val_indices = train_test_split(
        range(len(X_train['imu_input'])),
        test_size=0.2,
        random_state=42,
        stratify=stratify_key  # Stratify on both binary and multiclass
    )

    X_train_split = {key: value[train_indices] for key, value in X_train.items()}
    X_val = {key: value[val_indices] for key, value in X_train.items()}
    y_train_split = {key: value[train_indices] for key, value in y_train.items()}
    y_val = {key: value[val_indices] for key, value in y_train.items()}

    # Verify class distribution in splits
    val_binary_counts = np.bincount(y_val['binary_output'])
    print(f"Validation binary distribution: {val_binary_counts}")

    data_dict = {
        'X_train': X_train_split,
        'y_train': y_train_split,
        'X_val': X_val,
        'y_val': y_val
    }

    # Load test data if provided
    if test_file and test_demographics_file:
        print("Loading test data...")
        if CUML_AVAILABLE:
            test_df = cudf.read_csv(test_file)
            test_demo_df = cudf.read_csv(test_demographics_file)
        else:
            test_df = pd.read_csv(test_file)
            test_demo_df = pd.read_csv(test_demographics_file)

        print("Processing test sequences...")
        test_sequences = process_sequences(test_df, test_demo_df, is_train=False)

        print("Preparing test data for model...")
        X_test, sequence_ids = prepare_model_data(test_sequences, is_train=False)

        data_dict['X_test'] = X_test
        data_dict['sequence_ids'] = sequence_ids

    return data_dict

def process_sequences(df, demo_df, is_train=True):
    """sequence processing with better error handling"""
    sequences = []

    if CUML_AVAILABLE:
        sequence_ids = df['sequence_id'].unique().values_host
    else:
        sequence_ids = df['sequence_id'].unique()

    for seq_id in sequence_ids:
        seq_data = df[df['sequence_id'] == seq_id]

        if len(seq_data) == 0:
            print(f"Warning: Empty sequence data for sequence ID {seq_id}")
            continue

        if CUML_AVAILABLE:
            seq_data = seq_data.to_pandas()
            demo_df_pandas = demo_df.to_pandas()
        else:
            demo_df_pandas = demo_df

        subject_id = seq_data['subject'].iloc[0]
        subject_demo = demo_df_pandas[demo_df_pandas['subject'] == subject_id]

        if len(subject_demo) == 0:
            print(f"Warning: No demographic data found for subject {subject_id}")
            # Use default demographic values
            demo_features = [0.0, 25.0, 0.0, 1.0, 170.0, 52.0, 28.0]
        else:
            demo_features = [
                subject_demo['adult_child'].fillna(0).iloc[0],
                subject_demo['age'].fillna(25).iloc[0],
                subject_demo['sex'].fillna(0).iloc[0],
                subject_demo['handedness'].fillna(1).iloc[0],
                subject_demo['height_cm'].fillna(170).iloc[0],
                subject_demo['shoulder_to_wrist_cm'].fillna(52).iloc[0],
                subject_demo['elbow_to_wrist_cm'].fillna(28).iloc[0]
            ]

        # Extract sensor columns
        imu_cols = [col for col in seq_data.columns if col.startswith('acc_') or col.startswith('rot_')]
        thm_cols = [col for col in seq_data.columns if col.startswith('thm_')]
        tof_cols = [col for col in seq_data.columns if col.startswith('tof_')]

        # Handle sequence length with padding/truncation
        if len(seq_data) < MAX_SEQUENCE_LENGTH:
            padding_needed = MAX_SEQUENCE_LENGTH - len(seq_data)
            last_row = seq_data.iloc[-1:].copy()
            for _ in range(padding_needed):
                seq_data = pd.concat([seq_data, last_row])
        elif len(seq_data) > MAX_SEQUENCE_LENGTH:
            seq_data = seq_data.iloc[:MAX_SEQUENCE_LENGTH]

        # Extract and process sensor data
        imu_data = seq_data[imu_cols].fillna(0).values
        thm_data = seq_data[thm_cols].fillna(0).values
        tof_data = seq_data[tof_cols].fillna(0).values

        # Apply data augmentation for training data
        if is_train:
            imu_data = time_series_augmentation(imu_data)
            thm_data = time_series_augmentation(thm_data)

        # Create ToF mask and normalize
        tof_mask = (tof_data != -1).astype(np.float32)
        tof_data = np.where(tof_data == -1, 0, tof_data)

        sequence = {
            'sequence_id': seq_id,
            'imu_data': imu_data,
            'thm_data': thm_data,
            'tof_data': tof_data,
            'tof_mask': tof_mask,
            'demographic': demo_features,
        }

        if is_train:
            sequence_type = seq_data['sequence_type'].iloc[0]
            gesture = seq_data['gesture'].iloc[0]
            binary_target = 1 if sequence_type == 'Target' else 0

            sequence['binary_target'] = binary_target
            sequence['gesture'] = gesture

        sequences.append(sequence)

    return sequences

def prepare_model_data(sequences, is_train=True):
    """data preparation with improved normalization"""
    imu_data = []
    thm_data = []
    tof_data = []
    tof_mask = []
    demo_data = []
    binary_targets = []
    gesture_targets = []
    sequence_ids = []

    for seq in sequences:
        imu_data.append(seq['imu_data'])
        thm_data.append(seq['thm_data'])
        tof_data.append(seq['tof_data'])
        tof_mask.append(seq['tof_mask'])
        demo_data.append(seq['demographic'])
        sequence_ids.append(seq['sequence_id'])

        if is_train:
            binary_targets.append(seq['binary_target'])
            gesture_targets.append(seq['gesture'])

    # Convert CuPy arrays to NumPy
    def cupy_to_numpy(data):
        if hasattr(data, 'get'):
            return data.get()
        elif isinstance(data, list):
            return [cupy_to_numpy(item) for item in data]
        else:
            return data

    imu_data = [cupy_to_numpy(item) for item in imu_data]
    thm_data = [cupy_to_numpy(item) for item in thm_data]
    tof_data = [cupy_to_numpy(item) for item in tof_data]
    tof_mask = [cupy_to_numpy(item) for item in tof_mask]
    demo_data = [cupy_to_numpy(item) for item in demo_data]

    # Convert to numpy arrays
    imu_data = np.array(imu_data)
    thm_data = np.array(thm_data)
    tof_data = np.array(tof_data)
    tof_mask = np.array(tof_mask)
    demo_data = np.array(demo_data)

    # normalization
    from sklearn.preprocessing import StandardScaler as SKStandardScaler, RobustScaler

    # Use RobustScaler for better outlier handling
    imu_scaler = RobustScaler()
    thm_scaler = RobustScaler()
    demo_scaler = SKStandardScaler()

    # Normalize sensor data
    imu_shape = imu_data.shape
    thm_shape = thm_data.shape

    imu_flat = imu_data.reshape(-1, imu_shape[2])
    thm_flat = thm_data.reshape(-1, thm_shape[2])

    # Fit and transform with outlier-robust scaling
    imu_flat = imu_scaler.fit_transform(imu_flat)
    thm_flat = thm_scaler.fit_transform(thm_flat)
    demo_data = demo_scaler.fit_transform(demo_data)

    imu_data = imu_flat.reshape(imu_shape)
    thm_data = thm_flat.reshape(thm_shape)

    # Improved ToF normalization
    tof_data = np.where(tof_mask == 1, tof_data / 254.0, 0)

    X = {
        'imu_input': imu_data,
        'thm_input': thm_data,
        'tof_input': tof_data,
        'tof_mask': tof_mask,
        'demo_input': demo_data
    }

    if is_train:
        binary_targets = cupy_to_numpy(binary_targets)
        gesture_targets = cupy_to_numpy(gesture_targets)

        binary_targets = np.array(binary_targets)

        label_encoder = LabelEncoder()
        gesture_targets = label_encoder.fit_transform(gesture_targets)
        gesture_targets = np.array(gesture_targets)

        y = {
            'binary_output': binary_targets,
            'multiclass_output': gesture_targets
        }

        return X, y
    else:
        return X, sequence_ids

class MultimodalBFRBCNN(nn.Module):
    def __init__(
        self,
        sequence_length=MAX_SEQUENCE_LENGTH,
        num_imu_features=NUM_IMU_FEATURES,
        num_thermopile=NUM_THERMOPILE,
        num_tof_sensors=NUM_TOF_SENSORS,
        tof_pixels=TOF_PIXELS_PER_SENSOR,
        num_demographic=NUM_DEMOGRAPHIC_FEATURES,
        num_gestures=18
    ):
        super(MultimodalBFRBCNN, self).__init__()

        self.sequence_length = sequence_length
        self.num_tof_sensors = num_tof_sensors

        # IMU branch with residual connections
        self.imu_conv1 = nn.Conv1d(num_imu_features, 64, kernel_size=5, padding=2)
        self.imu_bn1 = nn.BatchNorm1d(64)
        self.imu_dropout1 = nn.Dropout(0.2)

        # Residual block
        self.imu_residual_conv = nn.Conv1d(64, 64, kernel_size=3, padding=1)
        self.imu_residual_bn = nn.BatchNorm1d(64)

        self.imu_conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.imu_bn2 = nn.BatchNorm1d(128)
        self.imu_pool = nn.MaxPool1d(2)
        self.imu_dropout2 = nn.Dropout(0.3)

        self.imu_conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.imu_bn3 = nn.BatchNorm1d(256)
        self.imu_global_pool = nn.AdaptiveAvgPool1d(1)
        self.imu_dropout3 = nn.Dropout(0.4)

        # Thermopile branch
        self.thm_conv1 = nn.Conv1d(num_thermopile, 32, kernel_size=3, padding=1)
        self.thm_bn1 = nn.BatchNorm1d(32)
        self.thm_dropout1 = nn.Dropout(0.2)

        self.thm_conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.thm_bn2 = nn.BatchNorm1d(64)
        self.thm_pool = nn.MaxPool1d(2)
        self.thm_dropout2 = nn.Dropout(0.3)

        self.thm_global_pool = nn.AdaptiveAvgPool1d(1)
        self.thm_dropout3 = nn.Dropout(0.4)

        # ToF branch (simplified 2D CNN approach)
        self.tof_conv1 = nn.Conv2d(num_tof_sensors, 32, kernel_size=3, padding=1)
        self.tof_bn1 = nn.BatchNorm2d(32)
        self.tof_pool1 = nn.MaxPool2d(2)

        self.tof_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.tof_bn2 = nn.BatchNorm2d(64)

        self.tof_global_pool = nn.AdaptiveAvgPool2d(1)
        self.tof_dropout = nn.Dropout(0.4)

        # ToF attention mechanism
        self.tof_attention_fc1 = nn.Linear(1, 64)
        self.tof_attention_fc2 = nn.Linear(64, 64)

        # Demographic branch
        self.demo_fc1 = nn.Linear(num_demographic, 64)
        self.demo_bn1 = nn.BatchNorm1d(64)
        self.demo_dropout1 = nn.Dropout(0.3)

        self.demo_fc2 = nn.Linear(64, 32)
        self.demo_bn2 = nn.BatchNorm1d(32)
        self.demo_dropout2 = nn.Dropout(0.3)

        # Calculate feature dimensions after concatenation
        concat_dim = 256 + 64 + 64 + 32  # IMU + Thermopile + ToF + Demo

        # Feature attention
        self.feature_attention = nn.Linear(concat_dim, concat_dim)

        # Shared layers
        self.shared_fc1 = nn.Linear(concat_dim, 512)
        self.shared_bn1 = nn.BatchNorm1d(512)
        self.shared_dropout1 = nn.Dropout(0.5)

        self.shared_fc2 = nn.Linear(512, 256)
        self.shared_bn2 = nn.BatchNorm1d(256)
        self.shared_dropout2 = nn.Dropout(0.5)

        self.shared_fc3 = nn.Linear(256, 128)
        self.shared_bn3 = nn.BatchNorm1d(128)
        self.shared_dropout3 = nn.Dropout(0.4)

        # Output layers
        self.binary_output = nn.Linear(128, 1)
        self.multiclass_output = nn.Linear(128, num_gestures)

    def forward(self, imu_input, thm_input, tof_input, tof_mask, demo_input):
        batch_size = imu_input.size(0)

        # IMU branch
        x_imu = imu_input.transpose(1, 2)  # (batch, features, sequence)
        x_imu = F.relu(self.imu_bn1(self.imu_conv1(x_imu)))
        x_imu = self.imu_dropout1(x_imu)

        # Residual connection
        imu_residual = F.relu(self.imu_residual_bn(self.imu_residual_conv(x_imu)))
        x_imu = x_imu + imu_residual

        x_imu = F.relu(self.imu_bn2(self.imu_conv2(x_imu)))
        x_imu = self.imu_pool(x_imu)
        x_imu = self.imu_dropout2(x_imu)

        x_imu = F.relu(self.imu_bn3(self.imu_conv3(x_imu)))
        x_imu = self.imu_global_pool(x_imu)
        x_imu = self.imu_dropout3(x_imu)
        x_imu = x_imu.view(batch_size, -1)

        # Thermopile branch
        x_thm = thm_input.transpose(1, 2)  # (batch, features, sequence)
        x_thm = F.relu(self.thm_bn1(self.thm_conv1(x_thm)))
        x_thm = self.thm_dropout1(x_thm)

        x_thm = F.relu(self.thm_bn2(self.thm_conv2(x_thm)))
        x_thm = self.thm_pool(x_thm)
        x_thm = self.thm_dropout2(x_thm)

        x_thm = self.thm_global_pool(x_thm)
        x_thm = self.thm_dropout3(x_thm)
        x_thm = x_thm.view(batch_size, -1)

        # ToF branch - reshape and process
        x_tof = tof_input.view(batch_size, self.sequence_length, self.num_tof_sensors, 8, 8)
        mask_reshaped = tof_mask.view(batch_size, self.sequence_length, self.num_tof_sensors, 8, 8)

        # Apply mask and average over sequence length
        x_tof = x_tof * mask_reshaped
        x_tof = torch.mean(x_tof, dim=1)  # Average over sequence length

        x_tof = F.relu(self.tof_bn1(self.tof_conv1(x_tof)))
        x_tof = self.tof_pool1(x_tof)

        x_tof = F.relu(self.tof_bn2(self.tof_conv2(x_tof)))
        x_tof = self.tof_global_pool(x_tof)
        x_tof = self.tof_dropout(x_tof)
        x_tof = x_tof.view(batch_size, -1)

        # ToF attention mechanism
        tof_availability = torch.mean(tof_mask, dim=[1, 2], keepdim=True)
        tof_attention = torch.tanh(self.tof_attention_fc1(tof_availability))
        tof_attention = torch.sigmoid(self.tof_attention_fc2(tof_attention))
        x_tof = x_tof * tof_attention.squeeze(1)

        # Demographic branch
        x_demo = F.relu(self.demo_bn1(self.demo_fc1(demo_input)))
        x_demo = self.demo_dropout1(x_demo)
        x_demo = F.relu(self.demo_bn2(self.demo_fc2(x_demo)))
        x_demo = self.demo_dropout2(x_demo)

        # Feature fusion with attention
        concat_features = torch.cat([x_imu, x_thm, x_tof, x_demo], dim=1)

        # Self-attention
        attention_weights = F.softmax(self.feature_attention(concat_features), dim=1)
        attended_features = concat_features * attention_weights

        # Shared layers
        shared = F.relu(self.shared_bn1(self.shared_fc1(attended_features)))
        shared = self.shared_dropout1(shared)

        shared = F.relu(self.shared_bn2(self.shared_fc2(shared)))
        shared = self.shared_dropout2(shared)

        shared = F.relu(self.shared_bn3(self.shared_fc3(shared)))
        shared = self.shared_dropout3(shared)

        # Output layers
        binary_out = torch.sigmoid(self.binary_output(shared))
        multiclass_out = F.softmax(self.multiclass_output(shared), dim=1)

        return binary_out, multiclass_out

class BFRBDataset(Dataset):
    def __init__(self, X_dict, y_dict=None, sample_weights=None):
        self.X_dict = X_dict
        self.y_dict = y_dict
        self.sample_weights = sample_weights
        self.length = len(X_dict['imu_input'])

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        sample = {
            'imu_input': torch.FloatTensor(self.X_dict['imu_input'][idx]),
            'thm_input': torch.FloatTensor(self.X_dict['thm_input'][idx]),
            'tof_input': torch.FloatTensor(self.X_dict['tof_input'][idx]),
            'tof_mask': torch.FloatTensor(self.X_dict['tof_mask'][idx]),
            'demo_input': torch.FloatTensor(self.X_dict['demo_input'][idx])
        }

        if self.y_dict is not None:
            sample['binary_target'] = torch.FloatTensor([self.y_dict['binary_output'][idx]])
            sample['multiclass_target'] = torch.LongTensor([self.y_dict['multiclass_output'][idx]])

        if self.sample_weights is not None:
            sample['binary_weight'] = torch.FloatTensor([self.sample_weights[0][idx]])
            sample['multiclass_weight'] = torch.FloatTensor([self.sample_weights[1][idx]])

        return sample

def create_data_loaders(data, batch_size=16):
    """Create PyTorch DataLoaders from processed data"""

    # Compute sample weights
    binary_class_weight_dict = {0: BINARY_CLASS_WEIGHTS[0] * 1.5, 1: BINARY_CLASS_WEIGHTS[1]}
    binary_sample_weights = compute_sample_weight(
        binary_class_weight_dict,
        data['y_train']['binary_output']
    )

    multiclass_class_weight_dict = {i: MULTICLASS_CLASS_WEIGHTS[i] for i in range(len(MULTICLASS_CLASS_WEIGHTS))}
    multiclass_sample_weights = compute_sample_weight(
        multiclass_class_weight_dict,
        data['y_train']['multiclass_output']
    )

    sample_weights = [binary_sample_weights, multiclass_sample_weights]

    train_dataset = BFRBDataset(data['X_train'], data['y_train'], sample_weights)
    val_dataset = BFRBDataset(data['X_val'], data['y_val'])

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4 if torch.cuda.is_available() else 0
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4 if torch.cuda.is_available() else 0
    )

    data_loaders = {'train': train_loader, 'val': val_loader}

    if 'X_test' in data:
        test_dataset = BFRBDataset(data['X_test'])
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=4 if torch.cuda.is_available() else 0
        )
        data_loaders['test'] = test_loader

    return data_loaders

class OptimizedThreshold:
    def __init__(self, target_precision=0.75, patience=5, verbose=1):
        self.target_precision = target_precision
        self.patience = patience
        self.verbose = verbose
        self.best_threshold = 0.5
        self.best_f1 = 0.0

    def update(self, model, val_loader, epoch):
        if epoch % self.patience == 0:
            model.eval()
            predictions = []
            targets = []

            with torch.no_grad():
                for batch in val_loader:
                    imu_input = batch['imu_input'].to(device)
                    thm_input = batch['thm_input'].to(device)
                    tof_input = batch['tof_input'].to(device)
                    tof_mask = batch['tof_mask'].to(device)
                    demo_input = batch['demo_input'].to(device)
                    binary_target = batch['binary_target'].to(device)

                    binary_out, _ = model(imu_input, thm_input, tof_input, tof_mask, demo_input)

                    predictions.extend(binary_out.squeeze().cpu().detach().numpy())
                    targets.extend(binary_target.squeeze().cpu().detach().numpy())

            # Find optimal threshold using precision-recall curve
            precision, recall, thresholds = precision_recall_curve(targets, predictions)

            # Calculate F1 scores for each threshold
            f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)

            # Find thresholds that meet minimum precision requirement
            valid_indices = precision >= self.target_precision
            if np.any(valid_indices):
                valid_f1 = f1_scores[valid_indices]
                valid_thresholds = thresholds[valid_indices[:-1]]  # thresholds array is 1 shorter

                if len(valid_thresholds) > 0:
                    best_idx = np.argmax(valid_f1)
                    optimal_threshold = valid_thresholds[best_idx]
                    optimal_f1 = valid_f1[best_idx]

                    if optimal_f1 > self.best_f1:
                        self.best_f1 = optimal_f1
                        self.best_threshold = optimal_threshold

                        if self.verbose:
                            print(f"\nEpoch {epoch}: Updated optimal threshold to {optimal_threshold:.4f} "
                                  f"(F1: {optimal_f1:.4f}, Precision: {precision[valid_indices][best_idx]:.4f})")

    def get_optimal_threshold(self):
        return self.best_threshold

def train_model_pytorch(model, data_loaders, epochs=80, learning_rate=2e-4, output_dir='pytorch_models'):
    """PyTorch training with precision optimization"""

    os.makedirs(output_dir, exist_ok=True)

    # Loss functions
    binary_criterion = AsymmetricFocalLoss(alpha_pos=0.25, alpha_neg=0.75, gamma_pos=1.0, gamma_neg=4.0)
    multiclass_criterion = SparseCategoricalFocalLoss(alpha=2.0, gamma=2.5, num_classes=18)

    # Optimizer with weight decay
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.3, patience=6, min_lr=1e-8, verbose=True
    )

    # Threshold optimizer
    threshold_optimizer = OptimizedThreshold(target_precision=0.75, patience=3, verbose=1)

    # Metrics
    train_metrics = {
        'binary_f1': WeightedF1Score(),
        'multiclass_f1': MacroF1Score(18),
        'precision_at_recall': PrecisionAtRecall(0.95)
    }

    val_metrics = {
        'binary_f1': WeightedF1Score(),
        'multiclass_f1': MacroF1Score(18),
        'precision_at_recall': PrecisionAtRecall(0.95)
    }

    # Training history
    history = {
        'train_loss': [], 'val_loss': [],
        'train_binary_acc': [], 'val_binary_acc': [],
        'train_multiclass_acc': [], 'val_multiclass_acc': [],
        'train_binary_f1': [], 'val_binary_f1': [],
        'train_multiclass_f1': [], 'val_multiclass_f1': [],
        'train_precision_at_recall': [], 'val_precision_at_recall': []
    }

    best_val_precision = 0.0
    patience_counter = 0
    patience = 15

    model = model.to(device)

    print("Starting precision-optimized model training...")

    for epoch in range(epochs):
        print(f'\nEpoch {epoch+1}/{epochs}')
        print('-' * 50)

        # Training phase
        model.train()
        running_loss = 0.0
        binary_correct = 0
        multiclass_correct = 0
        total_samples = 0

        # Reset metrics
        for metric in train_metrics.values():
            metric.reset()

        for batch_idx, batch in enumerate(data_loaders['train']):
            # Move data to device
            imu_input = batch['imu_input'].to(device)
            thm_input = batch['thm_input'].to(device)
            tof_input = batch['tof_input'].to(device)
            tof_mask = batch['tof_mask'].to(device)
            demo_input = batch['demo_input'].to(device)
            binary_target = batch['binary_target'].to(device)
            multiclass_target = batch['multiclass_target'].squeeze().to(device)

            # Get sample weights if available
            binary_weight = batch.get('binary_weight', torch.ones_like(binary_target)).to(device)
            multiclass_weight = batch.get('multiclass_weight', torch.ones_like(multiclass_target.float())).to(device)

            optimizer.zero_grad()

            # Forward pass
            binary_out, multiclass_out = model(imu_input, thm_input, tof_input, tof_mask, demo_input)

            # Calculate losses with sample weights
            binary_loss = binary_criterion(binary_out.squeeze(), binary_target.squeeze())
            multiclass_loss = multiclass_criterion(multiclass_out, multiclass_target)

            # Apply sample weights
            binary_loss = binary_loss * binary_weight.squeeze().mean()
            multiclass_loss = multiclass_loss * multiclass_weight.squeeze().mean()

            # Combined loss with weights
            total_loss = 0.6 * binary_loss + 0.4 * multiclass_loss

            # Backward pass
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.8)
            optimizer.step()

            # Statistics
            running_loss += total_loss.item()

            # Binary accuracy
            binary_pred = (binary_out.squeeze() > 0.5).float()
            binary_correct += (binary_pred == binary_target.squeeze()).sum().item()

            # Multiclass accuracy
            multiclass_pred = torch.argmax(multiclass_out, dim=1)
            multiclass_correct += (multiclass_pred == multiclass_target).sum().item()

            total_samples += binary_target.size(0)

            # Update metrics
            train_metrics['binary_f1'].update(binary_out.squeeze(), binary_target.squeeze())
            train_metrics['multiclass_f1'].update(multiclass_out, multiclass_target)
            train_metrics['precision_at_recall'].update(binary_out.squeeze(), binary_target.squeeze())

        # Calculate epoch metrics
        epoch_loss = running_loss / len(data_loaders['train'])
        binary_acc = binary_correct / total_samples
        multiclass_acc = multiclass_correct / total_samples

        train_binary_f1 = train_metrics['binary_f1'].compute()
        train_multiclass_f1 = train_metrics['multiclass_f1'].compute()
        train_precision_at_recall = train_metrics['precision_at_recall'].compute()

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_binary_correct = 0
        val_multiclass_correct = 0
        val_total_samples = 0

        # Reset validation metrics
        for metric in val_metrics.values():
            metric.reset()

        with torch.no_grad():
            for batch in data_loaders['val']:
                # Move data to device
                imu_input = batch['imu_input'].to(device)
                thm_input = batch['thm_input'].to(device)
                tof_input = batch['tof_input'].to(device)
                tof_mask = batch['tof_mask'].to(device)
                demo_input = batch['demo_input'].to(device)
                binary_target = batch['binary_target'].to(device)
                multiclass_target = batch['multiclass_target'].squeeze().to(device)

                # Forward pass
                binary_out, multiclass_out = model(imu_input, thm_input, tof_input, tof_mask, demo_input)

                # Calculate losses
                binary_loss = binary_criterion(binary_out.squeeze(), binary_target.squeeze())
                multiclass_loss = multiclass_criterion(multiclass_out, multiclass_target)
                total_loss = 0.6 * binary_loss + 0.4 * multiclass_loss

                val_loss += total_loss.item()

                # Binary accuracy
                binary_pred = (binary_out.squeeze() > 0.5).float()
                val_binary_correct += (binary_pred == binary_target.squeeze()).sum().item()

                # Multiclass accuracy
                multiclass_pred = torch.argmax(multiclass_out, dim=1)
                val_multiclass_correct += (multiclass_pred == multiclass_target).sum().item()

                val_total_samples += binary_target.size(0)

                # Update metrics
                val_metrics['binary_f1'].update(binary_out.squeeze(), binary_target.squeeze())
                val_metrics['multiclass_f1'].update(multiclass_out, multiclass_target)
                val_metrics['precision_at_recall'].update(binary_out.squeeze(), binary_target.squeeze())

        # Calculate validation metrics
        val_loss = val_loss / len(data_loaders['val'])
        val_binary_acc = val_binary_correct / val_total_samples
        val_multiclass_acc = val_multiclass_correct / val_total_samples

        val_binary_f1 = val_metrics['binary_f1'].compute()
        val_multiclass_f1 = val_metrics['multiclass_f1'].compute()
        val_precision_at_recall = val_metrics['precision_at_recall'].compute()

        # Update threshold optimizer
        threshold_optimizer.update(model, data_loaders['val'], epoch)

        # Learning rate scheduling
        scheduler.step(val_precision_at_recall)

        # Save history
        history['train_loss'].append(epoch_loss)
        history['val_loss'].append(val_loss)
        history['train_binary_acc'].append(binary_acc)
        history['val_binary_acc'].append(val_binary_acc)
        history['train_multiclass_acc'].append(multiclass_acc)
        history['val_multiclass_acc'].append(val_multiclass_acc)
        history['train_binary_f1'].append(train_binary_f1)
        history['val_binary_f1'].append(val_binary_f1)
        history['train_multiclass_f1'].append(train_multiclass_f1)
        history['val_multiclass_f1'].append(val_multiclass_f1)
        history['train_precision_at_recall'].append(train_precision_at_recall)
        history['val_precision_at_recall'].append(val_precision_at_recall)

        # Print epoch results
        print(f'Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')
        print(f'Train Binary Acc: {binary_acc:.4f}, Val Binary Acc: {val_binary_acc:.4f}')
        print(f'Train Multiclass Acc: {multiclass_acc:.4f}, Val Multiclass Acc: {val_multiclass_acc:.4f}')
        print(f'Val Precision@Recall: {val_precision_at_recall:.4f}')

        # Early stopping and model saving
        if val_precision_at_recall > best_val_precision:
            best_val_precision = val_precision_at_recall
            patience_counter = 0

            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_precision': best_val_precision,
                'threshold': threshold_optimizer.get_optimal_threshold()
            }, os.path.join(output_dir, 'best_model.pth'))

        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f'Early stopping triggered after {patience} epochs without improvement')
            break

    # Save optimal threshold for inference
    optimal_threshold = threshold_optimizer.get_optimal_threshold()
    threshold_info = {
        'optimal_threshold': float(optimal_threshold),
        'default_threshold': 0.5,
        'precision_target': 0.75
    }

    import json
    with open(os.path.join(output_dir, 'optimal_threshold.json'), 'w') as f:
        json.dump(threshold_info, f, indent=2)

    # Save final model and history
    torch.save(model.state_dict(), os.path.join(output_dir, 'final_model.pth'))
    pd.DataFrame(history).to_csv(os.path.join(output_dir, 'training_history.csv'), index=False)

    print(f"Training complete. Best validation precision@recall: {best_val_precision:.4f}")
    print(f"Optimal threshold: {optimal_threshold:.4f}")
    return model, history

def evaluate_model_pytorch(model, val_loader):
    """PyTorch model evaluation with detailed metrics"""
    print("Evaluating model...")

    model.eval()
    binary_predictions = []
    binary_targets = []
    multiclass_predictions = []
    multiclass_targets = []

    with torch.no_grad():
        for batch in val_loader:
            imu_input = batch['imu_input'].to(device)
            thm_input = batch['thm_input'].to(device)
            tof_input = batch['tof_input'].to(device)
            tof_mask = batch['tof_mask'].to(device)
            demo_input = batch['demo_input'].to(device)
            binary_target = batch['binary_target'].to(device)
            multiclass_target = batch['multiclass_target'].squeeze().to(device)

            binary_out, multiclass_out = model(imu_input, thm_input, tof_input, tof_mask, demo_input)

            binary_predictions.extend(binary_out.squeeze().cpu().detach().numpy())
            binary_targets.extend(binary_target.squeeze().cpu().detach().numpy())
            multiclass_predictions.extend(torch.argmax(multiclass_out, dim=1).cpu().detach().numpy())
            multiclass_targets.extend(multiclass_target.cpu().detach().numpy())

    # Binary classification analysis
    binary_pred_classes = (np.array(binary_predictions) > 0.5).astype(int)

    print("\nBinary Classification Report:")
    print(classification_report(binary_targets, binary_pred_classes))
    print("\nBinary Confusion Matrix:")
    print(confusion_matrix(binary_targets, binary_pred_classes))

    # Multiclass analysis
    print("\nMulticlass Classification Report:")
    print(classification_report(multiclass_targets, multiclass_predictions))

    return {
        'binary_predictions': binary_predictions,
        'binary_targets': binary_targets,
        'multiclass_predictions': multiclass_predictions,
        'multiclass_targets': multiclass_targets
    }

def predict_and_save_pytorch(model, test_loader, sequence_ids, output_file):
    """PyTorch prediction with confidence scores"""
    print("Generating predictions...")

    model.eval()
    binary_predictions = []
    multiclass_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            imu_input = batch['imu_input'].to(device)
            thm_input = batch['thm_input'].to(device)
            tof_input = batch['tof_input'].to(device)
            tof_mask = batch['tof_mask'].to(device)
            demo_input = batch['demo_input'].to(device)

            binary_out, multiclass_out = model(imu_input, thm_input, tof_input, tof_mask, demo_input)

            binary_predictions.extend(binary_out.squeeze().cpu().numpy())
            multiclass_predictions.extend(multiclass_out.cpu().numpy())

    binary_predictions = np.array(binary_predictions)
    multiclass_predictions = np.array(multiclass_predictions)

    # Process predictions
    binary_classes = (binary_predictions > 0.5).astype(int)
    binary_confidence = np.maximum(binary_predictions, 1-binary_predictions)

    multiclass_classes = np.argmax(multiclass_predictions, axis=1)
    multiclass_confidence = np.max(multiclass_predictions, axis=1)

    # Gesture mapping
    gesture_map = {
        0: "non_target",
        1: "above_ear_pull_hair",
        2: "forehead_pull_hairline",
        3: "forehead_scratch",
        4: "eyebrow_pull_hair",
        5: "eyelash_pull_hair",
        6: "neck_pinch_skin",
        7: "neck_scratch",
        8: "cheek_pinch_skin",
        9: "drink_from_bottle",
        10: "glasses_on_off",
        11: "pull_air_toward_face",
        12: "pinch_knee_leg_skin",
        13: "scratch_knee_leg_skin",
        14: "write_name_on_leg",
        15: "text_on_phone",
        16: "feel_around_tray",
        17: "write_name_in_air"
    }

    output_df = pd.DataFrame({
        'sequence_id': sequence_ids,
        'is_target': binary_classes,
        'binary_confidence': binary_confidence,
        'gesture_class': multiclass_classes,
        'multiclass_confidence': multiclass_confidence,
        'gesture': [gesture_map.get(cls, f"gesture_{cls}") for cls in multiclass_classes]
    })

    output_df.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")

def save_model_pytorch(model, model_dir):
    """PyTorch model saving with metadata"""
    os.makedirs(model_dir, exist_ok=True)

    # Save model state dict
    torch.save(model.state_dict(), os.path.join(model_dir, 'model_state_dict.pth'))

    # Save entire model
    torch.save(model, os.path.join(model_dir, 'complete_model.pth'))

    # Save model metadata
    metadata = {
        'model_type': 'PyTorch_Multimodal_BFRB_CNN',
        'input_shape': {
            'imu': (MAX_SEQUENCE_LENGTH, NUM_IMU_FEATURES),
            'thermopile': (MAX_SEQUENCE_LENGTH, NUM_THERMOPILE),
            'tof': (MAX_SEQUENCE_LENGTH, NUM_TOF_SENSORS * TOF_PIXELS_PER_SENSOR),
            'demographic': (NUM_DEMOGRAPHIC_FEATURES,)
        },
        'num_classes': 18,
        'improvements': [
            'asymmetric_focal_loss', 'data_augmentation', 'sample_weights',
            'precision_optimization', 'attention_mechanism'
        ]
    }

    import json
    with open(os.path.join(model_dir, 'model_metadata.json'), 'w') as f:
        json.dump(metadata, f, indent=2)

    print(f"PyTorch model saved to {model_dir} with metadata")

def load_model_pytorch(model_dir):
    """PyTorch model loading"""
    try:
        # Try loading complete model first
        model = torch.load(os.path.join(model_dir, 'complete_model.pth'), map_location=device)
        print(f"Loaded complete model from {model_dir}")
    except:
        # Load from state dict
        model = MultimodalBFRBCNN(num_gestures=18)
        model.load_state_dict(torch.load(os.path.join(model_dir, 'model_state_dict.pth'), map_location=device))
        print(f"Loaded model from state dict: {model_dir}")

    return model

def main():
    """Main execution with PyTorch precision optimization"""

    # File paths
    train_file = 'train.csv'
    train_demo_file = 'train_demographics.csv'
    test_file = 'test.csv'
    test_demo_file = 'test_demographics.csv'

    # Load and preprocess data
    data = load_and_preprocess_data(train_file, train_demo_file, test_file, test_demo_file)

    # Create PyTorch data loaders
    data_loaders = create_data_loaders(data, batch_size=16)

    # Build PyTorch model
    model = MultimodalBFRBCNN(num_gestures=18)

    print(f"\nPyTorch Model Summary:")
    print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")
    print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

    # Train with PyTorch
    if 'train' in data_loaders and 'val' in data_loaders:
        model, history = train_model_pytorch(
            model, data_loaders, epochs=80, learning_rate=2e-4,
            output_dir='pytorch_precision_optimized_bfrb_model'
        )

        # Evaluate model
        if 'val' in data_loaders:
            evaluate_model_pytorch(model, data_loaders['val'])

    # Generate predictions
    if 'test' in data_loaders:
        predict_and_save_pytorch(model, data_loaders['test'], data.get('sequence_ids', []),
                                'pytorch_precision_optimized_predictions.csv')

    # Save model
    save_model_pytorch(model, 'pytorch_precision_optimized_bfrb_model')

    print("PyTorch BFRB detection pipeline complete with precision optimization")

if __name__ == "__main__":
    main()


cuML available - GPU acceleration enabled
Using device: cuda
Loading data...
Processing training sequences...
Preparing training data for model...
Binary class distribution: [3038 5113]
Multiclass class distribution: [638 637 161 638 640 161 640 640 161 640 640 161 477 161 640 478 477 161]
Number of unique multiclass labels: 18
Class imbalance ratio: 0.5941717191472716
Enhanced binary class weights: [1.         0.89125758]
Enhanced multiclass class weights shape: (18,)
Multiclass weights range: 0.708 - 2.813
Validation binary distribution: [ 607 1024]
Loading test data...
Processing test sequences...
Preparing test data for model...

PyTorch Model Summary:
Parameters: 728,531
Trainable parameters: 728,531
Starting precision-optimized model training...

Epoch 1/80
--------------------------------------------------

Epoch 0: Updated optimal threshold to 0.4925 (F1: 0.8502, Precision: 0.8141)
Train Loss: 2.0387, Val Loss: 1.9970
Train Binary Acc: 0.6235, Val Binary Acc: 0.8007
Train Multi

In [1]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c cmi-detect-behavior-with-sensor-data

Downloading cmi-detect-behavior-with-sensor-data.zip to /content
 87% 155M/178M [00:00<00:00, 192MB/s]
100% 178M/178M [00:00<00:00, 213MB/s]


In [2]:
import zipfile
with zipfile.ZipFile("cmi-detect-behavior-with-sensor-data.zip", 'r') as zip_ref:
    zip_ref.extractall("./")