# V8 Fine-Grained Behavior Detection - Kaggle Submission

**Model Info:**
- ✅ V8 Multi-task Model (Action + Agent + Target)
- ✅ 28 fine-grained behavior classes
- ✅ Validation Action Acc: 86.31%
- ✅ Agent/Target Acc: 98%+
- ⚡ GPU inference ~1-2 hours

**Setup:**
1. Upload the trained checkpoint as `last_model.pth` to a Kaggle Dataset named `mabe-v8-model` (the loading cell reads `/kaggle/input/mabe-v8-model/last_model.pth`)
2. Add dataset: `mabe-v8-model` (contains `last_model.pth`)
3. Add dataset: `MABe-mouse-behavior-detection` (competition data)
4. Enable GPU (T4 or P100)
5. Run all cells
6. The notebook writes `/kaggle/working/submission.csv`; submit that notebook output

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from pathlib import Path
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

# Banner marking the start of the run in the notebook log (three lines:
# rule, title, rule).
print(
    "=" * 60,
    "V8 Multi-task Behavior Detection - Kaggle Submission",
    "=" * 60,
    sep="\n",
)

## 1. Define V8 Model Architecture

In [None]:
class V8BehaviorDetector(nn.Module):
    """
    V8 Multi-task Model
    Outputs: Action (28 classes) + Agent (4 mice) + Target (4 mice)

    A shared stack of Conv1d blocks followed by a bidirectional LSTM feeds
    three independent per-frame classification heads.

    Args:
        input_dim: number of features per frame (size of the last input axis).
        num_actions: number of classes produced by the action head.
        num_mice: number of classes produced by the agent and target heads.
        conv_channels: output channels of each Conv1d block, in order.
            Any non-empty sequence of ints is accepted.
        lstm_hidden: hidden size of the LSTM per direction.
        lstm_layers: number of stacked LSTM layers.
        dropout: dropout probability used in the conv blocks, between LSTM
            layers (only when ``lstm_layers > 1``) and in the action head;
            the agent/target heads use half of it.

    Raises:
        ValueError: if ``conv_channels`` is empty.
    """

    def __init__(
        self,
        input_dim=112,
        num_actions=28,
        num_mice=4,
        conv_channels=(128, 256, 512),
        lstm_hidden=256,
        lstm_layers=2,
        dropout=0.3
    ):
        super().__init__()

        # Immutable default above; copy so a caller-supplied list is never
        # aliased or mutated by this module.
        conv_channels = list(conv_channels)
        if not conv_channels:
            raise ValueError("conv_channels must contain at least one entry")

        self.input_dim = input_dim
        self.num_actions = num_actions
        self.num_mice = num_mice

        # Shared convolutional backbone.
        # NOTE: attribute names and layer order define the state_dict keys;
        # keep them stable so existing checkpoints continue to load.
        conv_layers = []
        in_channels = input_dim

        for out_channels in conv_channels:
            conv_layers.extend([
                # kernel_size=5 with padding=2 keeps the time dimension unchanged
                nn.Conv1d(in_channels, out_channels, kernel_size=5, padding=2),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            in_channels = out_channels

        self.conv_backbone = nn.Sequential(*conv_layers)

        # Bidirectional LSTM
        self.lstm = nn.LSTM(
            input_size=conv_channels[-1],
            hidden_size=lstm_hidden,
            num_layers=lstm_layers,
            bidirectional=True,
            batch_first=True,
            # nn.LSTM only applies dropout between stacked layers
            dropout=dropout if lstm_layers > 1 else 0
        )

        # Forward and backward hidden states are concatenated
        lstm_output_dim = lstm_hidden * 2

        # Action classification head
        self.action_head = nn.Sequential(
            nn.Linear(lstm_output_dim, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, num_actions)
        )

        # Agent identification head
        self.agent_head = nn.Sequential(
            nn.Linear(lstm_output_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            nn.Linear(128, num_mice)
        )

        # Target identification head
        self.target_head = nn.Sequential(
            nn.Linear(lstm_output_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5),
            nn.Linear(128, num_mice)
        )

    def forward(self, x):
        """Compute per-frame logits for the three tasks.

        Args:
            x: tensor of shape [B, T, input_dim].

        Returns:
            Tuple ``(action_logits, agent_logits, target_logits)`` with shapes
            [B, T, num_actions], [B, T, num_mice] and [B, T, num_mice].

        Raises:
            ValueError: if ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"Expected input of shape [B, T, D], got {tuple(x.shape)}"
            )

        # Conv expects [B, D, T]
        x = x.transpose(1, 2)
        x = self.conv_backbone(x)
        x = x.transpose(1, 2)

        # LSTM (batch_first) -> [B, T, 2 * lstm_hidden]
        x, _ = self.lstm(x)

        # Task-specific predictions, applied independently at every frame
        action_logits = self.action_head(x)
        agent_logits = self.agent_head(x)
        target_logits = self.target_head(x)

        return action_logits, agent_logits, target_logits

print("✓ V8BehaviorDetector model defined")

## 2. Action Mapping (28 Fine-grained Behaviors)

In [None]:
# Action name -> class id. Ids follow the position of each name in the
# tuple below, so the order here is significant.
ACTION_TO_ID = {
    action_name: action_id
    for action_id, action_name in enumerate((
        'background',
        # Social (1-7)
        'sniff', 'sniffgenital', 'sniffface', 'sniffbody',
        'reciprocalsniff', 'approach', 'follow',
        # Mating (8-11)
        'mount', 'intromit', 'attemptmount', 'ejaculate',
        # Aggressive (12-18)
        'attack', 'chase', 'chaseattack', 'bite',
        'dominance', 'defend', 'flinch',
        # Other (19-27)
        'avoid', 'escape', 'freeze', 'allogroom',
        'shepherd', 'disengage', 'run',
        'dominancegroom', 'huddle',
    ))
}

# Reverse lookup: class id -> action name.
ID_TO_ACTION = dict(zip(ACTION_TO_ID.values(), ACTION_TO_ID.keys()))
NUM_ACTIONS = 28

print(f"✓ {NUM_ACTIONS} action classes defined")

## 3. Load Model Checkpoint

In [None]:
# Device: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
print()

# Load checkpoint (adjust path to your Kaggle dataset)
MODEL_PATH = Path('/kaggle/input/mabe-v8-model/last_model.pth')

if not MODEL_PATH.exists():
    raise FileNotFoundError(
        f"Model not found at {MODEL_PATH}.\n"
        "Please upload last_model.pth to a Kaggle dataset named 'mabe-v8-model'"
    )

# Build model. These hyper-parameters must match the ones the checkpoint was
# trained with, otherwise load_state_dict below raises on shape mismatch.
model = V8BehaviorDetector(
    # 56 raw coordinates (7 bodyparts × 4 mice × 2 coords)
    # + 28 per-keypoint speeds + 28 per-keypoint acceleration magnitudes
    input_dim=112,
    num_actions=NUM_ACTIONS,
    num_mice=4,
    conv_channels=[128, 256, 512],
    lstm_hidden=256,
    lstm_layers=2,
    dropout=0.3
).to(device)

# Load weights.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(MODEL_PATH, map_location=device)

# Training scripts often save a dict that wraps the weights together with
# optimizer state, epoch, metrics, ...; unwrap the common layouts. A bare
# state_dict has none of these keys and is used as-is.
state_dict = checkpoint
if isinstance(checkpoint, dict):
    for wrapper_key in ('model_state_dict', 'state_dict'):
        if wrapper_key in checkpoint:
            state_dict = checkpoint[wrapper_key]
            break

# Weights saved from an nn.DataParallel-wrapped model carry a 'module.' prefix
# on every key; strip it so the keys match the bare model.
if any(key.startswith('module.') for key in state_dict):
    state_dict = {
        (key[len('module.'):] if key.startswith('module.') else key): value
        for key, value in state_dict.items()
    }

model.load_state_dict(state_dict)
model.eval()  # inference mode: disables dropout, BatchNorm uses running stats

print(f"✓ Model loaded from {MODEL_PATH.name}")
print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")

## 4. Helper Functions

In [None]:
def add_motion_features(keypoints, fps=33.3, expected_dim=56):
    """Add speed and acceleration features.

    The input is interpreted as interleaved (x, y) pairs, one pair per
    keypoint. For every keypoint the frame-to-frame velocity and the
    frame-to-frame change of that velocity are computed, and their Euclidean
    magnitudes are appended to the coordinates.

    Args:
        keypoints: array of shape [T, D] with D even.
        fps: frame rate used to convert frame differences into per-second
            rates (dt = 1 / fps).
        expected_dim: required value of D. Defaults to 56; pass ``None`` to
            accept any even D.

    Returns:
        Array of shape [T, D + D // 2 + D // 2] laid out as
        [coords, speed per keypoint, acceleration magnitude per keypoint].

    Raises:
        ValueError: if ``keypoints`` is not 2-D, D differs from
            ``expected_dim``, or D is odd.
    """
    keypoints = np.asarray(keypoints)
    if keypoints.ndim != 2:
        raise ValueError(
            f"Expected a 2-D [T, D] array, got shape {keypoints.shape}"
        )
    # Integer input would silently truncate the derivatives written below.
    if not np.issubdtype(keypoints.dtype, np.floating):
        keypoints = keypoints.astype(np.float32)

    T, D = keypoints.shape
    # Explicit raise instead of `assert`: asserts vanish under `python -O`.
    if expected_dim is not None and D != expected_dim:
        raise ValueError(f"Expected {expected_dim} coords, got {D}")
    if D % 2 != 0:
        raise ValueError(f"Expected an even number of coords (x, y pairs), got {D}")

    dt = 1.0 / fps
    coords = keypoints.reshape(T, D // 2, 2)

    # Velocity: backward difference; frame 0 has no predecessor, so it copies
    # frame 1. With a single frame everything stays zero.
    velocity = np.zeros_like(coords)
    if T > 1:
        velocity[1:] = np.diff(coords, axis=0) / dt
        velocity[0] = velocity[1]

    speed = np.sqrt(np.sum(velocity ** 2, axis=2))

    # Acceleration: same scheme applied to the velocity vectors.
    acceleration_vec = np.zeros_like(velocity)
    if T > 1:
        acceleration_vec[1:] = np.diff(velocity, axis=0) / dt
        acceleration_vec[0] = acceleration_vec[1]

    acceleration = np.sqrt(np.sum(acceleration_vec ** 2, axis=2))

    # Concatenate: [coords, speed, accel]
    return np.concatenate([keypoints, speed, acceleration], axis=1)


def predictions_to_intervals(action_preds, agent_preds, target_preds, min_duration=5,
                             id_to_action=None):
    """Convert frame-level predictions to interval format.

    Consecutive frames that share the same non-background
    (action, agent, target) triple are merged into one interval. A run ends
    when any of the three values changes, when a background frame
    (action id 0) is reached, or at the end of the sequence. Runs shorter
    than ``min_duration`` frames are discarded.

    Args:
        action_preds: per-frame action ids; id 0 means background.
        agent_preds: per-frame agent ids, same length as ``action_preds``.
        target_preds: per-frame target ids, same length as ``action_preds``.
        min_duration: minimum run length in frames for a run to be kept.
        id_to_action: optional mapping from action id to action name.
            Defaults to the module-level ``ID_TO_ACTION``. Ids missing from
            the mapping are reported as ``'background'``.

    Returns:
        List of dicts in chronological order with keys ``agent_id``,
        ``target_id``, ``action_id``, ``action``, ``start_frame`` and
        ``stop_frame`` (both frame bounds inclusive).
    """
    if id_to_action is None:
        id_to_action = ID_TO_ACTION

    num_frames = len(action_preds)
    intervals = []

    run_key = None   # (action, agent, target) of the open run; None while in background
    run_start = 0    # first frame of the open run

    def close_run(end_exclusive):
        # Emit the open run, if any, provided it is long enough.
        if run_key is None or end_exclusive - run_start < min_duration:
            return
        action, agent, target = run_key
        intervals.append({
            'agent_id': agent,
            'target_id': target,
            'action_id': action,
            'action': id_to_action.get(action, 'background'),
            'start_frame': run_start,
            'stop_frame': end_exclusive - 1
        })

    for t in range(num_frames):
        action = action_preds[t]
        # Background frames never form a run; represent them as "no key".
        key = None if action == 0 else (action, agent_preds[t], target_preds[t])

        if key == run_key:
            continue  # same run (or still in background)

        close_run(t)
        run_key, run_start = key, t

    # Handle last interval
    close_run(num_frames)

    return intervals

print("✓ Helper functions defined")

## 5. Load Test Data

In [None]:
# Locate the competition dataset: take the first candidate mount point that
# exists (both capitalisations of the dataset slug are tried).
possible_paths = [
    Path('/kaggle/input/MABe-mouse-behavior-detection'),
    Path('/kaggle/input/mabe-mouse-behavior-detection'),
]

DATA_DIR = next(
    (candidate for candidate in possible_paths if candidate.exists()),
    None,
)

if DATA_DIR is None:
    raise FileNotFoundError("Cannot find MABe dataset. Please add it to notebook inputs.")

print(f"✓ Using dataset: {DATA_DIR}")

# Test metadata: prefer test.csv; when it is absent, rebuild the
# (video_id, lab_id) listing from test_tracking/<lab_id>/<video_id>.parquet.
test_csv_path = DATA_DIR / 'test.csv'
if test_csv_path.exists():
    test_csv = pd.read_csv(test_csv_path)
else:
    test_csv = pd.DataFrame([
        {
            'video_id': video_file.stem,
            'lab_id': lab_dir.name
        }
        for lab_dir in (DATA_DIR / 'test_tracking').iterdir()
        if lab_dir.is_dir()
        for video_file in lab_dir.glob('*.parquet')
    ])

print(f"  Total test videos: {len(test_csv)}")

## 6. Generate Predictions

In [None]:
# Standard bodyparts (order fixes the feature layout fed to the model)
standard_bodyparts = [
    'nose', 'ear_left', 'ear_right', 'neck',
    'hip_left', 'hip_right', 'tail_base'
]

sequence_length = 100   # frames per inference window
all_intervals = []      # submission rows collected across all videos
row_id = 0


def _load_video_keypoints(tracking_file, bodyparts, num_mice=4):
    """Read one tracking parquet into a dense per-frame coordinate array.

    Column layout is [mouse 1: bp0_x, bp0_y, bp1_x, ...][mouse 2: ...]...,
    for mouse ids 1..num_mice and bodyparts in the given order. Frames,
    mice or bodyparts absent from the file (and NaN coordinates) are zero.

    Returns:
        float32 array of shape [max_frame + 1, num_mice * len(bodyparts) * 2],
        or None when the file has no usable rows.
    """
    tracking_df = pd.read_parquet(tracking_file)
    tracking_df = tracking_df[tracking_df['bodypart'].isin(bodyparts)]
    if tracking_df.empty:
        return None

    # max() skips NaN, so it is NaN only when every frame number is missing.
    max_frame = tracking_df['video_frame'].max()
    if pd.isna(max_frame):
        return None

    num_frames = int(max_frame) + 1
    num_bodyparts = len(bodyparts)
    keypoints = np.zeros((num_frames, num_mice * num_bodyparts * 2), dtype=np.float32)

    # x and y are pivoted and written independently: pivot_table drops rows
    # and columns that are entirely NaN, so the two pivots are not guaranteed
    # to share the same index or columns. Each must use its own.
    for coord_offset, coord in enumerate(('x', 'y')):
        pivot = tracking_df.pivot_table(
            index='video_frame',
            columns=['mouse_id', 'bodypart'],
            values=coord,
            aggfunc='first'
        )
        frames = pivot.index.values.astype(int)

        for mouse_id in range(1, num_mice + 1):
            for bp_idx, bodypart in enumerate(bodyparts):
                if (mouse_id, bodypart) in pivot.columns:
                    col = ((mouse_id - 1) * num_bodyparts + bp_idx) * 2 + coord_offset
                    keypoints[frames, col] = pivot[(mouse_id, bodypart)].values

    return np.nan_to_num(keypoints, nan=0.0)


def _predict_frames(features, model, device, sequence_length, batch_size=64):
    """Run the model over non-overlapping windows of ``sequence_length`` frames.

    The sequence is zero-padded up to a whole number of windows; predictions
    for the padding frames are discarded. Windows are sent to the model in
    batches of ``batch_size`` instead of one at a time.

    Returns:
        Tuple (action_preds, agent_preds, target_preds) of int64 arrays, each
        of length ``len(features)``, holding the per-frame argmax class.
    """
    num_frames, feature_dim = features.shape
    num_windows = -(-num_frames // sequence_length)  # ceil division
    if num_windows == 0:
        return tuple(np.zeros(0, dtype=np.int64) for _ in range(3))

    padded = np.zeros((num_windows * sequence_length, feature_dim), dtype=np.float32)
    padded[:num_frames] = features
    windows = padded.reshape(num_windows, sequence_length, feature_dim)

    per_head = ([], [], [])  # action, agent, target
    for first in range(0, num_windows, batch_size):
        batch = torch.from_numpy(windows[first:first + batch_size]).to(device)
        for chunks, logits in zip(per_head, model(batch)):
            # [B, T, C] -> class id per frame, flattened back onto the time axis
            chunks.append(logits.argmax(dim=-1).reshape(-1).cpu().numpy())

    return tuple(np.concatenate(chunks)[:num_frames] for chunks in per_head)


print("Processing test videos...\n")

with torch.no_grad():
    for idx, row in tqdm(test_csv.iterrows(), total=len(test_csv)):
        video_id = row['video_id']
        lab_id = row['lab_id']

        tracking_file = DATA_DIR / 'test_tracking' / lab_id / f'{video_id}.parquet'

        if not tracking_file.exists():
            continue

        try:
            keypoints = _load_video_keypoints(tracking_file, standard_bodyparts)
            if keypoints is None:
                continue

            keypoints = add_motion_features(keypoints, fps=33.3)

            # Predict with sliding window (no overlap for inference)
            action_preds, agent_preds, target_preds = _predict_frames(
                keypoints, model, device, sequence_length
            )

            # Convert to intervals
            intervals = predictions_to_intervals(
                action_preds, agent_preds, target_preds, min_duration=5
            )
        except Exception as e:
            # Deliberate best-effort: one unreadable video must not abort
            # the whole submission; report it and move on.
            print(f"Error processing {video_id}: {e}")
            continue

        # Add to submission. Head outputs are 0-based class ids; the
        # submission labels mice as mouse1..mouseN.
        for interval in intervals:
            all_intervals.append({
                'row_id': row_id,
                'video_id': video_id,
                'agent_id': f"mouse{interval['agent_id'] + 1}",
                'target_id': f"mouse{interval['target_id'] + 1}",
                'action': interval['action'],
                'start_frame': interval['start_frame'],
                'stop_frame': interval['stop_frame']
            })
            row_id += 1

print(f"\n✓ Generated {len(all_intervals)} behavior intervals")

## 7. Create Submission

In [None]:
# Build the submission table. With no intervals at all, still produce a
# frame carrying the expected columns so the CSV has a valid header.
if all_intervals:
    submission = pd.DataFrame(all_intervals)
else:
    print("[!] WARNING: No predictions generated!")
    submission = pd.DataFrame(
        columns=[
            'row_id', 'video_id', 'agent_id', 'target_id',
            'action', 'start_frame', 'stop_frame'
        ]
    )

# Write the file Kaggle picks up as the notebook's submission output.
submission.to_csv('/kaggle/working/submission.csv', index=False)

# Short summary: size, video coverage and the ten most frequent actions.
print(f"✓ Submission saved to /kaggle/working/submission.csv")
print(f"  Total intervals: {len(submission):,}")
print(f"  Unique videos: {submission['video_id'].nunique()}")
print(f"\n  Action distribution:")
top_actions = submission['action'].value_counts().head(10)
for action, count in top_actions.items():
    print(f"    {action}: {count}")

print("\n🎯 V8 Submission ready!")

## 8. Preview

In [None]:
submission.head(20)