In [None]:
!nvidia-smi
!python -c "import torch; print(torch.__version__, torch.version.cuda)"


!rm -r /kaggle/working/SpaMo_HOW2SIGN
!rm -r /kaggle/working/clip-vit-large-patch14_feat_How2Sign
!rm -r /kaggle/working/CACHE_DIR 
!rm /kaggle/working/spamo.ckpt
!rm /kaggle/working/vit_features.zip


import pandas as pd
import numpy as np
import os

csv_path = '/kaggle/input/how2sign-clips/how2sign_realigned_test_clips.csv'
video_root = '/kaggle/input/how2sign-clips/testing/testing/test_clips_raw_videos'

# Read CSV
df = pd.read_csv(csv_path, sep='\t')
print(f"Loaded {len(df)} rows")
print(f"Columns: {df.columns.tolist()}")

data = {}
for i, row in df.iterrows():
    video_filename = f"{row['SENTENCE_NAME']}.mp4"
    video_path = os.path.join(video_root, video_filename)
    
    # Removed the print(video_path) to keep output clean
    
    data[i] = {
        'folder': video_path,
        'fileid': row['SENTENCE_ID'],
        'original_info': {'START_REALIGNED': None, 'END_REALIGNED': None},
        'sentence': row['SENTENCE'],
        'video_id': row['VIDEO_ID'],
        'video_name': row['VIDEO_NAME']
    }

# --- FIX 1: Save the annotation file to the location the script expects ---
save_dir = '/kaggle/working/SpaMo_HOW2SIGN/preprocess/How2Sign'
save_path = os.path.join(save_dir, 'test_info.npy')

os.makedirs(save_dir, exist_ok=True) # Create the directory if it doesn't exist
np.save(save_path, data)
print(f"✓ Saved {len(data)} entries to {save_path}") # Verify the correct path

# Verify (optional but recommended)
loaded = np.load(save_path, allow_pickle=True).item()
print(f"\nExample entry:")
print(f"  fileid: {loaded[0]['fileid']}")
print(f"  folder: {loaded[0]['folder']}")
print(f"  sentence: {loaded[0]['sentence']}")





!git clone https://github.com/OmarHani4306/SpaMo_HOW2SIGN.git



%%capture
# Dependency setup. `%%capture` (which must stay the first line of its cell)
# hides the output produced by this cell.
# Install packages that are definitely not in Kaggle or need specific versions
# `--no-deps` installs exactly the pinned package without installing or
# changing any of its dependencies.
!pip install --no-deps accelerate==1.4.0
!pip install --no-deps peft==0.7.1
!pip install --no-deps lightning==1.9.5
!pip install --no-deps einops==0.8.1
!pip install --no-deps einops-exts==0.0.4
!pip install omegaconf rouge-score sacrebleu==2.2.0

# Let pip handle these with dependencies
!pip install transformers==4.32.0 datasets==3.4.1


# NOTE(review): presumably needed by the extraction scripts for video
# decoding -- confirm against the repository's requirements.
!pip install av


# Create the directory that is passed to the extraction scripts as --cache_dir.
!mkdir -p /kaggle/working/CACHE_DIR


# DISABLED STEP, kept for reference: feature extraction with
# scripts/vit_extract_feature.py and the openai/clip-vit-large-patch14 model.
# Uncomment every line of the command below to run it.
# !python /kaggle/working/SpaMo_HOW2SIGN/scripts/vit_extract_feature.py \
#     --anno_root /kaggle/working/SpaMo_HOW2SIGN/preprocess/How2Sign \
#     --model_name openai/clip-vit-large-patch14 \
#     --video_root /kaggle/input/how2sign-clips/testing/testing/test_clips_raw_videos/ \
#     --cache_dir /kaggle/working/CACHE_DIR \
#     --save_dir /kaggle/working/ \
#     --batch_size 128 \
#     --device cuda:0




# --- FIX 2: Save features to a dedicated folder ---

# Create cache and dedicated save directory
!mkdir -p /kaggle/working/CACHE_DIR
!mkdir -p /kaggle/working/how2sign_motion_features # Dedicated folder

print("--- Starting MOTION Feature Extraction (Test Set) ---")
print(f"Features will be saved to: /kaggle/working/how2sign_motion_features")

# Run the (now optimized) extraction script
!python /kaggle/working/SpaMo_HOW2SIGN/scripts/mae_extract_feature.py \
    --anno_root /kaggle/working/SpaMo_HOW2SIGN/preprocess/How2Sign \
    --model_name MCG-NJU/videomae-large \
    --video_root /kaggle/input/how2sign-clips/testing/testing/test_clips_raw_videos/ \
    --cache_dir /kaggle/working/CACHE_DIR \
    --save_dir /kaggle/working/how2sign_motion_features \ 
    --overlap_size 8 \
    --batch_size 16 \ 
    --device cuda:0

print("--- Motion feature extraction complete. ---")