In [1]:
import re
import gc
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.mixed_precision as mixed_precision

from tensorflow.keras.layers import (
    Dropout, ZeroPadding1D, DepthwiseConv1D, Dense, BatchNormalization,
    Reshape, Softmax, Permute, Add, Masking, GlobalAveragePooling1D
)
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers.schedules import CosineDecay
# Last expression of the cell, so its value is echoed as the cell output
# (the recorded run below shows '/device:GPU:0').
tf.test.gpu_device_name()

'/device:GPU:0'

# Get the data from [Kaggle](https://www.kaggle.com/competitions/asl-signs)

In [2]:
# The GCS paths below expire after several weeks. To refresh them, run
# KaggleDatasets.get_gcs_path(dataset_name) in a Kaggle notebook and paste the results here.
# Notebook: https://www.kaggle.com/code/hoyso48/islr-get-gcs-path

# Bucket per dataset: competition files, and two sets of 5-fold TFRecord shards.
GCS_PATH = {
    'ISLR':'gs://kds-e34677f8787ebf6fc3a1a3319c912f091d61efb9050da67805d754ce',
    '5fold':'gs://kds-47631c9e20af9b803ddfa3d4a08b9401fc6a5a0a90456a11429331ca',
    '5fold_randsplit':'gs://kds-1f5f54e399848a499848a49061a4779db1f5f6c8f1b271f8344ceb8b',
}

# Every TFRecord shard of the '5fold' split; the fold is selected later by file name.
TRAIN_FILENAMES = tf.io.gfile.glob(GCS_PATH['5fold']+'/*.tfrecords')
COMPETITION_PATH = GCS_PATH['ISLR']

# Number of shards found, then copy the two small metadata files into the working directory.
print(len(TRAIN_FILENAMES))
!gsutil cp {COMPETITION_PATH}/train.csv .
!gsutil cp {COMPETITION_PATH}/sign_to_prediction_index_map.json .

187
Copying gs://kds-e34677f8787ebf6fc3a1a3319c912f091d61efb9050da67805d754ce/train.csv...
\ [1 files][  6.1 MiB/  6.1 MiB]                                                
Operation completed over 1 objects/6.1 MiB.                                      
Copying gs://kds-e34677f8787ebf6fc3a1a3319c912f091d61efb9050da67805d754ce/sign_to_prediction_index_map.json...
/ [1 files][  3.3 KiB/  3.3 KiB]                                                
Operation completed over 1 objects/3.3 KiB.                                      


In [3]:
# Load the competition metadata (one row per recorded sequence) and preview it.
train_df = pd.read_csv('train.csv')
display(train_df.head())
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in display(): doing so renders a stray "None" after the report.
train_df.info()

Unnamed: 0,path,participant_id,sequence_id,sign
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94477 entries, 0 to 94476
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   path            94477 non-null  object
 1   participant_id  94477 non-null  int64 
 2   sequence_id     94477 non-null  int64 
 3   sign            94477 non-null  object
dtypes: int64(2), object(2)
memory usage: 2.9+ MB


None

In [4]:
def count_data_items(filenames):
    """Return the total number of examples stored in a list of TFRecord shards.

    The count is read from each shard's base name, which must contain the
    pattern ``-<digits>.`` (for example a name ending in ``-512.tfrecords``
    contributes 512). No file is opened.

    Args:
        filenames: iterable of shard paths; only the part after the last ``/`` is inspected.

    Returns:
        The sum of the per-shard counts as a Python ``int`` (0 for an empty list).

    Raises:
        ValueError: if a base name does not contain ``-<digits>.``.
    """
    # `+` (not `*`) so that a bare "-." earlier in the name cannot match with an
    # empty count and blow up inside int('').
    count_pattern = re.compile(r"-([0-9]+)\.")
    total = 0
    for filename in filenames:
        basename = filename.split('/')[-1]
        match = count_pattern.search(basename)
        if match is None:
            raise ValueError(f"Cannot read an example count from shard name {basename!r}")
        total += int(match.group(1))
    return total

# Sanity check: the counts encoded in the shard names must add up to the
# number of rows in train.csv.
num_records = count_data_items(TRAIN_FILENAMES)
print(num_records, len(train_df))
assert num_records == len(train_df)

94477 94477


# Constants and Training Configurations

In [None]:
MAX_LEN = 384          # Time axis is truncated / padded to this many frames
ROWS_PER_FRAME = 543   # Landmarks stored per frame in the TFRecords (used to reshape the raw buffer)
NUM_CLASSES = 250      # Size of the one-hot label / classifier output

# Landmark index groups (indices into the ROWS_PER_FRAME axis).
# NOTE(review): the face groups look like face-mesh indices — confirm against the dataset description.
NOSE = [1, 2, 98, 327]
LNOSE = [98]
RNOSE = [327]
LIP = [
    0, 61, 185, 40, 39, 37, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]
REYE = [
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
]
LEYE = [
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
]
LHAND = np.arange(468, 489).tolist()  # rows 468..488 (21 landmarks)
RHAND = np.arange(522, 543).tolist()  # rows 522..542 (21 landmarks)
# Landmarks actually fed to the model: 40 + 21 + 21 + 4 + 16 + 16 = 118.
POINT_LANDMARKS = LIP + LHAND + RHAND + NOSE + REYE + LEYE
# 6 features per landmark, as produced by PreProcessor.call: (x, y) position,
# (x, y) one-frame difference and (x, y) two-frame difference. z is dropped there.
CHANNELS = 6 * len(POINT_LANDMARKS)

In [6]:
class CFG:
    """Training-run settings, read elsewhere in the notebook as class attributes."""

    # --- run identity / bookkeeping ---
    seed = 42
    comment = f'islr-fp16-192-8-seed{seed}'  # prefix of the log / weight file names
    output_dir = '.'
    resume = 0                 # epochs already completed; non-zero reloads the "-last.h5" weights

    # --- model ---
    dim = 192                  # width passed to build_GISLR
    fp16 = True                # enable a mixed-precision policy

    # --- schedule ---
    epoch = 300
    batch_size = 768
    dropout_start_epoch = 15   # converted to optimizer steps for LateDropout

# Data Augmentation

In [7]:
def temporal_crop(x, max_len=MAX_LEN):
    """Return a random window of at most `max_len` consecutive frames from `x`.

    Args:
        x: tensor whose first axis is time.
        max_len: maximum number of frames to keep.

    Returns:
        `x[offset:offset + max_len]` for a uniformly drawn `offset`. Inputs that
        are already no longer than `max_len` are returned whole (offset 0).
    """
    length = tf.shape(x)[0]
    # Valid start offsets are 0 .. length - max_len inclusive, and `maxval` is
    # exclusive, hence the +1. The lower bound of 1 keeps the range non-empty for
    # short inputs. (Capping the bound at `max_len` would stop the window from
    # ever reaching the tail of sequences longer than 2 * max_len.)
    num_offsets = tf.maximum(length - max_len + 1, 1)
    offset = tf.random.uniform((), 0, num_offsets, dtype=tf.int32)
    return x[offset:offset + max_len]

def spatial_random_affine(xyz, scale  = (0.8,1.2), shear=(-0.15,0.15), shift=(-0.1,0.1), degree=(-30,30)):
    """Apply a random scale, shear, rotation and shift to landmark coordinates.

    Each argument other than `xyz` is a (low, high) range for a uniform draw;
    passing None skips that step. The steps run in the fixed order
    scale -> shear -> rotate -> shift.

    Args:
        xyz: tensor whose last axis holds coordinates; the first two entries are
            treated as (x, y) and everything from index 2 on is carried along as `z`.
        scale: range of the scalar multiplied into every coordinate.
        shear: range of the shear coefficient (applied to one axis only, see below).
        shift: range of the single scalar added to every coordinate.
        degree: range of the rotation angle in degrees.

    Returns:
        The transformed tensor, same layout as `xyz`.
    """
    # Rotation pivot; it is moved by the shear step so the rotation follows the sheared data.
    center = tf.constant([0.5,0.5])
    if scale is not None:
        # `scale` is rebound from the (low, high) tuple to the sampled scalar.
        scale = tf.random.uniform((),*scale)
        xyz = scale * xyz

    if shear is not None:
        xy = xyz[..., :2]
        z = xyz[..., 2:]

        # One coefficient is drawn, then a coin flip zeroes either the x or the y
        # term, so only one direction is sheared per call.
        shear_x = shear_y = tf.random.uniform((),*shear)
        if tf.random.uniform(()) < 0.5: shear_x = 0.
        else: shear_y = 0.

        shear_mat = tf.identity([[1., shear_x], [shear_y,1.]])
        xy = xy @ shear_mat
        center = center + [shear_y, shear_x]
        xyz = tf.concat([xy, z], axis=-1)

    if degree is not None:
        xy = xyz[..., :2]
        z = xyz[..., 2:]
        # Rotate about `center`: translate to the pivot, rotate, translate back.
        xy -= center
        degree = tf.random.uniform((), *degree)
        radian = degree/180 * np.pi

        cos, sin = tf.math.cos(radian), tf.math.sin(radian)
        rotate_mat = tf.identity([[cos, sin], [-sin, cos]])
        xy = xy @ rotate_mat + center
        xyz = tf.concat([xy, z], axis=-1)

    if shift is not None:
        # A single scalar is added to all coordinates (x, y and z alike).
        shift = tf.random.uniform((),*shift)
        xyz = xyz + shift
    return xyz

def temporal_mask(x, size=(0.2, 0.4)):
    """Overwrite a random run of consecutive frames with NaN.

    Args:
        x: tensor whose first axis is time.
        size: (low, high) range for the masked fraction of the sequence length.

    Returns:
        A copy of `x` in which `int(length * fraction)` consecutive frames,
        starting at a random offset, are entirely NaN.
    """
    length = tf.shape(x)[0]
    mask_fraction = tf.random.uniform((), *size)
    mask_size = tf.cast(tf.cast(length, tf.float32) * mask_fraction, tf.int32)
    mask_offset = tf.random.uniform((), 0, tf.clip_by_value(length - mask_size, 1, length), dtype=tf.int32)
    # Shape the NaN block from x's own trailing dimensions rather than a
    # hard-coded landmark count, so the function follows whatever layout it is given.
    nan_shape = tf.concat([tf.expand_dims(mask_size, 0), tf.shape(x)[1:]], axis=0)
    nan_block = tf.fill(nan_shape, tf.constant(float('nan'), dtype=x.dtype))
    masked_rows = tf.range(mask_offset, mask_offset + mask_size)[..., None]
    return tf.tensor_scatter_nd_update(x, masked_rows, nan_block)

def spatial_mask(x, size=(0.2, 0.4)):
    """Replace every point that falls inside a random square with NaN.

    The square's lower corner is drawn uniformly from [0, 1) on each axis and
    its side length uniformly from `size`. A point is masked when both its
    first (x) and second (y) coordinate lie strictly inside the square; all of
    that point's coordinates are then set to NaN.
    """
    left, top = tf.random.uniform(()), tf.random.uniform(())
    side = tf.random.uniform((), *size)
    xs, ys = x[..., 0], x[..., 1]
    inside_x = (left < xs) & (xs < left + side)
    inside_y = (top < ys) & (ys < top + side)
    return tf.where((inside_x & inside_y)[..., None], float('nan'), x)

def augment_fn(x, always=False, max_len=None):
    """Randomly augment one landmark sequence.

    Args:
        x: landmark tensor with time on the first axis.
        always: accepted but never read in this function.
        max_len: when not None, `x` is first passed through `temporal_crop`
            with this length. Because `always` comes first in the signature,
            this must be supplied by keyword; a second positional argument
            binds to `always` and leaves `max_len` as None.

    Returns:
        The augmented tensor. Each of the affine transform (p=0.75), temporal
        mask (p=0.5) and spatial mask (p=0.5) is applied independently.
    """
    # Disabled step; `resample` is not defined in the visible part of the notebook.
    # if tf.random.uniform(()) < 0.8: x = resample(x, (0.5, 1.5))
    if max_len is not None: x = temporal_crop(x, max_len)
    if tf.random.uniform(()) < 0.75: x = spatial_random_affine(x)
    if tf.random.uniform(()) < 0.5: x = temporal_mask(x)
    if tf.random.uniform(()) < 0.5: x = spatial_mask(x)
    return x

# Preprocessing

In [8]:
class PreProcessor(tf.keras.layers.Layer):
    """Turn raw landmark frames into the normalized feature tensor the model consumes.

    Input is (T, P, C) or (N, T, P, C); output is (N, T', 6 * len(point_landmarks))
    where T' is T truncated to `max_len`. NaNs in the input are treated as
    missing values and end up as 0 in the output.
    """
    def __init__(self, max_len, point_landmarks, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len                  # frames kept along the time axis (None = keep all)
        self.point_landmarks = point_landmarks  # landmark indices gathered from axis 2

    def tf_nan_mean(self, x, axis=0, keepdims=False):
        """Mean over `axis` that ignores NaN entries (sum of valid values / count of valid values).

        If every entry along `axis` is NaN the result is 0/0, i.e. NaN.
        """
        return tf.reduce_sum(
                tf.where(tf.math.is_nan(x), tf.zeros_like(x), x),
                axis=axis, keepdims=keepdims
            ) / tf.reduce_sum(
                tf.where(tf.math.is_nan(x), tf.zeros_like(x), tf.ones_like(x)),
                axis=axis, keepdims=keepdims
            )

    def tf_nan_std(self, x, center=None, axis=0, keepdims=False):
        """NaN-ignoring root-mean-square deviation of `x` from `center`.

        `center` defaults to the NaN-ignoring mean over `axis`.
        """
        if center is None: center = self.tf_nan_mean(x, axis=axis, keepdims=True)
        d = x - center
        return tf.math.sqrt(self.tf_nan_mean(d * d, axis=axis, keepdims=keepdims))

    def call(self, inputs):
        # Add a batch axis when a single (T, P, C) sequence is passed.
        x = tf.cond(tf.rank(inputs) == 3, lambda: inputs[None, ...], lambda: inputs)
        # x = inputs[None,...] if tf.rank(inputs) == 3 else inputs
        # Reference point: NaN-ignoring mean of landmark 17 (one of the LIP indices)
        # over time, per sample and per coordinate; 0.5 if it is never observed.
        mean = self.tf_nan_mean(tf.gather(x, [17], axis=2), axis=[1, 2], keepdims=True)
        mean = tf.where(tf.math.is_nan(mean), tf.constant(0.5, x.dtype), mean)
        x = tf.gather(x, self.point_landmarks, axis=2) # N,T,P,C
        # Center on the reference point and scale by the spread around it
        # (computed over time and landmarks).
        x = (x - mean) / self.tf_nan_std(x, center=mean, axis=[1, 2], keepdims=True)

        if self.max_len is not None: x = x[:, :self.max_len]
        length = tf.shape(x)[1]
        x = x[..., :2] # Only x, y coordinates

        # Frame differences at lag 1 and lag 2, zero-padded at the end so they keep
        # length T; all zeros when the sequence is too short to form a difference.
        dx1 = tf.cond(tf.shape(x)[1] > 1, lambda:tf.pad(x[:, 1:] - x[:, :-1], [[0,0], [0,1], [0,0], [0,0]]), lambda:tf.zeros_like(x))
        dx2 = tf.cond(tf.shape(x)[1] > 2, lambda:tf.pad(x[:, 2:] - x[:, :-2], [[0,0], [0,2], [0,0], [0,0]]), lambda:tf.zeros_like(x))
        # Flatten (landmark, coordinate) into one feature axis and stack
        # position | lag-1 difference | lag-2 difference.
        x = tf.concat([
            tf.reshape(x  , (-1, length, 2 * len(self.point_landmarks))),
            tf.reshape(dx1, (-1, length, 2 * len(self.point_landmarks))),
            tf.reshape(dx2, (-1, length, 2 * len(self.point_landmarks))),
        ], axis=-1)
        # Missing values (and anything derived from them) become 0.
        return tf.where(tf.math.is_nan(x), tf.constant(0., x.dtype), x)

# Data Loading

In [9]:
class DataLoader:
    """Build batched `tf.data` pipelines of (features, one-hot label) from TFRecord shards."""

    def __init__(self, max_len, point_landmarks, rows_per_frame, channels, num_classes):
        """Store the layout parameters and create the shared PreProcessor.

        Args:
            max_len: maximum number of frames kept per sequence.
            point_landmarks: landmark indices fed to the model.
            rows_per_frame: landmarks stored per frame in the raw records.
            channels: feature width of one preprocessed frame.
            num_classes: number of sign classes (one-hot depth).
        """
        self.max_len = max_len
        self.point_landmarks = point_landmarks
        self.rows_per_frame = rows_per_frame
        self.channels = channels
        self.num_classes = num_classes
        self.preprocessor = PreProcessor(max_len=max_len, point_landmarks=point_landmarks)

    def decode_tfrec(self, record_bytes):
        """Parse one serialized example into {'coordinates': (T, rows_per_frame, 3) float32, 'sign': int64}."""
        features = tf.io.parse_single_example(record_bytes, {
            'coordinates': tf.io.FixedLenFeature([], tf.string),
            'sign': tf.io.FixedLenFeature([], tf.int64),
        })
        out = {}
        # The coordinates are stored as a raw float32 buffer; restore the frame layout.
        out['coordinates'] = tf.reshape(
            tf.io.decode_raw(features['coordinates'], tf.float32),
            (-1, self.rows_per_frame, 3)
        )
        out['sign'] = features['sign']
        return out

    def filter_nans_tf(self, x):
        """Drop frames in which every selected landmark is NaN in every coordinate."""
        mask = tf.math.logical_not(tf.reduce_all(
            tf.math.is_nan(tf.gather(x, self.point_landmarks, axis=1)),
            axis=[-2, -1]
        ))
        return tf.boolean_mask(x, mask, axis=0)

    def process(self, x, augment=False):
        """Convert one decoded example into (float32 features, one-hot label).

        Args:
            x: dict as returned by `decode_tfrec`.
            augment: when true, apply `augment_fn` (including the temporal crop
                to `self.max_len`) before preprocessing.
        """
        coord = self.filter_nans_tf(x['coordinates'])
        # `max_len` has to be passed by keyword: augment_fn's second positional
        # parameter is `always`, so a positional value would leave max_len=None
        # and silently skip the temporal crop.
        if augment: coord = augment_fn(coord, max_len=self.max_len)
        coord = tf.ensure_shape(coord, (None, self.rows_per_frame, 3))
        # The preprocessor adds a batch axis; [0] removes it again.
        return tf.cast(self.preprocessor(coord)[0], tf.float32), tf.one_hot(x['sign'], self.num_classes)

    def get_tfrec_dataset(self, tfrecords, batch_size=64, max_len=64, drop_remainder=False, augment=False, shuffle=False, repeat=False):
        """Create the batched, prefetched dataset for a list of GZIP TFRecord files.

        Args:
            tfrecords: list of TFRecord paths.
            batch_size: examples per batch.
            max_len: time length every sequence is padded to. It should not be
                smaller than the loader's own `max_len`, which is what
                preprocessing truncates to.
            drop_remainder: drop the final short batch.
            augment: apply training augmentations.
            shuffle: falsy for no shuffling, otherwise the shuffle buffer size.
            repeat: repeat the dataset indefinitely.

        Returns:
            A `tf.data.Dataset` of (features, labels) batches; features are
            padded along time with -100.
        """
        ds = tf.data.TFRecordDataset(tfrecords, num_parallel_reads=tf.data.AUTOTUNE, compression_type='GZIP') # Initialize dataset with TFRecords
        ds = ds.map(self.decode_tfrec, tf.data.AUTOTUNE).map(lambda x: self.process(x, augment=augment), tf.data.AUTOTUNE)
        if repeat: ds = ds.repeat()
        if shuffle:
            ds = ds.shuffle(shuffle)
            # Element order does not matter once shuffled, so let tf.data trade
            # determinism for throughput. `deterministic` replaces the deprecated
            # `experimental_deterministic` option.
            options = tf.data.Options()
            options.deterministic = False
            ds = ds.with_options(options)

        ds = ds.padded_batch(
            batch_size, padding_values=-100.,
            padded_shapes=([max_len, self.channels], [self.num_classes]),
            drop_remainder=drop_remainder
        )
        return ds.prefetch(tf.data.AUTOTUNE)

In [10]:
# Hold out fold 0 for validation; every other fold is used for training.
valid_files = [path for path in TRAIN_FILENAMES if f'fold{0}' in path]
train_files = [path for path in TRAIN_FILENAMES if f'fold{0}' not in path]
num_train, num_valid = count_data_items(train_files), count_data_items(valid_files)

In [11]:
# One loader shared by both splits so that preprocessing is identical.
data_loader = DataLoader(
    max_len=MAX_LEN,
    point_landmarks=POINT_LANDMARKS,
    rows_per_frame=ROWS_PER_FRAME,
    channels=CHANNELS,
    num_classes=NUM_CLASSES,
)

# Training stream: augmented, shuffled, repeated forever, fixed-size batches.
train_ds = data_loader.get_tfrec_dataset(
    train_files,
    batch_size=CFG.batch_size,
    max_len=MAX_LEN,
    drop_remainder=True,
    augment=True,
    repeat=True,
    shuffle=32768,
)

# Validation stream: a single ordered pass that keeps the final partial batch.
valid_ds = data_loader.get_tfrec_dataset(
    valid_files,
    batch_size=CFG.batch_size,
    max_len=MAX_LEN,
    drop_remainder=False,
    repeat=False,
    shuffle=False,
)

# Model Implementation

In [12]:
class ECA(tf.keras.layers.Layer): # Efficient Channel Attention
    """Channel attention: rescale each channel by a gate computed from its time-average.

    The input is averaged over time (honouring the mask), a small 1-D
    convolution is slid across the channel axis, and the sigmoid of the result
    multiplies the input channel-wise.
    """
    def __init__(self, kernel_size=5, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.kernel_size = kernel_size
        # Sub-layers are created once here rather than on every call().
        self.pool = tf.keras.layers.GlobalAveragePooling1D()
        self.conv = tf.keras.layers.Conv1D(1, kernel_size=kernel_size, strides=1, padding="same", use_bias=False)

    def call(self, inputs, mask=None):
        x = self.pool(inputs, mask=mask)        # (N, C): masked mean over time
        x = self.conv(tf.expand_dims(x, -1))    # treat channels as the conv "sequence" axis
        x = tf.squeeze(x, -1)
        x = tf.sigmoid(x)[:, None, :]           # (N, 1, C) gate, broadcast over time
        return inputs * x

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from a config."""
        config = super().get_config()
        config.update({'kernel_size': self.kernel_size})
        return config


class LateDropout(tf.keras.layers.Layer):
    """Dropout that stays inactive until the layer has been called `start_step` times in training.

    Args:
        rate: dropout rate handed to the wrapped `Dropout` layer.
        noise_shape: forwarded to `Dropout`.
        start_step: number of training calls during which inputs pass through unchanged.
    """
    def __init__(self, rate, noise_shape=None, start_step=0, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.rate = rate
        self.start_step = start_step
        self.dropout = Dropout(rate, noise_shape=noise_shape)

    def build(self, input_shape):
        super().build(input_shape)
        # Non-trainable call counter, starting at 0.
        # NOTE(review): whether this counter is saved with / restored from the
        # model weights is not visible here — confirm before relying on it when resuming.
        agg = tf.VariableAggregation.ONLY_FIRST_REPLICA
        self._train_counter = tf.Variable(0, dtype="int64", aggregation=agg, trainable=False)

    def call(self, inputs, training=False):
        # Identity while the counter is below start_step, the wrapped Dropout afterwards.
        x = tf.cond(self._train_counter < self.start_step, lambda:inputs, lambda:self.dropout(inputs, training=training))
        # The counter only advances on training calls.
        if training: self._train_counter.assign_add(1)
        return x


class CausalDWConv1D(tf.keras.layers.Layer):
    """Depthwise 1-D convolution whose output at step t only sees steps <= t.

    Causality comes from zero-padding the start of the sequence by
    `dilation_rate * (kernel_size - 1)` frames and then running a 'valid'
    depthwise convolution, so the output has the same length as the input.
    """
    def __init__(self,
        kernel_size=17, dilation_rate=1, use_bias=False,
        depthwise_initializer='glorot_uniform', name='', **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.supports_masking = True
        left_context = dilation_rate * (kernel_size - 1)
        self.causal_pad = ZeroPadding1D((left_context, 0), name=name + '_pad')
        self.dw_conv = DepthwiseConv1D(
            kernel_size,
            strides=1,
            dilation_rate=dilation_rate,
            padding='valid',
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            name=name + '_dwconv',
        )

    def call(self, inputs):
        return self.dw_conv(self.causal_pad(inputs))


def Conv1DBlock(
    channel_size, kernel_size, dilation_rate=1, drop_rate=0.0,
    expand_ratio=2, se_ratio=0.25, activation='swish', name=None
):
    """Return a function that applies one expand -> causal depthwise conv -> ECA -> project block.

    Args:
        channel_size: output width of the block.
        kernel_size: kernel size of the causal depthwise convolution.
        dilation_rate: dilation of the depthwise convolution.
        drop_rate: if > 0, whole-sample dropout applied to the block output.
        expand_ratio: hidden width as a multiple of the input width.
        se_ratio: accepted but not used by this implementation.
        activation: activation of the expansion layer.
        name: prefix for the layer names; an auto-incremented id is used when None.

    A residual connection is added only when input and output widths match.
    """
    if name is None:
        name = str(tf.keras.backend.get_uid("mbblock"))

    def apply(inputs):
        in_channels = tf.keras.backend.int_shape(inputs)[-1]

        # Expansion phase
        hidden = Dense(
            in_channels * expand_ratio, use_bias=True,
            activation=activation, name=name + '_expand_conv'
        )(inputs)
        # Depthwise convolution, normalisation and channel attention
        hidden = CausalDWConv1D(
            kernel_size, dilation_rate=dilation_rate,
            use_bias=False, name=name + '_dwconv'
        )(hidden)
        hidden = BatchNormalization(momentum=0.95, name=name + '_bn')(hidden)
        hidden = ECA()(hidden)
        # Projection back to the block width
        out = Dense(channel_size, use_bias=True, name=name + '_project_conv')(hidden)

        if drop_rate > 0:
            out = Dropout(drop_rate, noise_shape=(None,1,1), name=name + '_drop')(out)
        if in_channels != channel_size:
            return out
        return tf.keras.layers.add([out, inputs], name=name + '_add')

    return apply

In [13]:
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    """Multi-head self-attention over the time axis with optional key masking.

    Args:
        dim: model width; also the output width.
        num_heads: number of attention heads (`dim` is split evenly across them).
        dropout: dropout rate applied to the attention weights.
    """
    def __init__(self, dim=256, num_heads=4, dropout=0, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim
        # NOTE(review): the logits are scaled by the full model width, not the
        # per-head width (dim // num_heads). Changing it would alter the behaviour
        # of weights trained with this definition — confirm before touching.
        self.scale = self.dim ** -0.5
        self.num_heads = num_heads
        self.qkv = Dense(3 * dim, use_bias=False)   # joint query/key/value projection
        self.drop1 = Dropout(dropout)
        self.proj = Dense(dim, use_bias=False)      # output projection
        self.supports_masking = True

    def call(self, inputs, mask=None):
        qkv = self.qkv(inputs)
        # (N, T, 3*dim) -> (N, heads, T, 3*dim/heads), then split into q, k, v per head.
        qkv = Permute((2, 1, 3))(Reshape((-1, self.num_heads, self.dim * 3 // self.num_heads))(qkv))
        q, k, v = tf.split(qkv, [self.dim // self.num_heads] * 3, axis=-1)
        attn = tf.matmul(q, k, transpose_b=True) * self.scale

        # Broadcast the (N, T) mask over heads and query positions so that masked
        # key positions are excluded from the softmax.
        if mask is not None: mask = mask[:, None, None, :]
        attn = Softmax(axis=-1)(attn, mask=mask)
        attn = self.drop1(attn)

        x = attn @ v
        # (N, heads, T, dim/heads) -> (N, T, dim)
        x = Reshape((-1, self.dim))(Permute((2, 1, 3))(x))
        return self.proj(x)


def TransformerBlock(dim=256, num_heads=4, expand=4, attn_dropout=0.2, drop_rate=0.2, activation='swish'):
    """Return a function applying a pre-norm transformer block (attention + feed-forward).

    Both sub-blocks normalise with BatchNormalization, drop whole samples with
    rate `drop_rate`, and are wrapped in a residual connection. The
    feed-forward hidden width is `dim * expand`.
    """
    def apply(inputs):
        # Self-attention sub-block
        h = BatchNormalization(momentum=0.95)(inputs)
        h = MultiHeadSelfAttention(dim=dim, num_heads=num_heads, dropout=attn_dropout)(h)
        h = Dropout(drop_rate, noise_shape=(None,1,1))(h)
        attn_out = Add()([inputs, h])

        # Feed-forward sub-block
        h = BatchNormalization(momentum=0.95)(attn_out)
        h = Dense(dim*expand, use_bias=False, activation=activation)(h)
        h = Dense(dim, use_bias=False)(h)
        h = Dropout(drop_rate, noise_shape=(None,1,1))(h)
        return Add()([attn_out, h])

    return apply


def Conv1DTransformerBlock(x, dim, kernel_size):
    """Apply two stages of (3 x Conv1DBlock -> TransformerBlock) to `x` and return the result."""
    for _stage in range(2):
        for _rep in range(3):
            x = Conv1DBlock(dim, kernel_size, drop_rate=0.2)(x)
        x = TransformerBlock(dim, expand=2)(x)
    return x

In [14]:
def build_GISLR(max_len, channels, dropout_step=0, dim=192, kernel_size=17, is_training=True):
    """Build the sign-classification model.

    Args:
        max_len: number of frames in the (padded) input sequence.
        channels: feature width of each frame.
        dropout_step: training step from which the final LateDropout becomes active.
        dim: model width; 384 adds a second Conv1DTransformerBlock stage.
        kernel_size: kernel size of the causal depthwise convolutions.
        is_training: when true, frames equal to the padding value -100 are
            masked; when false the stem reads the input directly with no mask.

    Returns:
        A `tf.keras.Model` mapping (max_len, channels) inputs to NUM_CLASSES logits.
    """
    inputs = tf.keras.Input((max_len, channels))
    # `x` must be bound on both paths; the Input layer already carries the
    # shape, so Masking needs no `input_shape` (passing one makes Keras warn).
    x = Masking(mask_value=-100.)(inputs) if is_training else inputs
    x = Dense(dim, use_bias=False, name='stem_conv')(x)
    x = BatchNormalization(momentum=0.95, name='stem_bn')(x)
    x = Conv1DTransformerBlock(x, dim, kernel_size)

    if dim == 384: x = Conv1DTransformerBlock(x, dim, kernel_size) # For the 4x sized model
    x = Dense(dim * 2, activation=None, name='top_conv')(x)
    x = GlobalAveragePooling1D()(x)
    x = LateDropout(0.8, start_step=dropout_step)(x)
    x = Dense(NUM_CLASSES, name='classifier')(x)
    return tf.keras.Model(inputs, x)

# Model Training

In [15]:
# Start from a clean Keras state and enable XLA JIT compilation.
tf.keras.backend.clear_session()
gc.collect()
tf.config.optimizer.set_jit(True)

# Apply the configured seed (Python, NumPy and TensorFlow) before the model is
# built; CFG.seed is not applied anywhere else in the cells shown.
# NOTE(review): the tf.data pipelines were created in an earlier cell, so their
# shuffling/augmentation randomness is presumably not covered by this call.
tf.keras.utils.set_random_seed(CFG.seed)

if CFG.fp16:
    # Prefer bfloat16 and fall back to float16 if that policy cannot be set.
    # Catch Exception rather than using a bare `except`, so KeyboardInterrupt
    # and SystemExit are not swallowed.
    try:
        mixed_precision.set_global_policy(mixed_precision.Policy('mixed_bfloat16'))
    except Exception:
        mixed_precision.set_global_policy(mixed_precision.Policy('mixed_float16'))
else:
    mixed_precision.set_global_policy(policy = mixed_precision.Policy('float32'))

# Schedule lengths, in optimizer steps.
steps_per_epoch = num_train // CFG.batch_size
total_steps = (CFG.epoch - CFG.resume) * steps_per_epoch
model = build_GISLR(max_len=MAX_LEN, channels=CHANNELS, dropout_step=CFG.dropout_start_epoch * steps_per_epoch, dim=CFG.dim)
model.summary()

  super().__init__(**kwargs)


In [16]:
# Learning-rate schedule: linear warm-up followed by cosine decay.
lr_schedule = CosineDecay(
    initial_learning_rate=1e-4,
    warmup_target=2e-4,
    warmup_steps=int(total_steps * 0.15),  # 15% of total_steps
    decay_steps=int(total_steps * 0.3),    # Next 30% of total_steps
    alpha=0.1,                             # Minimum lr for decay as a fraction of initial_learning_rate
)
optimizer = Adam(learning_rate=lr_schedule)

# The classifier emits logits, hence from_logits=True.
model.compile(
    optimizer=optimizer,
    loss=[CategoricalCrossentropy(from_logits=True, label_smoothing=0.1)],
    metrics=[CategoricalAccuracy()],
)

# Optionally continue from previously saved weights.
if CFG.resume:
    print(f'resume from epoch{CFG.resume}')
    model.load_weights(f'{CFG.output_dir}/{CFG.comment}-last.h5')

In [None]:
# Train the model. The trailing `.history` makes the per-epoch metrics dict the
# value of the cell. The training dataset repeats indefinitely, so
# `steps_per_epoch` is what defines the length of an epoch.
model.fit(
    train_ds,
    epochs = CFG.epoch-CFG.resume,
    steps_per_epoch = steps_per_epoch,
    callbacks = [
        # Per-epoch metrics are appended to a CSV next to the other run outputs.
        CSVLogger(f'{CFG.output_dir}/{CFG.comment}-logs.csv'),
        # Checkpointing is currently disabled, so this run writes no weight files.
        # ModelCheckpoint(
        #     f'{CFG.output_dir}/{CFG.comment}-best.h5',
        #     monitor='val_loss', mode='min', verbose=1,
        #     save_best_only=True, save_weights_only=True, save_freq='epoch'
        # ),
        EarlyStopping(
            monitor = 'val_loss',
            patience = 3, # Stop if no improvement after 3 epochs
            restore_best_weights = True,
            verbose = 1
        )
    ],
    validation_data = valid_ds,
    verbose = 1
).history

Epoch 1/300




[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - categorical_accuracy: 0.0048 - loss: 7.0392



[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1724s[0m 15s/step - categorical_accuracy: 0.0048 - loss: 7.0328 - val_categorical_accuracy: 0.0208 - val_loss: 5.3742
Epoch 2/300




[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - categorical_accuracy: 0.0103 - loss: 5.7789

# Inference

In [None]:
import os
import zipfile
import subprocess

def check_pretrained_weights(): # Check if these files exist, if not download from Kaggle
    """Make sure the four pretrained weight files exist under ./weights.

    If any of them is missing, the model archive is downloaded with `curl`,
    extracted into ./weights, and the archive is deleted afterwards.

    Raises:
        RuntimeError: if the download or the extraction fails; the underlying
            exception is attached as `__cause__`.
    """
    pretrained_path = [
        './weights/islr-fp16-192-8-seed42-foldall-last.h5',
        './weights/islr-fp16-192-8-seed43-foldall-last.h5',
        './weights/islr-fp16-192-8-seed44-foldall-last.h5',
        './weights/islr-fp16-192-8-seed45-foldall-last.h5',
    ]
    if not all(os.path.exists(path) for path in pretrained_path):
        archive_path = './weights/islr-models.zip'
        os.makedirs('./weights', exist_ok=True)
        dataset_url = 'https://www.kaggle.com/api/v1/datasets/download/hoyso48/islr-models'
        # --fail makes curl exit non-zero on an HTTP error instead of saving the
        # error page as if it were the archive.
        cmd = ['curl', '--fail', '-L', '-o', archive_path, dataset_url]
        print("Downloading weights from Kaggle...")
        try:
            subprocess.run(cmd, check=True)
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall('./weights')
            os.remove(archive_path)
            print("Weights downloaded and extracted successfully.")
        except Exception as e:
            # Chain the original exception so the real cause stays in the traceback.
            raise RuntimeError(f"Failed to download or extract weights: {e}") from e