In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

print("Done!")
        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Done!


# Setup and Imports

In [2]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from pathlib import Path
import shutil
from datetime import datetime

print("🚀 ASL Training on Kaggle")
print("=" * 50)
print(f"TensorFlow Version: {tf.__version__}")

# Check GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"✅ GPU Available: {len(gpus)} GPU(s)")
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("✅ Mixed precision enabled")
    HAS_GPU = True
else:
    print("❌ No GPU detected")
    HAS_GPU = False

print("=" * 50)

2025-09-30 07:40:01.499086: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759218001.667963      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759218001.717593      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🚀 ASL Training on Kaggle
TensorFlow Version: 2.18.0
✅ GPU Available: 1 GPU(s)
✅ Mixed precision enabled


# Find Dataset

In [3]:
# Find dataset path
input_dir = Path('/kaggle/input/asl-alphabet')
dataset_path = None

if input_dir.exists():
    print("🔍 Searching for dataset...")
    for item in input_dir.iterdir():
        print(f"   Found: {item}")
        
        # Look for asl_alphabet_train folder (might be nested)
        possible_paths = [
            item / 'asl_alphabet_train',  # Direct path
            item,  # Root might be the dataset itself
        ]
        
        for path in possible_paths:
            if path.exists() and path.is_dir():
                # Check if this directory contains letter folders (A, B, C, etc.)
                subdirs = [d for d in path.iterdir() if d.is_dir()]
                letter_dirs = [d for d in subdirs if d.name in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' or d.name in ['space', 'del', 'nothing']]
                
                if len(letter_dirs) > 10:  # Should have at least 10 letter folders
                    dataset_path = str(path)
                    print(f"✅ Dataset found with {len(letter_dirs)} letter folders: {dataset_path}")
                    break
        
        if dataset_path:
            break

if not dataset_path:
    print("❌ Dataset not found!")
    print("Expected structure: dataset/A/, dataset/B/, dataset/C/, etc.")
else:
    print(f"📁 Using dataset: {dataset_path}")
    
    # Debug: Show what's actually in the dataset
    dataset_check = Path(dataset_path)
    print(f"\n🔍 Dataset contents:")
    for item in sorted(dataset_check.iterdir()):
        if item.is_dir():
            count = len(list(item.glob('*.jpg')) + list(item.glob('*.png')))
            print(f"  📁 {item.name}: {count} images")
        else:
            print(f"  📄 {item.name}")

🔍 Searching for dataset...
   Found: /kaggle/input/asl-alphabet/asl_alphabet_test
   Found: /kaggle/input/asl-alphabet/asl_alphabet_train
✅ Dataset found with 29 letter folders: /kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train
📁 Using dataset: /kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train

🔍 Dataset contents:
  📁 A: 3000 images
  📁 B: 3000 images
  📁 C: 3000 images
  📁 D: 3000 images
  📁 E: 3000 images
  📁 F: 3000 images
  📁 G: 3000 images
  📁 H: 3000 images
  📁 I: 3000 images
  📁 J: 3000 images
  📁 K: 3000 images
  📁 L: 3000 images
  📁 M: 3000 images
  📁 N: 3000 images
  📁 O: 3000 images
  📁 P: 3000 images
  📁 Q: 3000 images
  📁 R: 3000 images
  📁 S: 3000 images
  📁 T: 3000 images
  📁 U: 3000 images
  📁 V: 3000 images
  📁 W: 3000 images
  📁 X: 3000 images
  📁 Y: 3000 images
  📁 Z: 3000 images
  📁 del: 3000 images
  📁 nothing: 3000 images
  📁 space: 3000 images


# Prepare dataset

In [4]:
def prepare_dataset(input_dir, output_dir):
    print(f"📁 Preparing dataset from: {input_dir}")
    
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    # Get all class directories - filter for actual letter/word folders
    all_dirs = [d for d in input_path.iterdir() if d.is_dir()]
    
    # Filter for valid ASL classes (letters + special classes)
    valid_classes = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ') | {'space', 'del', 'nothing'}
    class_dirs = [d for d in all_dirs if d.name in valid_classes or len(d.name) == 1]
    
    print(f"Found {len(all_dirs)} total directories, {len(class_dirs)} valid classes")
    print(f"Valid classes: {sorted([d.name for d in class_dirs])}")
    
    if len(class_dirs) == 0:
        print("❌ No valid ASL class folders found!")
        print("Expected folders: A, B, C, ..., Z, space, del, nothing")
        print("Available folders:", [d.name for d in all_dirs])
        return None
    
    splits = ['train', 'val', 'test']
    split_ratios = [0.7, 0.15, 0.15]
    
    for split in splits:
        (output_path / split).mkdir(exist_ok=True)
    
    total_files = 0
    split_counts = {'train': 0, 'val': 0, 'test': 0}
    
    for class_dir in class_dirs:
        print(f"Processing: {class_dir.name}")
        
        image_files = list(class_dir.glob('*.jpg')) + list(class_dir.glob('*.png')) + list(class_dir.glob('*.jpeg'))
        image_files.sort()
        
        if len(image_files) == 0:
            print(f"  ⚠️  No images found in {class_dir.name}")
            continue
        
        print(f"  Found {len(image_files)} images")
        total_files += len(image_files)
        
        n_files = len(image_files)
        train_end = int(n_files * split_ratios[0])
        val_end = train_end + int(n_files * split_ratios[1])
        
        for split in splits:
            (output_path / split / class_dir.name).mkdir(exist_ok=True)
        
        for i, img_file in enumerate(image_files):
            if i < train_end:
                split = 'train'
            elif i < val_end:
                split = 'val'
            else:
                split = 'test'
            
            dst = output_path / split / class_dir.name / img_file.name
            shutil.copy2(img_file, dst)
            split_counts[split] += 1
    
    print(f"✅ Dataset prepared: {total_files:,} total files")
    print(f"   Train: {split_counts['train']:,}")
    print(f"   Val: {split_counts['val']:,}")
    print(f"   Test: {split_counts['test']:,}")
    return str(output_path)

# Prepare the dataset - IMPORTANT: Output must be in /kaggle/working/ (writable)
prepared_dataset = prepare_dataset(dataset_path, '/kaggle/working/dataset')

📁 Preparing dataset from: /kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train
Found 29 total directories, 29 valid classes
Valid classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
Processing: N
  Found 3000 images
Processing: R
  Found 3000 images
Processing: space
  Found 3000 images
Processing: B
  Found 3000 images
Processing: I
  Found 3000 images
Processing: del
  Found 3000 images
Processing: F
  Found 3000 images
Processing: H
  Found 3000 images
Processing: E
  Found 3000 images
Processing: U
  Found 3000 images
Processing: M
  Found 3000 images
Processing: X
  Found 3000 images
Processing: K
  Found 3000 images
Processing: Q
  Found 3000 images
Processing: Y
  Found 3000 images
Processing: S
  Found 3000 images
Processing: G
  Found 3000 images
Processing: A
  Found 3000 images
Processing: O
  Found 3000 images
Processing: T
  Found 3000 images
Proce

# Create Dataset Loaders

In [5]:
def create_dataset(data_dir, batch_size=32, img_size=(200, 200)):
    dataset = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        seed=42,
        image_size=img_size,
        batch_size=batch_size,
        label_mode='categorical'
    )
    return dataset, dataset.class_names

def augment_dataset(dataset, is_training=True):
    data_augmentation = tf.keras.Sequential([
        layers.Rescaling(1./255),
    ])
    
    if is_training:
        data_augmentation.add(layers.RandomRotation(0.1))
        data_augmentation.add(layers.RandomZoom(0.1))
        data_augmentation.add(layers.RandomContrast(0.2))
        data_augmentation.add(layers.RandomBrightness(0.2))
    
    dataset = dataset.map(
        lambda x, y: (data_augmentation(x), y),
        num_parallel_calls=tf.data.AUTOTUNE
    )
    
    return dataset.prefetch(tf.data.AUTOTUNE)

# Set parameters
BATCH_SIZE = 64 if HAS_GPU else 32
IMG_SIZE = (200, 200)
OUTPUT_DIR = '/kaggle/working/models'

print(f"Batch size: {BATCH_SIZE}")
print(f"Image size: {IMG_SIZE}")

# Load datasets
train_ds, class_names = create_dataset(f"{prepared_dataset}/train", BATCH_SIZE, IMG_SIZE)
val_ds, _ = create_dataset(f"{prepared_dataset}/val", BATCH_SIZE, IMG_SIZE)
test_ds, _ = create_dataset(f"{prepared_dataset}/test", BATCH_SIZE, IMG_SIZE)

print(f"✅ Loaded {len(class_names)} classes: {class_names}")

# Apply augmentation
train_ds = augment_dataset(train_ds, True)
val_ds = augment_dataset(val_ds, False)
test_ds = augment_dataset(test_ds, False)

print("✅ Data augmentation applied")

Batch size: 64
Image size: (200, 200)
Found 60900 files belonging to 29 classes.


I0000 00:00:1759218497.666809      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Found 13050 files belonging to 29 classes.
Found 13050 files belonging to 29 classes.
✅ Loaded 29 classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
✅ Data augmentation applied


# Create Model

In [6]:
def create_model(num_classes, img_size=(200, 200)):
    base_model = MobileNetV3Large(
        input_shape=(*img_size, 3),
        alpha=1.0,
        minimalistic=False,
        include_top=False,
        weights='imagenet',
        pooling='avg'
    )
    
    base_model.trainable = False
    
    inputs = keras.Input(shape=(*img_size, 3))
    x = base_model(inputs, training=False)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = keras.Model(inputs, outputs)
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy']
    )
    
    return model, base_model

# Calculate class weights
def calculate_class_weights(dataset, class_names):
    class_counts = np.zeros(len(class_names))
    total_samples = 0
    
    for images, labels in dataset:
        label_indices = tf.argmax(labels, axis=1)
        for idx in label_indices:
            class_counts[idx.numpy()] += 1
        total_samples += len(labels)
    
    class_weights = {}
    for i, count in enumerate(class_counts):
        if count > 0:
            class_weights[i] = total_samples / (len(class_names) * count)
        else:
            class_weights[i] = 1.0
    
    return class_weights

# Create model and calculate weights
model, base_model = create_model(len(class_names), IMG_SIZE)
class_weights = calculate_class_weights(train_ds, class_names)

print(f"✅ Model created with {model.count_params():,} parameters")
print(f"✅ Class weights calculated")

  return MobileNetV3(


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5
[1m12683000/12683000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
✅ Model created with 3,024,221 parameters
✅ Class weights calculated


# Train Model - Phase 1

In [7]:
# Setup output directory
output_path = Path(OUTPUT_DIR)
output_path.mkdir(parents=True, exist_ok=True)

print("🎯 Phase 1: Training classifier head")

# Phase 1 callbacks
callbacks_phase1 = [
    ModelCheckpoint(
        str(output_path / 'best_model_phase1.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
]

# Train phase 1 (15 epochs)
history_phase1 = model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    class_weight=class_weights,
    callbacks=callbacks_phase1,
    verbose=1
)

print("✅ Phase 1 completed")

🎯 Phase 1: Training classifier head
Epoch 1/15


I0000 00:00:1759218808.436136     103 service.cc:148] XLA service 0x7c53c4002660 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759218808.436913     103 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1759218810.880498     103 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  5/952[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m27s[0m 29ms/step - accuracy: 0.0297 - loss: 3.7064 

I0000 00:00:1759218817.235495     103 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309ms/step - accuracy: 0.0347 - loss: 3.4175
Epoch 1: val_accuracy improved from -inf to 0.03433, saving model to /kaggle/working/models/best_model_phase1.keras
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 327ms/step - accuracy: 0.0347 - loss: 3.4175 - val_accuracy: 0.0343 - val_loss: 3.3587
Epoch 2/15
[1m951/952[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 304ms/step - accuracy: 0.0391 - loss: 3.3788
Epoch 2: val_accuracy improved from 0.03433 to 0.04851, saving model to /kaggle/working/models/best_model_phase1.keras
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 311ms/step - accuracy: 0.0391 - loss: 3.3787 - val_accuracy: 0.0485 - val_loss: 3.3411
Epoch 3/15
[1m951/952[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 306ms/step - accuracy: 0.0454 - loss: 3.3700
Epoch 3: val_accuracy improved from 0.04851 to 0.09739, saving model to /kaggle/working/models/best_

# Train Model - Phase 2

In [8]:
print("🔥 Phase 2: Fine-tuning entire model")

# Unfreeze base model
base_model.trainable = True

# Recompile with lower learning rate
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00002),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy']
)

# Phase 2 callbacks
callbacks_phase2 = [
    ModelCheckpoint(
        str(output_path / 'best_model_final.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_accuracy',
        patience=8,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=0.000001,
        verbose=1
    )
]

# Train phase 2 (15 more epochs) - FIXED with proper epoch counting
try:
    print("Starting Phase 2 training...")
    history_phase2 = model.fit(
        train_ds,
        epochs=30,  # Total epochs (15 + 15)
        validation_data=val_ds,
        class_weight=class_weights,
        callbacks=callbacks_phase2,
        verbose=1,
        initial_epoch=15  # Start from epoch 15, train to epoch 30
    )
    print("✅ Phase 2 completed successfully")
    
    # Check if training actually happened
    if len(history_phase2.history) == 0 or len(history_phase2.history.get('loss', [])) == 0:
        print("⚠️  Phase 2 didn't train any epochs (check epoch settings)")
        raise ValueError("No epochs trained in Phase 2")
        
except Exception as e:
    print(f"⚠️  Phase 2 training failed: {e}")
    print("Creating empty history for phase 2...")
    # Create empty history object
    class EmptyHistory:
        def __init__(self):
            self.history = {}
    history_phase2 = EmptyHistory()

# Combine histories - Fix for empty Phase 2 history
combined_history = {}

# Debug: Print available keys
print("Phase 1 history keys:", list(history_phase1.history.keys()))
print("Phase 2 history keys:", list(history_phase2.history.keys()))

# Check if Phase 2 training actually happened
if len(history_phase2.history) == 0:
    print("⚠️  Phase 2 training failed - using only Phase 1 history")
    combined_history = history_phase1.history.copy()
else:
    # Normal case - combine both phases
    common_keys = set(history_phase1.history.keys()) & set(history_phase2.history.keys())
    print(f"Common keys: {common_keys}")

    for key in common_keys:
        combined_history[key] = history_phase1.history[key] + history_phase2.history[key]

    # Add any missing keys from phase 1
    for key in history_phase1.history.keys():
        if key not in combined_history:
            if 'loss' in history_phase2.history:
                combined_history[key] = history_phase1.history[key] + [None] * len(history_phase2.history['loss'])
            else:
                combined_history[key] = history_phase1.history[key]
            print(f"⚠️  Added {key} from phase 1 only")

print("✅ Training completed!")

🔥 Phase 2: Fine-tuning entire model
Starting Phase 2 training...
Epoch 16/30
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step - accuracy: 0.0465 - loss: 5.2038
Epoch 16: val_accuracy improved from -inf to 0.03448, saving model to /kaggle/working/models/best_model_final.keras
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 356ms/step - accuracy: 0.0465 - loss: 5.2023 - val_accuracy: 0.0345 - val_loss: 3.6559 - learning_rate: 2.0000e-05
Epoch 17/30
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step - accuracy: 0.1613 - loss: 2.9233
Epoch 17: val_accuracy improved from 0.03448 to 0.09655, saving model to /kaggle/working/models/best_model_final.keras
[1m952/952[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 318ms/step - accuracy: 0.1613 - loss: 2.9231 - val_accuracy: 0.0966 - val_loss: 3.5634 - learning_rate: 2.0000e-05
Epoch 18/30
[1m951/952[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 308ms/step

# Evaluate and Export