In [2]:
!pip install tensorflow opencv-python mediapipe matplotlib numpy



In [3]:
# Step 1: Install Required Libraries
!pip install kaggle zipfile36

# Step 2: Import Required Libraries
import os
import json
import zipfile

# Step 3: Set Up Kaggle API Credentials
kaggle_api = {
    "username": "YOUR_USERNAME",  # Replace with your Kaggle username
    "key": "YOUR_API_KEY"         # Replace with your Kaggle API key
}

# Store Kaggle credentials securely in your home directory
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)

with open(os.path.join(kaggle_dir, 'kaggle.json'), 'w') as f:
    json.dump(kaggle_api, f)

os.chmod(os.path.join(kaggle_dir, 'kaggle.json'), 0o600)

# Step 4: Download the ASL Dataset Using Kaggle CLI
!kaggle datasets download -d grassknoted/asl-alphabet -p "./"

# Step 5: Unzip the Dataset
dataset_zip = "asl-alphabet.zip"
extract_path = "./asl_dataset"

if not os.path.exists(extract_path):
    os.makedirs(extract_path)

with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("✅ Dataset downloaded and extracted successfully to", extract_path)



Dataset URL: https://www.kaggle.com/datasets/grassknoted/asl-alphabet
License(s): GPL-2.0
asl-alphabet.zip: Skipping, found more recently modified local copy (use --force to force download)
✅ Dataset downloaded and extracted successfully to ./asl_dataset


In [5]:
import random
import shutil
from pathlib import Path

# Define paths
original_path = Path("asl_dataset/asl_alphabet_train/asl_alphabet_train")
train_path = Path("asl_dataset/asl_alphabet_train")

# Make directories for train and test
train_dir = train_path / "train"
test_dir = train_path / "test"

# Create train and test directories if they don't exist
train_dir.mkdir(parents=True, exist_ok=True)
test_dir.mkdir(parents=True, exist_ok=True)

# Define the number of images per class
images_per_class = 50

# Loop through the A, B, C, D, E classes and sample 50 images
for label in ["A", "B", "C", "D", "E"]:
    all_images = list((original_path / label).glob("*.jpg"))
    selected_images = random.sample(all_images, images_per_class)

    # Create subdirectories in train and test
    (train_dir / label).mkdir(parents=True, exist_ok=True)
    (test_dir / label).mkdir(parents=True, exist_ok=True)

    # Move images to train and test
    for i, img in enumerate(selected_images):
        if i < int(images_per_class * 0.8):  # 80% for training
            shutil.move(str(img), str(train_dir / label / img.name))
        else:  # 20% for testing
            shutil.move(str(img), str(test_dir / label / img.name))


In [6]:
import tensorflow as tf

# Load the training and testing datasets
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    'asl_dataset/asl_alphabet_train',
    image_size=(224, 224),  # Resize for MobileNet
    batch_size=32
)

test_data = tf.keras.preprocessing.image_dataset_from_directory(
    'asl_dataset/asl_alphabet_train',
    image_size=(224, 224),
    batch_size=32
)

print(f"Training data: {len(train_data)} batches")
print(f"Testing data: {len(test_data)} batches")


Found 87000 files belonging to 3 classes.
Found 87000 files belonging to 3 classes.
Training data: 2719 batches
Testing data: 2719 batches


In [19]:
!conda create -n asl_env python=3.10 -y
!conda activate asl_env
!pip install numpy==1.25.2 pandas==2.0.3 pillow==9.5.0
!pip install tensorflow==2.13.0 opencv-python mediapipe matplotlib


Channels:
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/anaconda3/envs/asl_env

  added / updated specs:
    - python=3.10


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2025.2.25  |       hca03da5_0         131 KB
    openssl-3.0.16             |       h02f6b3c_0         4.3 MB
    pip-25.1                   |     pyhc872135_2         1.3 MB
    python-3.10.16             |       hb885b13_1        12.0 MB
    setuptools-78.1.1          |  py310hca03da5_0         1.7 MB
    tzdata-2025b               |       h04d1e81_0         116 KB
    wheel-0.45.1               |  py310hca03da5_0         116 KB
    xz-5.6.4                   |       h80987f9_1         289 KB
    ------------------------------------------------------------
                                  

In [8]:
!pip install pydrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth  # This line won't work in Jupyter, remove it
from oauth2client.client import GoogleCredentials

# Manual auth steps required here if using locally


ModuleNotFoundError: No module named 'google.colab'

In [21]:
import os
print(os.getcwd())

/Users/pritul_vachhani/Documents


In [None]:
import tensorflow as tf

# Paths to the train and test directories
train_dir = 'asl_dataset/asl_alphabet_train/train'
test_dir = 'asl_dataset/asl_alphabet_train/test'

# Load the training data (A, B, C, D, E)
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    image_size=(224, 224),  # Resize to fit MobileNetV2 input size
    batch_size=32,
    labels='inferred',  # Automatically label based on folder names
    label_mode='int'
)

# Load the test data (A, B, C, D, E)
test_data = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    image_size=(224, 224),  # Resize to fit MobileNetV2 input size
    batch_size=32,
    labels='inferred',  # Automatically label based on folder names
    label_mode='int'
)

# Print out the number of batches and class names
print(f"Training data: {len(train_data)} batches")
print(f"Testing data: {len(test_data)} batches")
print(f"Class names: {train_data.class_names}")  # Ensure class names are A, B, C, D, E


In [None]:
import tensorflow as tf

# Load the pre-trained MobileNetV2 model, excluding the top classification layers
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,  # Remove the original classifier
    weights='imagenet'  # Use ImageNet weights
)

# Freeze the layers of the base model to prevent them from being trained
base_model.trainable = False

# Create the new model on top of the base model
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),  # Global average pooling layer
    tf.keras.layers.Dense(5, activation='softmax')  # 5 output classes (A-E)
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display the model summary
model.summary()


# Train the model for 10 epochs
history = model.fit(
    train_data,
    epochs=10,
    validation_data=test_data
)

# Plot the training and validation accuracy
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.show()


In [None]:
# Saving Model
model.save('ASL_MODEL_#1.keras')