In [1]:
from modules.data_visualizer import ImageVisualizer
from modules.data_loader import DataLoaderCreator
from modules.data_prediction import ComprehensiveImagePredictor
from modules.model_eval import ModelEvaluator
from modules.model_performance import ModelPerformanceVisualizer
from modules.model_trainer import ModelTrainer
from modules.model_saver import ModelSaver

#### Split the dataset into train/test sets

In [6]:
from modules.data_split import DatasetImageSplitter

# Usage notes for DatasetImageSplitter (kept as comments so nothing is executed):
#
#   SPLITTED_DATASET = "DATASET"
#   PATH = "path/to/your/source/directory"
#
#   # Create an instance of DatasetImageSplitter
#   splitter = DatasetImageSplitter(PATH, SPLITTED_DATASET)
#
#   # Use the default 70-30 split
#   splitter()
#
#   # Or specify a different split, e.g., 80-20
#   splitter(train_size=0.8)
#
#   # You can also call split_dataset directly if you prefer
#   splitter.split_dataset(train_size=0.75)

# Destination root for the split; the recorded run copied images into
# DATASET\train\<class>.
SPLITTED_DATASET = "DATASET"
# NOTE: machine-specific absolute path -- point this at your own copy of the
# raw, per-class image folders before running the cell.
SRC_DIR = r'D:\ASL_RECOGNITION\ANDROID__DS'

# Build the splitter (source first, destination second) and run it with its
# default split ratio.
splitter = DatasetImageSplitter(SRC_DIR, SPLITTED_DATASET)
splitter()


Found 300 images in D:\ASL_RECOGNITION\ANDROID__DS\0
Splitting 300 images: 210 for training and 90 for testing in D:\ASL_RECOGNITION\ANDROID__DS\0
--------------------------------------------------------------------
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213331 (14).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213327 (16).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213325 (9).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213332 (2).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213324 (5).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213324 (10).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213330 (20).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213329 (13).jpg TO DATASET\train\0
COPYING D:\ASL_RECOGNITION\ANDROID__DS\0\0_20240917_213334 (7).jpg TO DATASET\train\0
COPYI

In [2]:
import torch
from torchvision import models
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torch import nn

# Resolve the compute device once; the CUDA report below branches on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Default pretrained MobileNetV2 weights and the preprocessing pipeline that
# ships with them (printed so the resize/crop/normalisation values are visible).
weights = models.MobileNet_V2_Weights.DEFAULT
mnet_transform = weights.transforms()
print(mnet_transform)

# Report what hardware the notebook will run on.
print(f"CUDA available: {torch.cuda.is_available()}")
if device.type != "cuda":
    print("CUDA is not available. Using CPU.")
else:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")
    print(f"Device count: {torch.cuda.device_count()}")

# Instantiate the pretrained network, then move it onto the chosen device.
ASL_mobilenet = models.mobilenet_v2(weights=weights)
ASL_mobilenet = ASL_mobilenet.to(device)

ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)
CUDA available: True
CUDA version: 12.1
Device name: NVIDIA GeForce GTX 1070
Device count: 1


In [11]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

import os

# NOTE(review): `train_dir`, `test_dir` and `train_transforms` are not defined in
# any cell visible in this notebook. They must exist in the kernel before this
# cell runs -- define them in an earlier cell so Restart & Run All works.

# --- Hyperparameters (grouped at the top so they are easy to tune) ---
NUM_EPOCHS = 25
EARLY_STOPPING = 2      # forwarded to trainer.train(patience=...)
LEARNING_RATE = 0.001
BATCH_SIZE = 32
PLOT_PATH = "model_performance/plot.jpg"

# Freeze the pretrained feature extractor (every parameter under `.features`);
# only the classifier head assigned below stays trainable.
for param in ASL_mobilenet.features.parameters():
    param.requires_grad = False

# --- Data ---
loader_creator = DataLoaderCreator(train_dir=train_dir,
                                   test_dir=test_dir,
                                   transform=train_transforms)

train_dataloader, test_dataloader, class_names = loader_creator.create_dataloaders(batch_size=BATCH_SIZE)

class_names_count = len(loader_creator.get_class_names())

print(f"Class names: {loader_creator.get_class_names()}")
print(f"Class names Count: {class_names_count}")

trainer = ModelTrainer()

# --- Model head ---
# Replace the classifier with a fresh head sized to the dataset's class count.
# 1280 is the input width used for this head in the original cell.
ASL_mobilenet.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=1280,
                    out_features=class_names_count,
                    bias=True)).to(device)

# --- Optimisation ---
criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that can actually receive gradients;
# the frozen feature-extractor weights would otherwise be tracked for nothing.
trainable_params = [p for p in ASL_mobilenet.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params=trainable_params, lr=LEARNING_RATE)
# NOTE(review): the scheduler waits for patience=3 non-improving epochs, while
# early stopping is given patience=2. If ModelTrainer counts epochs the same
# way, training stops before the learning rate is ever reduced -- confirm and
# align the two values.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, threshold=0.001)

# --- Train ---
TRAINED_MODEL, RESULTS = trainer.train(
    model=ASL_mobilenet,
    train_loader=train_dataloader,
    test_loader=test_dataloader,
    optimizer=optimizer,
    loss_fn=criterion,
    epochs=NUM_EPOCHS,
    scheduler=scheduler,
    patience=EARLY_STOPPING
)

# --- Report ---
# Create the output folder up front so a long training run cannot be followed
# by a failed save because the directory is missing.
os.makedirs(os.path.dirname(PLOT_PATH), exist_ok=True)

visualizer = ModelPerformanceVisualizer(RESULTS)
y_true, y_pred = visualizer.get_preds(model=TRAINED_MODEL, dataloader=test_dataloader, device=device)
visualizer.plot_all(y_true=y_true, y_pred=y_pred, classes=class_names, save_path=PLOT_PATH)

Class names: ['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
Class names Count: 37
ModelTrainer initialized with device: cuda
CUDA device: NVIDIA GeForce GTX 1070
CUDA available: True
Current CUDA device: 0
Training on device: cuda
Model is on device: cuda:0
Moving model to cuda
After moving, model is on device: cuda:0


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.7644 | train_acc: 0.9348 | test_loss: 0.0801 | test_acc: 0.9997 | lr: 0.001000
Epoch: 2 | train_loss: 0.0616 | train_acc: 1.0000 | test_loss: 0.0285 | test_acc: 1.0000 | lr: 0.001000
Epoch: 3 | train_loss: 0.0281 | train_acc: 0.9999 | test_loss: 0.0151 | test_acc: 1.0000 | lr: 0.001000
Epoch: 4 | train_loss: 0.0166 | train_acc: 1.0000 | test_loss: 0.0095 | test_acc: 1.0000 | lr: 0.001000
Epoch: 5 | train_loss: 0.0112 | train_acc: 1.0000 | test_loss: 0.0066 | test_acc: 1.0000 | lr: 0.001000
Epoch: 6 | train_loss: 0.0083 | train_acc: 1.0000 | test_loss: 0.0048 | test_acc: 1.0000 | lr: 0.001000
Epoch: 7 | train_loss: 0.0063 | train_acc: 1.0000 | test_loss: 0.0038 | test_acc: 1.0000 | lr: 0.001000
Epoch: 8 | train_loss: 0.0051 | train_acc: 1.0000 | test_loss: 0.0029 | test_acc: 1.0000 | lr: 0.001000
Epoch: 9 | train_loss: 0.0038 | train_acc: 1.0000 | test_loss: 0.0026 | test_acc: 1.0000 | lr: 0.001000
Epoch: 10 | train_loss: 0.0033 | train_acc: 1.0000 | test_loss: 

KeyboardInterrupt: 

In [None]:
# Single source of truth for the checkpoint filename, so the save and load
# calls below cannot drift apart.
# NOTE(review): the recorded training output lists 37 classes, while this
# filename (and the Android export cell) assume 2 -- confirm which dataset the
# saved weights actually belong to.
CHECKPOINT_NAME = "2_CLASSES_MODEL.pth"

saver = ModelSaver(target_dir="models")

# Persist the model, then read it straight back through the same saver.
# NOTE(review): this saves `ASL_mobilenet`, not the `TRAINED_MODEL` returned by
# trainer.train(); verify they refer to the same weights.
saver.save_model(model=ASL_mobilenet, model_name=CHECKPOINT_NAME)
loaded_model = saver.load_model(model=ASL_mobilenet, model_name=CHECKPOINT_NAME)

In [None]:
# Wrap the reloaded model together with the label list and the preprocessing
# transform used by the rest of the notebook.
predictor = ComprehensiveImagePredictor(
    loaded_model,
    class_names,
    train_transforms,
)

# Run predictions on nine images taken from `test_dir` and display them.
# seed=None is passed through unchanged.
predictor.predict_and_visualize_multiple(
    model=loaded_model,
    directory=test_dir,
    transformer=train_transforms,
    num_samples=9,
    seed=None,
)

# Path to one class folder of the 2-class test set (not used in this cell).
file_path = "./2_CLASS_DS/test/1"

In [14]:
# Score the reloaded model on the test loader; the returned mapping is read
# below through its 'accuracy' and 'classification_report' keys.
evaluator = ModelEvaluator(loaded_model)
results = evaluator.evaluate_and_report(test_dataloader, class_names)

# Emit the three report lines in one call, newline-separated.
print(
    f"Accuracy: {results['accuracy']:.4f}",
    "Classification Report:",
    results['classification_report'],
    sep="\n",
)

Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        20
           A       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



#### Prepare the model for Android

#### Save for Android

In [18]:
import torch
import torchvision.models as models
import os

# --- Export configuration ---
OUT_FEATURES = 2                      # size of the classifier head stored in the checkpoint
MODEL_NAME = "2_CLASSES_MODEL.pth"
MODEL_PATH = os.path.join("models", MODEL_NAME)
# Same base name without the extension; ".ptl" is appended when saving.
PATH_TO_SAVE_MODEL = os.path.join("models", "android", os.path.splitext(MODEL_NAME)[0])
# Spatial size of the tracing example. The preprocessing printed earlier crops
# to 224, and the load check in the next cell feeds 224x224, so trace at 224 too.
INPUT_SIZE = 224

# Ensure the directory exists
os.makedirs(os.path.dirname(PATH_TO_SAVE_MODEL), exist_ok=True)

# Build an uninitialised MobileNetV2 (`weights=None` is the current spelling of
# the deprecated `pretrained=False`) and swap in a head of the right width.
model = models.mobilenet_v2(weights=None)
model.classifier[1] = torch.nn.Linear(in_features=1280, out_features=OUT_FEATURES)

# Load the state dict from the file.
# map_location="cpu" lets a checkpoint written from a CUDA model load on a
# machine without a GPU; weights_only=True restricts unpickling to tensor data.
state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

# Switch to inference mode before tracing so dropout is inactive.
model.eval()

example_input = torch.rand(1, 3, INPUT_SIZE, INPUT_SIZE)  # batch of 1, 3 channels
traced_model = torch.jit.trace(model, example_input)

# Save the traced model
traced_model._save_for_lite_interpreter(f"{PATH_TO_SAVE_MODEL}.ptl")
print(f"Model saved to {PATH_TO_SAVE_MODEL}.ptl")

Model saved to models\android\2_CLASSES_MODEL.ptl


In [20]:
import torch

# Reload the exported Android artifact and push one random image through it as
# a smoke test.
# NOTE(review): torch.jit.load presumably goes through the regular TorchScript
# runtime rather than the mobile lite interpreter -- confirm whether the lite
# loader should be used to validate the Android code path.
LITE_MODEL_PATH = "./models/android/2_CLASSES_MODEL.ptl"
lite_model = torch.jit.load(LITE_MODEL_PATH)

example_input = torch.rand(1, 3, 224, 224)
# No gradients are needed for a shape check.
with torch.no_grad():
    output = lite_model(example_input)

print(f"Output shape: {output.shape}")

Output shape: torch.Size([1, 2])
