<a href="https://colab.research.google.com/github/SCCSMARTCODE/Deep-Learning-00/blob/main/ASL_Alphabet/ASL_Alphabet_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Unpack the dataset archive stored on Google Drive into the working directory.
import zipfile

PATH = "/content/drive/MyDrive/Deep Learning/ASL Alphabet/archive.zip"
EXTRACTED_FILE_PATH = "."

# Read-only is the default mode for ZipFile; the context manager closes the
# archive once every member has been written out.
with zipfile.ZipFile(PATH) as f:
    f.extractall(path=EXTRACTED_FILE_PATH)

In [2]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as tt
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR
from torch.cuda.amp import autocast, GradScaler


# Root folders handed to ImageFolder when the datasets are built; both are
# relative to the working directory the archive was extracted into.
TRAINING_DATA_PATH = "asl_alphabet_train/asl_alphabet_train"
# NOTE(review): unlike the training root, this path is not nested one level
# deeper. ImageFolder takes labels from the sub-directories of its root, so
# confirm this layout yields the intended classes.
TESTING_DATA_PATH = "asl_alphabet_test/"

In [3]:
# Per-channel (mean, std) used to normalise every image.
# NOTE(review): presumably computed on the training images — confirm.
stats = ([0.4666, 0.4576, 0.4699], [0.1974, 0.2367, 0.2523])

# The network's fully connected head is sized for 128x128 inputs, so every
# pipeline must resize to exactly this shape.
IMAGE_SIZE = (128, 128)

# Training pipeline: resize, convert to a tensor, apply light random
# augmentation, then normalise.
train_transform = tt.Compose([
    tt.Resize(IMAGE_SIZE),
    tt.ToTensor(),
    tt.RandomHorizontalFlip(),
    tt.RandomRotation(degrees=10),
    tt.Normalize(*stats, inplace=False)
])

# Evaluation pipeline: the same resize and normalisation as training but with
# no random augmentation. Previously the test set only went through
# ToTensor(), so its images had neither the spatial size nor the value range
# the model was trained on.
test_transform = tt.Compose([
    tt.Resize(IMAGE_SIZE),
    tt.ToTensor(),
    tt.Normalize(*stats, inplace=False)
])

raw_train_dataset = ImageFolder(root=TRAINING_DATA_PATH, transform=train_transform)
raw_test_dataset = ImageFolder(root=TESTING_DATA_PATH, transform=test_transform)

In [4]:
# Sanity check: number of images ImageFolder found under TRAINING_DATA_PATH.
print(len(raw_train_dataset))

87000


In [72]:
# Hold out a fixed number of training images for validation. The training size
# is derived from the dataset length instead of being hard-coded, and the split
# is seeded so re-running this cell always holds out the same images
# (an unseeded re-split after training would move already-seen images into
# the validation set).
VAL_SIZE = 1000
SPLIT_SEED = 42

train_dataset, val_dataset = random_split(
    raw_train_dataset,
    [len(raw_train_dataset) - VAL_SIZE, VAL_SIZE],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# NOTE(review): val_dataset is a view of raw_train_dataset, so it also goes
# through the random augmentations in train_transform — confirm that is wanted.
train_dl = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
val_dl = DataLoader(val_dataset, batch_size=128, num_workers=2, pin_memory=True)
test_dl = DataLoader(raw_test_dataset, batch_size=8)

In [48]:
"""
Designing The ASLNetwork
"""

class ASLNetwork(nn.Module):
    """Convolutional classifier for ASL alphabet images.

    Architecture, in forward order:

    1. ``initial_conv_block``      3 -> 64 -> 128 -> 128 channels, two 2x max-pools
    2. ``residual_block1``         three 128-channel conv units, added to their input
    3. ``intermediate_conv_block`` 128 -> 512 -> 1024 -> 1024 channels, two 2x max-pools
    4. ``residual_block2``         three 1024-channel conv units, added to their input
    5. ``final_conv_block``        1024 -> 2048 -> 2048 channels, two 2x max-pools
    6. ``fully_connected``         flatten -> 512 -> 256 -> ``num_classes`` logits

    The six max-pools shrink each spatial side by a factor of 64 and the first
    ``Linear`` layer expects ``2048 * 2 * 2`` features, so inputs must be
    3-channel images of 128x128 pixels.

    Layer order inside every ``nn.Sequential`` is unchanged from the original
    hand-written definition, so ``state_dict`` keys stay the same.

    Args:
        num_classes: Number of output logits. Defaults to 29, the value that
            was previously hard-coded in the last ``Linear`` layer.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels, pool=False):
        """Return the layers of one Conv3x3 -> ReLU -> [MaxPool2] -> BatchNorm unit."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2))
        layers.append(nn.BatchNorm2d(out_channels))
        return layers

    def __init__(self, num_classes=29):
        super().__init__()
        unit = self._conv_unit

        # Initial Convolutional Block
        self.initial_conv_block = nn.Sequential(
            *unit(3, 64),
            *unit(64, 128, pool=True),
            *unit(128, 128, pool=True),
        )

        # Residual Block 1 (shape-preserving, so it can be added to its input)
        self.residual_block1 = nn.Sequential(
            *unit(128, 128),
            *unit(128, 128),
            *unit(128, 128),
        )

        # Intermediate Convolutional Block
        self.intermediate_conv_block = nn.Sequential(
            *unit(128, 512),
            *unit(512, 1024, pool=True),
            *unit(1024, 1024, pool=True),
        )

        # Residual Block 2 (shape-preserving, so it can be added to its input)
        self.residual_block2 = nn.Sequential(
            *unit(1024, 1024),
            *unit(1024, 1024),
            *unit(1024, 1024),
        )

        # Final Convolutional Block
        self.final_conv_block = nn.Sequential(
            *unit(1024, 2048, pool=True),
            *unit(2048, 2048, pool=True),
        )

        # Fully Connected Layers
        self.fully_connected = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048 * 2 * 2, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.BatchNorm1d(512),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.BatchNorm1d(256),

            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor of logits."""
        x = self.initial_conv_block(x)
        x = x + self.residual_block1(x)
        x = self.intermediate_conv_block(x)
        x = x + self.residual_block2(x)
        x = self.final_conv_block(x)
        x = self.fully_connected(x)
        return x



# Choose the GPU when one is visible (CPU otherwise), build the network and
# place it on that device. The bare `model` on the last line shows the layer
# summary as the cell output.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ASLNetwork()
model = model.to(device)
model


ASLNetwork(
  (initial_conv_block): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (residual_block1): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(128

In [74]:
# Training hyper-parameters consumed by the optimizer / scheduler cell.

# `lr` passed to Adam.
LEARNING_RATE=1e-4
# `max_lr` passed to OneCycleLR; currently the same value as LEARNING_RATE.
MAX_LR=0.0001
# Number of epochs per fit() call; also used to size the OneCycleLR schedule.
EPOCHS=1
# `weight_decay` passed to Adam.
WEIGHT_DECAY = 1e-4
# The two `betas` passed to Adam.
BETA1 = 0.9
BETA2 = 0.999

In [73]:
# Optimizer, one-cycle learning-rate schedule, loss and AMP gradient scaler
# used by fit().
# NOTE(review): the original author flagged unspecified "issues" with this
# Adam setup — confirm what was meant before relying on it.
optimizer = Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    betas=(BETA1, BETA2),
    weight_decay=WEIGHT_DECAY,
)
# The schedule length is epochs * steps_per_epoch, i.e. one fit() run over train_dl.
scheduler = OneCycleLR(
    optimizer,
    max_lr=MAX_LR,
    epochs=EPOCHS,
    steps_per_epoch=len(train_dl),
)
criterion = nn.CrossEntropyLoss()
scaler = GradScaler()

In [75]:
def accuracy(batch_loader, model):
    """Return the fraction of samples in ``batch_loader`` that ``model`` classifies correctly.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the device the model's parameters live on, so the function does
    not depend on a module-level ``device`` variable. Mixed precision is used
    only when the model is on a CUDA device; inputs are no longer cast to half
    by hand, because autocast already does that on CUDA and the manual cast
    produced a dtype mismatch with the float32 weights on CPU.

    Args:
        batch_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: ``nn.Module`` returning one row of class scores per sample.

    Returns:
        Correct predictions divided by total samples, as a float in ``[0, 1]``.
        ``0.0`` when the loader yields no samples.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    use_amp = target_device.type == "cuda"
    autocast_device = "cuda" if use_amp else "cpu"

    accurate_pred = 0
    pred_count = 0
    with torch.no_grad():
        for inputs, labels in batch_loader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)

            with torch.autocast(device_type=autocast_device, enabled=use_amp):
                preds = model(inputs)
                # Only the arg-max index is needed; the max score itself is discarded.
                _, predicted = torch.max(preds, 1)
            accurate_pred += (predicted == labels).sum().item()
            pred_count += labels.size(0)

    if pred_count == 0:
        return 0.0
    return accurate_pred / pred_count


def evaluate(batch_loader, model, criterion):
    """Return the mean per-batch loss of ``model`` over ``batch_loader``.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the device the model's parameters live on, so the function does
    not depend on a module-level ``device`` variable. Mixed precision is used
    only when the model is on a CUDA device; inputs are no longer cast to half
    by hand, because autocast already does that on CUDA and the manual cast
    produced a dtype mismatch with the float32 weights on CPU.

    Args:
        batch_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: ``nn.Module`` producing predictions accepted by ``criterion``.
        criterion: Callable ``criterion(preds, labels)`` returning a scalar tensor.

    Returns:
        Sum of the per-batch losses divided by the number of batches, as a
        float. ``nan`` when the loader yields no batches.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    use_amp = target_device.type == "cuda"
    autocast_device = "cuda" if use_amp else "cpu"

    total_loss = 0
    num_batches = 0
    with torch.no_grad():
        for inputs, labels in batch_loader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)

            with torch.autocast(device_type=autocast_device, enabled=use_amp):
                preds = model(inputs)
                loss = criterion(preds, labels)
            total_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        return float("nan")
    return total_loss / num_batches


In [76]:
def fit(epochs, train_dl, val_dl, optimizer, criterion, scheduler, model):
    """Train ``model`` for ``epochs`` epochs and validate after each one.

    Every training step runs the forward pass (under autocast when the model
    is on a CUDA device), back-propagates the scaled loss through the
    module-level ``scaler``, steps the optimizer and then steps ``scheduler``
    once, i.e. the scheduler advances per batch, not per epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a module-level ``device`` variable. Inputs are
    no longer cast to half by hand, because autocast already does that on CUDA
    and the manual cast produced a dtype mismatch with the float32 weights on
    CPU.

    Args:
        epochs: Number of passes over ``train_dl``.
        train_dl: Loader yielding ``(inputs, labels)`` training batches; must
            support ``len()``.
        val_dl: Loader passed to ``evaluate`` and ``accuracy`` after each epoch.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss callable ``criterion(preds, labels)``.
        scheduler: Learning-rate scheduler stepped once per training batch.
        model: The ``nn.Module`` being trained.

    Returns:
        Tuple ``(train_losses, val_losses, val_accuracies)``, each a list with
        one entry per epoch.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    use_amp = target_device.type == "cuda"
    autocast_device = "cuda" if use_amp else "cpu"

    train_losses = []
    val_losses = []
    val_accuracies = []

    for epoch in range(epochs):
        # evaluate()/accuracy() leave the model in eval mode, so re-enable
        # training behaviour at the start of every epoch.
        model.train()
        train_loss = 0

        for inputs, labels in train_dl:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)

            optimizer.zero_grad()

            with torch.autocast(device_type=autocast_device, enabled=use_amp):
                pred = model(inputs)
                loss = criterion(pred, labels)

            # Scale the loss before backward, then let the scaler drive the
            # optimizer step and update its own scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            train_loss += loss.item()

        # Mean of the per-batch training losses for this epoch.
        train_loss /= len(train_dl)
        train_losses.append(train_loss)

        val_loss = evaluate(val_dl, model, criterion)
        val_accuracy = accuracy(val_dl, model)

        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy * 100:.2f}%")

    return train_losses, val_losses, val_accuracies

In [77]:
# Train on the training split for EPOCHS epochs, validating on val_dl after
# each epoch, and keep the per-epoch histories.
train_losses, val_losses, val_accuracies = fit(
    epochs=EPOCHS,
    train_dl=train_dl,
    val_dl=val_dl,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    model=model,
)

Epoch 1/1, Train Loss: 0.0014, Val Loss: 0.0001, Val Accuracy: 100.00%


In [78]:
# Persist only the model's state_dict (not the whole module object) to the
# Google Drive folder the archive was read from.
torch.save(model.state_dict(), "/content/drive/MyDrive/Deep Learning/ASL Alphabet/parameter.pth")

In [66]:
# NOTE(review): val_dl is passed as BOTH the training and the validation
# loader, so this call trains the model on the validation split and then
# scores it on the same data — confirm this was only a quick smoke test.
# NOTE(review): `scheduler` is the OneCycleLR instance sized for a single
# fit() run over train_dl; re-using it here presumably continues past the end
# of that schedule — verify, or build a fresh scheduler for this run.
train_losses, val_losses, val_accuracies = fit(
    epochs=EPOCHS,
    train_dl=val_dl,
    val_dl=val_dl,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    model=model,
)

Epoch 1/3, Train Loss: 0.0016, Val Loss: 0.0002, Val Accuracy: 100.00%
Epoch 2/3, Train Loss: 0.0019, Val Loss: 0.0002, Val Accuracy: 100.00%
Epoch 3/3, Train Loss: 0.0014, Val Loss: 0.0002, Val Accuracy: 100.00%


In [80]:
# Report the mean per-batch validation loss of the current model.
print(
    evaluate(
        batch_loader=val_dl,
        model=model,
        criterion=criterion,
    )
)

8.941562646214152e-05


In [14]:
# Release cached GPU memory held by PyTorch's allocator back to the driver.
torch.cuda.empty_cache()