In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader, random_split
import pytorch_lightning as pl
from PIL import Image

import kagglehub

In [2]:
# Training hyperparameters shared by the cells below.
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 1e-3

# Fetch the latest version of the dataset through kagglehub; the returned
# value is used below as the directory that holds the .npy files.
DATASET_HANDLE = "ardamavi/27-class-sign-language-dataset"
DATA_DIR = kagglehub.dataset_download(DATASET_HANDLE)

In [3]:
# Locations of the image and label arrays inside the downloaded dataset.
# os.path.join keeps the separator correct on every platform.
X_PATH = os.path.join(DATA_DIR, "X.npy")
Y_PATH = os.path.join(DATA_DIR, "Y.npy")

# Size every image is resized to before it reaches the model.
IMAGE_SIZE = (64, 64)

# Per-sample preprocessing: convert to a tensor, resize to IMAGE_SIZE, then
# normalise with mean 0.5 and std 0.5 (the single value is applied to every
# channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(IMAGE_SIZE),
    transforms.Normalize((0.5,), (0.5,)),
])

X = np.load(X_PATH)
Y = np.load(Y_PATH)

# Fail early with a clear message instead of hitting an index error deep
# inside a DataLoader worker when images and labels are misaligned.
if len(X) != len(Y):
    raise ValueError(
        f"X and Y must contain the same number of samples, got {len(X)} and {len(Y)}"
    )

print("Размер массива X:", X.shape)
print("Размер массива Y:", Y.shape)

Размер массива X: (22801, 128, 128, 3)
Размер массива Y: (22801, 1)


In [4]:
# Collapse the label array to one dimension (one label per sample).
Y_flattened = np.asarray(Y).reshape(-1)
if Y_flattened.shape[0] != len(Y):
    # Guards the one-label-per-sample contract: a (N, k) array with k > 1
    # would otherwise be flattened into N * k labels.
    raise ValueError(f"expected exactly one label per sample, got Y of shape {np.shape(Y)}")

# A single np.unique call yields both the sorted distinct labels and, for
# every sample, the position of its label in that sorted array. This replaces
# the per-element Python loop and dictionary lookup.
unique_classes, inverse_indices = np.unique(Y_flattened, return_inverse=True)

# Original label -> contiguous class index; kept for later lookups.
class_to_idx = {cls: idx for idx, cls in enumerate(unique_classes)}

# int64 is the dtype nn.CrossEntropyLoss expects for class-index targets.
Y_numeric = inverse_indices.reshape(-1).astype(np.int64)

num_classes = len(unique_classes)

In [5]:
class NumpyDataset(Dataset):
    """Dataset backed by in-memory arrays of images and labels.

    Each item is ``(image, label)``. The image at the requested index is
    converted to a PIL image and, when a ``transform`` was supplied, passed
    through it; the label is returned exactly as stored.

    Args:
        data: Indexable collection of image arrays. Supported layouts are
            channels-last ``(H, W, C)`` with ``C`` in ``{1, 3}``,
            channels-first ``(C, H, W)`` and plain grayscale ``(H, W)``.
            Floating-point images are assumed to be in ``[0, 1]``; integer
            images are assumed to already be in ``[0, 255]``.
        labels: Indexable collection of labels aligned with ``data``.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _to_uint8_image(image):
        """Return ``image`` as a uint8 array laid out the way PIL expects.

        The result has shape ``(H, W)`` for single-channel input and
        ``(H, W, C)`` otherwise.

        Raises:
            ValueError: if ``image`` is neither 2-D nor 3-D.
        """
        image = np.asarray(image)

        if image.ndim == 3:
            # A trailing axis of size 1 or 3 is treated as the channel axis;
            # anything else is assumed to be channels-first and moved last.
            if image.shape[-1] not in (1, 3):
                image = np.transpose(image, (1, 2, 0))
            # PIL cannot build an image from (H, W, 1); drop the channel axis.
            if image.shape[-1] == 1:
                image = image[..., 0]
        elif image.ndim != 2:
            raise ValueError(
                f"expected a 2-D or 3-D image array, got shape {image.shape}"
            )

        if np.issubdtype(image.dtype, np.integer):
            # Already on the 0-255 scale: multiplying by 255 would wrap around.
            return np.clip(image, 0, 255).astype(np.uint8)

        # Float data on the [0, 1] scale; clip first so that out-of-range
        # values saturate instead of wrapping in the uint8 cast.
        return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)

    def __getitem__(self, idx):
        image = Image.fromarray(self._to_uint8_image(self.data[idx]))
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


dataset = NumpyDataset(X, Y_numeric, transform=transform)

# 80 % of the samples go to training, the remainder to validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same samples land in the validation set on every
# run; otherwise validation losses from different runs are not comparable.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader is shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity check: pull one batch and show the tensor shapes.
for images, labels in train_loader:
    print("Размер батча изображений:", images.shape)
    print("Размер батча меток:", labels.shape)
    break


Размер батча изображений: torch.Size([32, 3, 64, 64])
Размер батча меток: torch.Size([32])


In [6]:
class GestureCNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel 64x64 images.

    Each stage is a 3x3 convolution, a ReLU and a 2x2 max-pool, so a 64x64
    input reaches the classifier head as a 128 x 8 x 8 feature map. The head
    is a 512-unit hidden layer with dropout followed by the output layer.

    Args:
        n_classes: Number of output classes. When omitted, the module-level
            ``num_classes`` is used, which keeps ``GestureCNN()`` working.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Backward-compatible default: class count computed by the notebook.
            n_classes = num_classes

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 128 channels x 8 x 8 spatial positions left after three 2x2 pools
        # applied to a 64x64 input.
        self.fc1 = nn.Linear(128 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, n_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits of shape ``(batch, n_classes)`` for ``x``.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce the
                128 * 8 * 8 features ``fc1`` expects (i.e. it is not 64x64).
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 8192),
        # this never folds surplus features into the batch dimension, so a
        # wrongly sized input fails in fc1 instead of silently changing the
        # batch size.
        x = torch.flatten(x, 1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


class GestureClassifier(pl.LightningModule):
    """Lightning module that trains a ``GestureCNN`` with cross-entropy loss.

    The learning rate is read from the module-level ``LEARNING_RATE`` at
    construction time and used by the Adam optimizer.
    """

    def __init__(self):
        super().__init__()
        self.model = GestureCNN()
        self.loss_fn = nn.CrossEntropyLoss()
        self.learning_rate = LEARNING_RATE

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        return self.model(x)

    def _batch_loss(self, batch):
        """Compute the loss for one ``(inputs, labels)`` batch."""
        images, targets = batch
        predictions = self(images)
        return self.loss_fn(predictions, targets)

    def training_step(self, batch, batch_idx):
        """Log the batch loss as ``train_loss`` and return it."""
        train_loss = self._batch_loss(batch)
        self.log("train_loss", train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Log the batch loss as ``val_loss``; nothing is returned."""
        self.log("val_loss", self._batch_loss(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

In [7]:
model = GestureClassifier()

# accelerator="auto" lets Lightning pick a GPU when one is present and fall
# back to the CPU otherwise, so the cell no longer depends on a GPU being
# available (the recorded run reports "GPU available: False").
trainer = pl.Trainer(max_epochs=EPOCHS, accelerator="auto", devices=1, logger=True)
trainer.fit(model, train_loader, val_loader)

# Persist only the weights of the wrapped CNN, not the Lightning wrapper.
torch.save(model.model.state_dict(), "gesture_model.pth")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
C:\Users\Alexg\anaconda3\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | GestureCNN       | 4.3 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
17.208    Total estimated mo

Sanity Checking: |                                                                               | 0/? [00:00<…

C:\Users\Alexg\anaconda3\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
C:\Users\Alexg\anaconda3\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |                                                                                      | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
# Hand-picked images mapped to the label each one is expected to receive.
# NOTE(review): "?" presumably marks an image with no expected class — confirm.
test = {"twofingers.png": 2, "5fingers.jpg": 5, "3fing.jpg": 3, "wtf.jpg": "?"}

# Switch dropout off; in training mode the predictions are random-masked.
model.eval()

for name, expected in test.items():
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(name) as source_image:
        image = source_image.convert("RGB")

    # Reuse the training preprocessing so inference sees the same pipeline
    # (the cell used to rebuild a differently ordered transform on every
    # iteration and overwrite the global `transform`).
    image_tensor = transform(image).unsqueeze(0).to(model.device)

    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted = torch.max(outputs, 1)

    # The network outputs an index into `unique_classes`; map it back to the
    # original label before comparing. Comparing as strings works whether the
    # dataset labels are stored as text or as numbers.
    predicted_label = unique_classes[predicted.item()]

    if str(predicted_label) == str(expected):
        print("Predicted class:", predicted_label, expected, "Совпало!")
    else:
        print("Predicted class:", predicted_label, expected, "Не совпало :(")