In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split

# GPU setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
class BrailleDataset(Dataset):
    def __init__(self, dataset_path, transform=None):
        self.transform = transform
        self.images = []
        self.labels = []
        
        subdirs = ['books', 'handwritten']
        
        for subdir in subdirs:
            subdir_path = os.path.join(dataset_path, subdir)
            train_txt_path = os.path.join(subdir_path, 'train.txt')
            
            if not os.path.exists(train_txt_path):
                print(f"Warning: {train_txt_path} not found. Skipping.")
                continue
            
            with open(train_txt_path, 'r') as f:
                train_files = f.read().splitlines()
            
            for file in train_files:
                img_path = os.path.join(subdir_path, file)
                csv_path = os.path.splitext(img_path)[0] + '.csv'
                
                if not os.path.exists(img_path) or not os.path.exists(csv_path):
                    continue
                
                img = Image.open(img_path).convert('L')
                
                with open(csv_path, 'r') as f:
                    for line in f:
                        try:
                            left, top, right, bottom, label = map(float, line.strip().split(';'))
                            char_img = img.crop((int(left*img.width), int(top*img.height),
                                                 int(right*img.width), int(bottom*img.height)))
                            self.images.append(char_img)
                            self.labels.append(int(label))
                        except ValueError:
                            print(f"Warning: Invalid line in {csv_path}. Skipping.")

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [8]:
class BrailleCNN(nn.Module):
    def __init__(self, num_classes):
        super(BrailleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

# Load dataset
dataset_path = os.path.join(os.getcwd(), "AngelinaDataset-master")
dataset = BrailleDataset(dataset_path, transform=transform)

# Split dataset
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

print(f"Training samples: {len(train_data)}")
print(f"Testing samples: {len(test_data)}")

Training samples: 46040
Testing samples: 11511


In [7]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

NameError: name 'model' is not defined

In [9]:
def translate_braille_realtime():
    cap = cv2.VideoCapture(0)
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Preprocess the frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        pil_img = Image.fromarray(gray)
        tensor_img = transform(pil_img).unsqueeze(0).to(device)
        
        # Predict using the model
        with torch.no_grad():
            output = model(tensor_img)
            prediction = torch.argmax(output, dim=1).item()
        
        # Convert prediction to Braille character
        braille_char = chr(0x2800 + prediction)
        
        # Display result
        cv2.putText(frame, f"Detected: {braille_char}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Braille Translator', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()

# Uncomment the following line to start real-time translation
# translate_braille_realtime()