In [44]:
import cv2
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torchvision.transforms as transforms
from PIL import Image
import os


In [46]:
# GPU setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [70]:
def load_dataset(dataset_path):
    images = []
    labels = []
    
    # Define subdirectories to process
    subdirs = ['books', 'handwritten']
    
    # Define a fixed size for Braille character images
    BRAILLE_CHAR_SIZE = (28, 28)
    
    for subdir in subdirs:
        subdir_path = os.path.join(dataset_path, subdir)
        train_txt_path = os.path.join(subdir_path, 'train.txt')
        
        if not os.path.exists(train_txt_path):
            print(f"Warning: {train_txt_path} not found. Skipping.")
            continue
        
        # Read train.txt to get list of training images
        with open(train_txt_path, 'r') as f:
            train_files = f.read().splitlines()
        
        for file in train_files:
            img_path = os.path.join(subdir_path, file)
            csv_path = os.path.splitext(img_path)[0] + '.csv'
            
            if not os.path.exists(img_path):
                print(f"Warning: Image file {img_path} not found. Skipping.")
                continue
            
            if not os.path.exists(csv_path):
                print(f"Warning: CSV file {csv_path} not found. Skipping.")
                continue
            
            try:
                # Load image
                img = Image.open(img_path).convert('L')  # Convert to grayscale
                img = transforms.ToTensor()(img)
                
                # Load CSV annotation
                with open(csv_path, 'r') as f:
                    for line in f:
                        try:
                            left, top, right, bottom, label = map(float, line.strip().split(';'))
                            
                            # Extract Braille character
                            char_img = img[:, int(top*img.shape[1]):int(bottom*img.shape[1]), 
                                           int(left*img.shape[2]):int(right*img.shape[2])]
                            
                            # Resize the Braille character to a fixed size
                            char_img_resized = transforms.Resize(BRAILLE_CHAR_SIZE)(char_img)
                            
                            # Flatten the image for SVM input
                            char_vector = char_img_resized.flatten().numpy()
                            
                            images.append(char_vector)
                            labels.append(int(label))
                        except ValueError:
                            print(f"Warning: Invalid line in {csv_path}. Skipping.")
            except Exception as e:
                print(f"Error processing {img_path}: {str(e)}")
    
    if not images:
        raise ValueError("No valid images found in the dataset.")
    
    return np.array(images), np.array(labels)

def train_svm_model(X_train, y_train):
    model = svm.SVC(kernel='rbf', C=1.0, random_state=42)
    model.fit(X_train, y_train)
    return model

def translate_braille_realtime(model, scaler):
    cap = cv2.VideoCapture(0)
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Preprocess the frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (224, 224))
        normalized = resized / 255.0
        
        # Detect Braille characters (You might need to implement a detection algorithm here)
        # For simplicity, let's assume we're translating the whole frame
        flattened = normalized.flatten()
        scaled = scaler.transform([flattened])
        
        # Predict using SVM
        prediction = model.predict(scaled)[0]
        
        # Convert prediction to Braille character
        braille_char = chr(0x2800 + prediction)  # Unicode Braille patterns start at U+2800
        
        # Display result
        cv2.putText(frame, f"Detected: {braille_char}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Braille Translator', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    dataset_path = os.path.join(os.getcwd(), "AngelinaDataset-master")
    
    # Load and preprocess dataset
    print("Loading dataset...")
    try:
        X, y = load_dataset(dataset_path)
        
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Normalize the data
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # Train SVM model
        print("Training SVM model...")
        model = train_svm_model(X_train_scaled, y_train)
        
        # Evaluate the model
        accuracy = model.score(X_test_scaled, y_test)
        print(f"Model accuracy: {accuracy:.2f}")
        
        # Start real-time translation
        print("Starting real-time translation. Press 'q' to quit.")
        translate_braille_realtime(model, scaler)
    except Exception as e:
        print(f"An error occurred: {str(e)}")


Loading dataset...
Training SVM model...
Model accuracy: 0.96
Starting real-time translation. Press 'q' to quit.
An error occurred: X has 50176 features, but StandardScaler is expecting 784 features as input.
