In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
one = pd.read_csv("C:/Users/okeiy/Downloads/Gesture_Control/BSL/one_hand_dataset.csv")
two = pd.read_csv("C:/Users/okeiy/Downloads/Gesture_Control/BSL/two_hand_dataset.csv")

print("One-hand dataset:", one.shape)
print(one.head())

print("\nTwo-hand dataset:", two.shape)
print(two.head())

One-hand dataset: (10803, 64)
   0.24068817496299744  0.4678746461868286  6.431071142287692e-08  \
0             0.199214            0.830371           1.001268e-07   
1             0.650582            0.463097          -5.370000e-08   
2             0.200531            0.685270           3.237020e-07   
3             0.605458            0.464886           1.527884e-07   
4             0.525861            0.423194           9.633758e-08   

   0.2664448916912079  0.4628382921218872  -0.002034491  0.294532448  \
0            0.228846            0.817112     -0.001888     0.251727   
1            0.653664            0.426893      0.017183     0.663378   
2            0.258192            0.665581     -0.023378     0.320593   
3            0.628675            0.457653      0.001207     0.649279   
4            0.550582            0.418285      0.000873     0.572819   

   0.42700284719467163  -0.007113528  0.3162025511264801  ...  \
0             0.777591     -0.009224            0.272395 

In [3]:
print("One-hand dataset columns:", one.columns.tolist())
print("Two-hand dataset columns:", two.columns.tolist())

print("\nFirst rows (one hand):")
print(one.head())

print("\nFirst rows (two hands):")
print(two.head())

One-hand dataset columns: ['0.24068817496299744', '0.4678746461868286', '6.431071142287692e-08', '0.2664448916912079', '0.4628382921218872', '-0.002034491', '0.294532448', '0.42700284719467163', '-0.007113528', '0.3162025511264801', '0.40683475136756897', '-0.014570555', '0.33585307002067566', '0.4039977192878723', '-0.021814512', '0.2818275988101959', '0.34472230076789856', '-0.002686182', '0.29636070132255554', '0.2983071506023407', '-0.016535193', '0.3137657344341278', '0.2868458330631256', '-0.022923922', '0.3265419006347656', '0.2862139344215393', '-0.025821635', '0.26622167229652405', '0.33503055572509766', '-0.009285464', '0.30033841729164124', '0.32802969217300415', '-0.028072745', '0.2937704920768738', '0.3816888928413391', '-0.02813484', '0.28309571743011475', '0.39803066849708557', '-0.022410495', '0.2517828047275543', '0.33678191900253296', '-0.01746257', '0.28199702501296997', '0.35990774631500244', '-0.037090946', '0.27254512906074524', '0.4049283266067505', '-0.031218749

In [4]:
def pad_one_hand(df):
    # last column is label
    X = df.iloc[:, :-1].values.astype(np.float32)   # all but last
    y = df.iloc[:, -1].values                       # last column
    zeros = np.zeros((X.shape[0], 63), dtype=np.float32)  # pad missing hand
    X_pad = np.hstack([zeros, X])  # left=0, right=hand
    return X_pad, y

def load_two_hand(df):
    # last column is label
    X = df.iloc[:, :-1].values.astype(np.float32)   # all but last
    y = df.iloc[:, -1].values
    return X, y

X1, y1 = pad_one_hand(one)
X2, y2 = load_two_hand(two)

X = np.vstack([X1, X2])
y = np.concatenate([y1, y2])

print("Combined dataset:", X.shape, y.shape)
print("Example labels:", y[:10])

Combined dataset: (30515, 126) (30515,)
Example labels: ['C - c' 'C - c' 'C - c' 'C - c' 'C - c' 'C - c' 'C - c' 'C - c' 'C - c'
 'C - c']


In [5]:
le = LabelEncoder()
y_enc = le.fit_transform(y)

Xtr, Xte, ytr, yte = train_test_split(
    X, y_enc, test_size=0.2, stratify=y_enc, random_state=42
)

print("Train size:", Xtr.shape, "Test size:", Xte.shape)

Train size: (24412, 126) Test size: (6103, 126)


In [6]:
train_ds = TensorDataset(torch.tensor(Xtr, dtype=torch.float32), torch.tensor(ytr))
test_ds  = TensorDataset(torch.tensor(Xte, dtype=torch.float32), torch.tensor(yte))

train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl  = DataLoader(test_ds, batch_size=64)

In [7]:
class MLP(nn.Module):
    def __init__(self, in_dim=126, out_dim=len(le.classes_)):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, out_dim)
        )
    def forward(self, x): return self.net(x)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = MLP().to(device)
opt = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [8]:
for epoch in range(30):
    model.train()
    total_loss = 0
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device).long()  
        opt.zero_grad()
        loss = loss_fn(model(xb), yb)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss {total_loss/len(train_dl):.4f}")

Epoch 1, Loss 2.8398
Epoch 2, Loss 2.0258
Epoch 3, Loss 1.3009
Epoch 4, Loss 0.9417
Epoch 5, Loss 0.7467
Epoch 6, Loss 0.6250
Epoch 7, Loss 0.5425
Epoch 8, Loss 0.4869
Epoch 9, Loss 0.4422
Epoch 10, Loss 0.4111
Epoch 11, Loss 0.3792
Epoch 12, Loss 0.3644
Epoch 13, Loss 0.3358
Epoch 14, Loss 0.3156
Epoch 15, Loss 0.2992
Epoch 16, Loss 0.2873
Epoch 17, Loss 0.2591
Epoch 18, Loss 0.2541
Epoch 19, Loss 0.2425
Epoch 20, Loss 0.2312
Epoch 21, Loss 0.2293
Epoch 22, Loss 0.2215
Epoch 23, Loss 0.2106
Epoch 24, Loss 0.2000
Epoch 25, Loss 0.2048
Epoch 26, Loss 0.1921
Epoch 27, Loss 0.1935
Epoch 28, Loss 0.1842
Epoch 29, Loss 0.1715
Epoch 30, Loss 0.1737


In [9]:
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in test_dl:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb).argmax(1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)

print("Test Accuracy:", correct/total)

Test Accuracy: 0.9624774700966737


In [10]:
# torch.save({
#     "model_state_dict": model.state_dict(),
#     "classes": le.classes_.tolist()
# }, "bsl_model_126.pth")

torch.save({
    "model_state_dict": model.state_dict(),
    "classes": le.classes_.tolist()
}, "bsl_model_126.pth", _use_new_zipfile_serialization=True)


print("Model saved as bsl_model_126.pth")

Model saved as bsl_model_126.pth


In [11]:
import cv2, mediapipe as mp, pyttsx3, collections, time
import numpy as np
import torch, torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

class MLP(nn.Module):
    def __init__(self, in_dim=126, out_dim=10):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, out_dim)
        )
    def forward(self, x): 
        return self.net(x)

ckpt = torch.load("bsl_model_126.pth", map_location="cpu")
classes = ckpt["classes"]

infer_model = MLP(in_dim=126, out_dim=len(classes)).to(device)
infer_model.load_state_dict(ckpt["model_state_dict"])
infer_model.eval()

mp_hands = mp.solutions.hands
engine = pyttsx3.init()

def hands_to_vec(res):
    if not res.multi_hand_landmarks:
        return None
    left63, right63 = None, None
    for i, hand in enumerate(res.multi_hand_landmarks):
        flat = []
        for lm in hand.landmark:
            flat += [lm.x, lm.y, lm.z]
        handedness = res.multi_handedness[i].classification[0].label
        if handedness.lower().startswith("left"):
            left63 = flat
        else:
            right63 = flat
    def nz(arr): 
        return np.array(arr, dtype=np.float32) if arr is not None else np.zeros(63, dtype=np.float32)
    return np.concatenate([nz(left63), nz(right63)])

def clean_label(label):
    return label.split()[0].split('-')[0].strip().upper()

use_camera = False
video_path = r"C:\Users\okeiy\Downloads\Gesture_Control\1.mp4"

if use_camera:
    cap = cv2.VideoCapture(0)
else:
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(
        "output_video.mp4", fourcc, cap.get(cv2.CAP_PROP_FPS),
        (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    )

buf = collections.deque(maxlen=8)

last_added_time = 0
cooldown = 0.5    # seconds before accepting next letter
display_time = 5.0  # how long to show a letter before disappearing

current_text = ""
text_timestamp = 0

with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7, min_tracking_confidence=0.7) as hands:
    while True:
        ret, frame = cap.read()
        if not ret: break
        if use_camera: frame = cv2.flip(frame, 1)

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res = hands.process(rgb)

        pred_txt = "-"
        vec = hands_to_vec(res)
        if vec is not None:
            x = torch.tensor(vec, dtype=torch.float32).unsqueeze(0).to(device)
            with torch.no_grad():
                probs = torch.softmax(infer_model(x), dim=1).cpu().numpy()[0]
                idx = np.argmax(probs)
                conf = probs[idx]
                pred_class = clean_label(classes[idx])
                pred_txt = f"{pred_class} ({conf:.2f})"

                buf.append(pred_class)

                if len(buf) == buf.maxlen and len(set(buf)) == 1:
                    now = time.time()
                    if now - last_added_time > cooldown:
                        current_text = buf[0]       # only keep current stable letter
                        text_timestamp = now        # reset timer
                        last_added_time = now
                    buf.clear()

        # Clear text after display_time seconds
        if time.time() - text_timestamp > display_time:
            current_text = ""

        # overlay
        cv2.putText(frame, f"Pred: {pred_txt}", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.8,(0,255,0),2)
        cv2.putText(frame, f"Text: {current_text}", (10,70), cv2.FONT_HERSHEY_SIMPLEX, 0.8,(255,255,255),2)

        cv2.imshow("BSL Sign → Text", frame)
        if not use_camera: out.write(frame)

        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'): break
        if k == ord('t') and current_text != "":
            engine.say(current_text); engine.runAndWait()

cap.release()
if not use_camera: out.release()
cv2.destroyAllWindows()


Using device: cuda


  ckpt = torch.load("bsl_model_126.pth", map_location="cpu")
