In [4]:
import os
import pandas as pd

def create_manifest(data_dir, split_name, exts=(".mp4", ".avi", ".mov")):
    data = []
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        for fname in os.listdir(label_dir):
            if fname.lower().endswith(exts):
                fpath = os.path.join(label_dir, fname)
                data.append({"video_path": fpath, "label": label})
    df = pd.DataFrame(data)
    df.to_csv(f"{split_name}_manifest.csv", index=False)
    print(f"Saved {split_name}_manifest.csv with {len(df)} samples")
    return df

# مثال:
train_manifest = create_manifest("archive/train", "train")
val_manifest   = create_manifest("archive/val", "val")

print(train_manifest.head())


Saved train_manifest.csv with 479 samples
Saved val_manifest.csv with 72 samples
                                          video_path          label
0  archive/train\Alhamdulillah\Alhamdulillah 1 - ...  Alhamdulillah
1  archive/train\Alhamdulillah\Alhamdulillah 1 - ...  Alhamdulillah
2  archive/train\Alhamdulillah\Alhamdulillah 1 - ...  Alhamdulillah
3    archive/train\Alhamdulillah\Alhamdulillah 1.mp4  Alhamdulillah
4  archive/train\Alhamdulillah\Alhamdulillah 2 - ...  Alhamdulillah


In [21]:
import os
import cv2
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# ============= CONFIG =============
TRAIN_DIR = "archive/train"
VAL_DIR   = "archive/val"
BATCH_SIZE = 4
EPOCHS = 5
LR = 1e-3
FRAME_COUNT = 16   # عدد الفريمات اللي بناخدها من كل فيديو
IMG_SIZE = 128
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ============= DATASET CLASS =============
class SignLanguageDataset(Dataset):
    def __init__(self, root_dir, frame_count=16, img_size=128):
        self.samples = []
        self.frame_count = frame_count
        self.img_size = img_size

        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            for fname in os.listdir(label_dir):
                if fname.lower().endswith((".mp4", ".avi", ".mov")):
                    self.samples.append({
                        "video_path": os.path.join(label_dir, fname),
                        "label": label
                    })

        # mapping labels -> indices
        labels = sorted({s["label"] for s in self.samples})
        self.label2idx = {label: i for i, label in enumerate(labels)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        video_path = self.samples[idx]["video_path"]
        label = self.samples[idx]["label"]
        frames = self._load_video_frames(video_path)
        label_idx = self.label2idx[label]
        return torch.tensor(frames, dtype=torch.float32), torch.tensor(label_idx)

    def _load_video_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            return np.zeros((self.frame_count, 3, self.img_size, self.img_size))

        # ناخد فريمات متوزعة بالتساوي
        frame_indices = np.linspace(0, total_frames - 1, self.frame_count).astype(int)
        frames = []
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (self.img_size, self.img_size))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = frame / 255.0
            frames.append(frame)
        cap.release()

        # لو الفيديو قصير → نكرر آخر فريم
        while len(frames) < self.frame_count:
            frames.append(frames[-1])

        return np.array(frames).transpose(0, 3, 1, 2)  # [T, C, H, W]

# ============= MODEL =============
class SignLanguageModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4))
        )
        self.lstm = nn.LSTM(64 * 4 * 4, 256, batch_first=True)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        # x: [B, T, C, H, W]
        B, T, C, H, W = x.shape
        x = x.view(B * T, C, H, W)
        feats = self.cnn(x)                # [B*T, 64, 4, 4]
        feats = feats.reshape(B, T, -1)    # [B, T, 1024]
        _, (h, _) = self.lstm(feats)       # h = [1, B, 256]
        out = self.fc(h[-1])               # [B, num_classes]
        return out

# ============= TRAINING LOOP =============
def train_model():
    train_dataset = SignLanguageDataset(TRAIN_DIR, FRAME_COUNT, IMG_SIZE)
    val_dataset   = SignLanguageDataset(VAL_DIR, FRAME_COUNT, IMG_SIZE)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = SignLanguageModel(num_classes=len(train_dataset.label2idx)).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for videos, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            videos, labels = videos.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(videos)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{EPOCHS}] Loss: {total_loss/len(train_loader):.4f}")

    # تقييم
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos, labels = videos.to(DEVICE), labels.to(DEVICE)
            outputs = model(videos)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    #print(f"Validation Accuracy: {100 * correct/total:.2f}%")

    # لازم يرجع الموديل والـ datasets
    return model, train_dataset, val_dataset

if __name__ == "__main__":
    train_model()


Epoch 1: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [11:16<00:00,  5.63s/it]


Epoch [1/5] Loss: 2.4851


Epoch 2: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [11:14<00:00,  5.62s/it]


Epoch [2/5] Loss: 2.4054


Epoch 3: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [12:43<00:00,  6.36s/it]


Epoch [3/5] Loss: 2.3194


Epoch 4: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [13:28<00:00,  6.74s/it]


Epoch [4/5] Loss: 2.1500


Epoch 5: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [12:38<00:00,  6.32s/it]


Epoch [5/5] Loss: 1.9347


In [24]:
# تدريب مرة واحدة فقط
model, train_dataset, val_dataset = train_model()

# دلوقتي تقدر تحفظه
torch.save({
    "model_state": model.state_dict(),
    "label2idx": train_dataset.label2idx
}, "sign_model.pth")

print("✅ Model saved as sign_model.pth")


Epoch 1: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [13:01<00:00,  6.51s/it]


Epoch [1/5] Loss: 2.4919


Epoch 2: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [12:35<00:00,  6.30s/it]


Epoch [2/5] Loss: 2.4078


Epoch 3: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [12:48<00:00,  6.40s/it]


Epoch [3/5] Loss: 2.2917


Epoch 4: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [12:40<00:00,  6.33s/it]


Epoch [4/5] Loss: 2.1741


Epoch 5: 100%|███████████████████████████████████████████████████████████████████████| 120/120 [11:32<00:00,  5.77s/it]


Epoch [5/5] Loss: 2.0750
✅ Model saved as sign_model.pth


In [39]:
import os
import cv2
from IPython.display import Video, display

# مسار الداتا بتاعتك (هنا بناخد من train بس، ممكن تعملي نسخة من val كمان)
DATA_DIR = "archive/train"

# 1. نبني قاموس: النص → قائمة فيديوهات
def build_text2video_map(data_dir):
    text2videos = {}
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        videos = [os.path.join(label_dir, f) for f in os.listdir(label_dir) 
                  if f.lower().endswith((".mp4", ".avi", ".mov"))]
        if videos:
            text2videos[label.lower()] = videos
    return text2videos

text2videos = build_text2video_map(DATA_DIR)

# 2. دالة تعرض فيديو لأي نص مدخل

# نسخة جديدة مناسبة للـ Notebook
def show_sign_video(text):
    text = text.lower()
    if text not in text2videos:
        print(f"❌ No sign video found for: {text}")
        return
    video_path = text2videos[text][0]
    print(f"✅ Showing sign video for: {text} -> {video_path}")
    display(Video(video_path, embed=True))  # هيعرض الفيديو جوه الخلية

# مثال للتشغيل
user_input = "Good bye"
show_sign_video(user_input)




✅ Showing sign video for: good bye -> archive/train\Good bye\Copy of good bye 1 (1).mp4


In [44]:
import torch
import cv2
import numpy as np

# ===== 1) تحميل الموديل المحفوظ =====
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
FRAME_COUNT = 16   # لازم يكون نفس اللي استخدمتيه في التدريب
IMG_SIZE = 128     # برضه نفس اللي في التدريب

# تحميل checkpoint
checkpoint = torch.load("sign_model.pth", map_location=DEVICE)

# إعادة بناء الموديل
model = SignLanguageModel(num_classes=len(checkpoint["label2idx"])).to(DEVICE)
model.load_state_dict(checkpoint["model_state"])
model.eval()

# تحويل من index → label
idx2label = {v: k for k, v in checkpoint["label2idx"].items()}

# ===== 2) دالة لتحميل فيديو وتحويله لفريمات =====
def load_video_frames(video_path, frame_count=FRAME_COUNT, img_size=IMG_SIZE):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < frame_count:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (img_size, img_size))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
    cap.release()
    
    # لو عدد الفريمات أقل → نكمل بزيرو
    while len(frames) < frame_count:
        frames.append(np.zeros((img_size, img_size, 3), dtype=np.uint8))
    
    frames = np.stack(frames, axis=0)          # [T, H, W, C]
    frames = frames.transpose(0, 3, 1, 2) / 255.0  # [T, C, H, W]
    return frames

# ===== 3) دالة التنبؤ =====
def predict(video_path):
    frames = load_video_frames(video_path)
    frames = torch.tensor(frames, dtype=torch.float32).unsqueeze(0).to(DEVICE)  # [1, T, C, H, W]
    
    with torch.no_grad():
        output = model(frames)
        pred_idx = output.argmax(dim=1).item()
    return idx2label[pred_idx]

# ===== 4) تجربة بفيديو من الفال =====
video_path = r"archive/val/Good evening/Copy of good evening 1 (1).mp4"  # حطي هنا أي فيديو من val
print("✅ Prediction:", predict(video_path))


✅ Prediction: Good evening


In [42]:
# نسخة خاصة للـ inference
class InferenceDataset(SignLanguageDataset):
    def __init__(self, frame_count, img_size):
        self.frame_count = frame_count
        self.img_size = img_size

# دالة التنبؤ
def predict(video_path):
    dataset = InferenceDataset(FRAME_COUNT, IMG_SIZE)
    frames = dataset._load_video_frames(video_path)   # [T, C, H, W]
    frames = torch.tensor(frames, dtype=torch.float32).unsqueeze(0).to(DEVICE)  # [1, T, C, H, W]

    with torch.no_grad():
        output = model(frames)
        pred = output.argmax(dim=1).item()
    return idx2label[pred]

# مثال
print("Prediction:", predict("test_video.mp4"))


Prediction: Thanks
