In [None]:
import numpy as np
import ast
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, GlobalAveragePooling1D, Concatenate, LayerNormalization, MultiHeadAttention, Masking
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from collections import Counter
from sklearn.utils import class_weight


![الوصف هنا](image/pipe.png)

In [None]:
# Load the training and test tables from CSV.
# NOTE(review): both reads use the exact same path (a file literally named
# ".csv"), so `train` and `test` would hold identical rows and every later
# "test" metric would be measured on training data — confirm the intended
# file names. The absolute home-directory paths also tie the notebook to a
# single machine.
train = pd.read_csv('/home/shatha/projects_unix/sign_model/data_labels/.csv')
test = pd.read_csv('/home/shatha/projects_unix/sign_model/data_labels/.csv')

In [None]:
# Display the last rows of the test table as a quick visual check of the load.
test.tail()

In [None]:
# Print every column name of the training table.
print(train.columns.tolist())


In [None]:
# Columns that hold labels/metadata and must never be fed to the model.
non_feature_cols = ['signerID', 'sign', 'NoFrames', 'SignID', 'Sign-Arabic', 'Sign-English']

# Keep the non-metadata columns whose name ends in _X or _Y, in sorted order so
# the feature axis is deterministic.
# The earlier two-pass version additionally tested
# `any(k in col for k in feature_cols)`; every candidate column is a member of
# that list and trivially contains itself, so the test was always true and only
# added a quadratic scan. Dropping it selects exactly the same columns.
feature_cols = sorted(
    col for col in train.columns
    if col not in non_feature_cols and col.endswith(('_X', '_Y'))
)

# NOTE(review): the original remark here said the count "must be 106", while
# later cells hard-code 108 features — confirm which number is expected.
print(f"عدد الأعمدة المختارة: {len(feature_cols)}")


In [None]:
import numpy as np
import ast

def build_input_matrix(df, num_frames, feature_cols):
    """Pack per-frame feature sequences into a dense, zero-padded tensor.

    Each cell of ``df[col]`` is expected to hold one sequence of numbers for one
    sample, either as its string literal (e.g. ``"[0.1, 0.2]"``, parsed with
    ``ast.literal_eval``) or as an already-parsed sequence (list/tuple/array).

    Args:
        df: DataFrame with one row per sample.
        num_frames: number of time steps kept per sample; longer sequences are
            truncated, shorter ones are left zero-padded at the end.
        feature_cols: column names to read, in the order they should appear on
            the last axis of the result.

    Returns:
        float32 array of shape ``(len(df), num_frames, len(feature_cols))``.
        A cell that cannot be parsed or assigned is reported on stdout and its
        whole (sample, feature) slot is left at zero (best-effort behaviour).
    """
    num_features = len(feature_cols)
    X = np.zeros((len(df), num_frames, num_features), dtype=np.float32)

    for i, col in enumerate(feature_cols):
        # Fetch the column once instead of doing df[col].iloc[j] for every row.
        cells = df[col].tolist()
        for j, cell in enumerate(cells):
            try:
                # Only strings need parsing; literal_eval rejects real lists,
                # which previously caused pre-parsed data to be zeroed out.
                raw = ast.literal_eval(cell) if isinstance(cell, str) else cell
                length = min(len(raw), num_frames)
                X[j, :length, i] = raw[:num_frames]
            except Exception as e:
                # Deliberate best-effort: log and keep the slot as padding.
                print(f" خطأ في العمود {col} والسطر {j}: {e}")
                X[j, :, i] = 0.0

    return X


In [None]:
# Convert both tables into (samples, frames, features) tensors, keeping at most
# 30 frames per sample (shorter sequences stay zero-padded).
X_train = build_input_matrix(train, num_frames=30, feature_cols=feature_cols)
X_test = build_input_matrix(test, num_frames=30, feature_cols=feature_cols)

# Report the resulting tensor shapes.
print(" X_train shape:", X_train.shape)
print(" X_test shape:", X_test.shape)


4-ترميز التسميات (Label Encoding)



In [None]:
# Map the Arabic sign names to integer class ids.
# The encoder is fitted on the training labels only; the test labels are
# encoded with that same mapping.
le = LabelEncoder()
y_train = le.fit_transform(train['Sign-Arabic'])
y_test = le.transform(test['Sign-Arabic'])

In [None]:
def scale_video_sequences(X, scaler=None):
    """Standardize feature values per frame while leaving padding frames untouched.

    Args:
        X: array whose last axis indexes features (callers in this notebook
            pass ``(samples, frames, features)``). It is not modified.
        scaler: an already-fitted object exposing ``transform``. When ``None``,
            a new ``StandardScaler`` is fitted on the non-padding frames of X.

    Returns:
        ``(scaled, scaler)`` — a scaled copy with the same shape as ``X`` and
        the scaler that was used (the new one if it was fitted here).

    Raises:
        ValueError: if ``scaler`` is ``None`` and ``X`` contains no non-padding
            frame to fit on.
    """
    # One row per frame: (samples * frames, features).
    reshaped = X.reshape(-1, X.shape[-1])

    # A frame whose features are all exactly zero is treated as padding.
    valid_rows = ~np.all(reshaped == 0, axis=1)
    valid_data = reshaped[valid_rows]
    has_valid = valid_data.shape[0] > 0

    if scaler is None:
        if not has_valid:
            raise ValueError(
                "scale_video_sequences: cannot fit a scaler because X contains "
                "no non-zero frames"
            )
        scaler = StandardScaler()
        scaler.fit(valid_data)

    scaled_data = reshaped.copy()
    # Skip the transform for an all-padding input instead of handing the
    # scaler an empty array.
    if has_valid:
        scaled_data[valid_rows] = scaler.transform(valid_data)

    return scaled_data.reshape(X.shape), scaler


5-تسوية البيانات (Scaling)


In [None]:
# Fit the scaler on the training tensor, then reuse that same scaler for the
# test tensor.
# NOTE: both names are rebound to their scaled versions, so running this cell a
# second time scales already-scaled data.
X_train, scaler = scale_video_sequences(X_train)
X_test, _ = scale_video_sequences(X_test, scaler)


In [None]:
# Persist the scaled training tensor (relative to the working directory) and
# the fitted scaler / label encoder (absolute paths under hybird_v3_toptransfprm).
np.save('X_train.npy', X_train)
joblib.dump(scaler, '/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/scaler.joblib')
joblib.dump(le, '/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/label_encoder.joblib')

In [None]:
# Summarize the final tensor/label shapes and the number of classes.
print(" الأبعاد النهائية:")
print(f"X_train: {X_train.shape} (عينات, إطارات, سمات)")
print(f"X_test: {X_test.shape}")
print(f"y_train: {y_train.shape} (تصنيفات)")
print(f"عدد الفئات: {len(le.classes_)}")

# Bar chart of how many training samples each encoded class id has.
# np.unique returns (class ids, counts), unpacked as the x and height arguments.
import matplotlib.pyplot as plt
plt.bar(*np.unique(y_train, return_counts=True))
plt.title("توزيع الفئات في y_train")
plt.show()

In [None]:
import numpy as np
# Print "<sign name>: <number of training samples>" for every class.
unique, counts = np.unique(y_train, return_counts=True)
# Decode all class ids in a single call instead of one call per id.
for class_name, n_samples in zip(le.inverse_transform(unique), counts):
    print(f"{class_name}: {n_samples}")


In [None]:

i = 50  # arbitrary sample index
sample = X_test[i]
# NOTE(review): X_test was already passed through scale_video_sequences in an
# earlier cell, so this "before scaling" readout is of scaled data and the
# transform below applies the scaler a second time (zero padding frames
# included) — confirm whether this check should use unscaled data.
print("قبل التقييس:", sample.shape, sample.min(), sample.max())
print(np.isnan(sample).any())  # any NaN present?

# Take the dimensions from the sample itself instead of hard-coding (30, 108),
# so the check still runs if the number of frames or feature columns changes.
num_frames, num_features = sample.shape
scaled = scaler.transform(sample.reshape(-1, num_features)).reshape(1, num_frames, num_features)
print("بعد التقييس:", scaled.shape, scaled.min(), scaled.max())
print(np.isnan(scaled).any())  # any NaN present?


In [None]:


# Count training samples per encoded class id and order the counts by id.
class_counts = Counter(y_train)
sorted_counts = dict(sorted(class_counts.items()))

# Bar chart of the per-class sample counts.
plt.figure(figsize=(12, 4))
plt.bar(sorted_counts.keys(), sorted_counts.values())
plt.title("توزيع الفئات في y_train")
plt.xlabel("الفئة")
plt.ylabel("عدد العينات")
plt.show()

# Report the largest and smallest class sizes and their ratio (a simple
# imbalance measure).
counts = np.array(list(class_counts.values()))
max_count = counts.max()
min_count = counts.min()
print(f"أكبر فئة: {max_count} | أصغر فئة: {min_count}")
print(f"النسبة بين أكبر وأصغر فئة: {round(max_count / min_count, 2)}")


معالجة عدم التوازن بين الفئات باستخدام أوزان فئات مخفّفة (soft class weights)

In [None]:


# "Balanced" class weights computed from the training labels.
raw_class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)

# Soften the weights by raising them to a power below 1, which pulls every
# weight toward 1 and so reduces the spread between rare and common classes.
attenuation_factor = 0.3  
soft_weights = raw_class_weights ** attenuation_factor

# Key the weights by position (0, 1, 2, ...), following the order of
# np.unique(y_train); this dict is later passed to fit(class_weight=...).
class_weights = dict(enumerate(soft_weights))

# Print each class id with its softened weight.
for i, w in class_weights.items():
    print(f"الفئة {i}: الوزن = {round(w, 3)}")


In [None]:


def transformer_encoder(inputs, head_size=96, num_heads=4, ff_dim=4, dropout=0.1):
    """Apply one pre-norm transformer encoder block to a sequence tensor.

    The block is: LayerNorm -> self-attention -> dropout -> residual add,
    followed by LayerNorm -> Dense(ff_dim, gelu) -> dropout -> Dense back to
    the input width -> residual add.

    Args:
        inputs: Keras tensor; its last dimension is the width restored by the
            final Dense layer.
        head_size: total attention width; each head gets
            ``head_size // num_heads`` as its ``key_dim``.
        num_heads: number of attention heads.
        ff_dim: units of the intermediate feed-forward Dense layer.
        dropout: rate used inside attention and by both Dropout layers.

    Returns:
        Keras tensor with the same last dimension as ``inputs``.
    """
    # Self-attention sub-block.
    normed_inputs = LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=head_size // num_heads,
        dropout=dropout,
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = Dropout(dropout)(attended)
    res = attended + inputs

    # Position-wise feed-forward sub-block.
    feed_forward = LayerNormalization(epsilon=1e-6)(res)
    feed_forward = Dense(ff_dim, activation="gelu")(feed_forward)
    feed_forward = Dropout(dropout)(feed_forward)
    # Project back to the input width so the residual add is shape-compatible.
    model_width = inputs.shape[-1]
    feed_forward = Dense(model_width)(feed_forward)
    return feed_forward + res

def build_hybrid_model(input_shape, num_classes, mask_value=-10.0):
    """Build a two-branch (transformer + LSTM) sequence classifier.

    Args:
        input_shape: shape of one sample, passed straight to ``Input``.
        num_classes: number of units of the softmax output layer.
        mask_value: value handed to the ``Masking`` layer.

    Returns:
        An uncompiled Keras ``Model`` mapping a sequence to class probabilities.

    NOTE(review): build_input_matrix zero-fills missing frames and
    scale_video_sequences leaves all-zero frames at 0, while ``mask_value``
    defaults to -10.0 — so with the default, padding frames are presumably
    never masked. Confirm the intended padding/mask value.
    """
    inputs = Input(shape=input_shape)
    
    # Masking layer shared by both branches.
    masked = Masking(mask_value=mask_value)(inputs)
    
    # Transformer branch: two encoder blocks, then average over the time axis.
    x_trans = transformer_encoder(masked, head_size=96)
    x_trans = transformer_encoder(x_trans, head_size=48)
    x_trans = GlobalAveragePooling1D()(x_trans)
    
    # LSTM branch: stacked LSTMs; the second returns only its final output.
    x_lstm = LSTM(64, return_sequences=True)(masked)
    x_lstm = Dropout(0.3)(x_lstm)
    x_lstm = LSTM(32)(x_lstm)
    
    # Merge both branch summaries and classify.
    x = Concatenate()([x_trans, x_lstm])
    x = Dense(128, activation='gelu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# One sample is (frames, features), read from the prepared training tensor
# (e.g. (30, 108)).
input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = len(np.unique(y_train))
model = build_hybrid_model(input_shape, num_classes)
model.summary()

# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# NOTE(review): the test split doubles as validation data here, so the
# per-epoch "validation" curves and the final test score come from the same
# samples — consider holding out part of the training data instead.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=64,
    class_weight=class_weights
)

loss, accuracy = model.evaluate(X_test, y_test)
# Fixed: the f-string had no placeholder and printed the literal word
# "accuracy" instead of the measured value.
print(f"\n accuracy on test set : {accuracy}")
print(f"Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

In [None]:
import matplotlib.pyplot as plt
# Fixed: the f-string had no placeholder and printed the literal word
# "accuracy" instead of the measured value.
print(f"\n accuracy on test set : {accuracy}")
print(f"Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

# Left panel: accuracy per epoch.
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Right panel: loss per epoch.
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Save the first trained model in .keras format, relative to the working directory.
model.save("hybrid_model_final_108.keras") 


In [None]:
import joblib
import numpy as np

# Reload the scaler and label encoder from disk.
# Fixed: these were read from the hybird_v2_toptransfprm directory, but this
# notebook writes both artifacts to hybird_v3_toptransfprm; reading another
# directory silently swaps in whatever an older run left there.
# NOTE(review): confirm v3 is the intended artifact directory.
scaler = joblib.load("/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/scaler.joblib")
label_encoder = joblib.load("/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/label_encoder.joblib")

# Predict a single (already scaled) training sample. A leading batch axis is
# added instead of reshaping to a hard-coded (1, 30, 108).
sample = X_train[9000][np.newaxis, ...]
pred = model.predict(sample)
pred_label = label_encoder.inverse_transform([np.argmax(pred)])
true_label = label_encoder.inverse_transform([y_train[9000]])

print(" التوقع:", pred_label[0])
print(" الحقيقة:", true_label[0])


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dense, Dropout, 
    MultiHeadAttention, LayerNormalization, 
    GlobalAveragePooling1D, Concatenate, Masking
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


def transformer_encoder(inputs, head_size=128, num_heads=4, ff_dim=64, dropout=0.2):
    """Apply one pre-norm transformer encoder block (second definition).

    NOTE: this redefines ``transformer_encoder`` from an earlier cell with the
    same body but different defaults (head_size 128, ff_dim 64, dropout 0.2);
    once this cell runs, the earlier definition is shadowed.

    Args:
        inputs: Keras tensor; its last dimension is restored by the final Dense.
        head_size: total attention width; ``key_dim`` is ``head_size // num_heads``.
        num_heads: number of attention heads.
        ff_dim: units of the intermediate feed-forward Dense layer.
        dropout: rate used inside attention and by both Dropout layers.

    Returns:
        Keras tensor with the same last dimension as ``inputs``.
    """
    # Self-attention sub-block with a residual connection.
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(
        key_dim=head_size // num_heads,
        num_heads=num_heads,
        dropout=dropout
    )(x, x)
    x = Dropout(dropout)(x)
    res = x + inputs

    # Feed-forward sub-block; the last Dense projects back to the input width
    # so the residual add is shape-compatible.
    x = LayerNormalization(epsilon=1e-6)(res)
    x = Dense(ff_dim, activation="gelu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return x + res


def build_hybrid_model_v2(input_shape, num_classes, mask_value=-10.0):
    """Build the larger two-branch (transformer + LSTM) sequence classifier.

    Args:
        input_shape: shape of one sample, passed straight to ``Input``.
        num_classes: number of units of the softmax output layer.
        mask_value: value handed to the ``Masking`` layer.

    Returns:
        An uncompiled Keras ``Model`` mapping a sequence to class probabilities.
    """
    model_input = Input(shape=input_shape)
    masked_seq = Masking(mask_value=mask_value)(model_input)

    # Transformer branch: one encoder with default width, then two narrower
    # ones, followed by an average over the time axis.
    trans_features = transformer_encoder(masked_seq)
    for width in (96, 64):
        trans_features = transformer_encoder(trans_features, head_size=width)
    trans_features = GlobalAveragePooling1D()(trans_features)

    # LSTM branch: stacked LSTMs; the second returns only its final output.
    lstm_features = LSTM(128, return_sequences=True)(masked_seq)
    lstm_features = Dropout(0.4)(lstm_features)
    lstm_features = LSTM(64)(lstm_features)

    # Merge both branch summaries and classify.
    merged = Concatenate()([trans_features, lstm_features])
    hidden = Dense(128, activation='gelu')(merged)
    hidden = Dropout(0.5)(hidden)
    class_probs = Dense(num_classes, activation='softmax')(hidden)

    return Model(model_input, class_probs)


# Compilation and training section.
# One sample is (frames, features), read from the prepared training tensor
# (e.g. (30, 108)).
input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = len(np.unique(y_train))
model_v2 = build_hybrid_model_v2(input_shape, num_classes)

# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model_v2.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Stop after 5 epochs without improvement (restoring the best weights) and
# halve the learning rate after 2 stagnant epochs, down to at most 1e-6.
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(patience=2, factor=0.5, min_lr=1e-6)
]

# NOTE(review): the test split doubles as validation data, so early stopping
# and learning-rate decisions are driven by the same samples the final score
# is reported on — consider holding out part of the training data instead.
history_v2 = model_v2.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=64,
    class_weight=class_weights,
    callbacks=callbacks
)

loss_v2, accuracy_v2 = model_v2.evaluate(X_test, y_test)
# Fixed: the f-string had no placeholder and printed the literal word
# "accuracy" instead of the measured value.
print(f"\n accuracy on test set : {accuracy_v2}")


In [None]:
import matplotlib.pyplot as plt

# Report the final test metrics of the v2 model.
print(f"\n accuracy on test set : {accuracy_v2}")
print(f"Loss: {loss_v2:.4f}, Accuracy: {accuracy_v2:.4f}")

# One panel per metric: (train key, validation key, title, y-axis label).
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]

plt.figure(figsize=(12, 5))
for position, (train_key, val_key, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history_v2.history[train_key], label='Train')
    plt.plot(history_v2.history[val_key], label='Validation')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Save the v2 model in .keras format under the hybird_v2_toptransfprm directory.
model_v2.save("/home/shatha/projects_unix/sign_model/hybird_models/hybird_v2_toptransfprm/hybrid_model_v100.keras") 


In [None]:
import joblib
import numpy as np
# Reload the scaler and label encoder from disk.
# Fixed: these were read from the hybird_v2_toptransfprm directory, but this
# notebook writes both artifacts to hybird_v3_toptransfprm; reading another
# directory silently swaps in whatever an older run left there.
# NOTE(review): confirm v3 is the intended artifact directory.
scaler = joblib.load("/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/scaler.joblib")
label_encoder = joblib.load("/home/shatha/projects_unix/sign_model/hybird_models/hybird_v3_toptransfprm/label_encoder.joblib")

# Predict a single (already scaled) test sample. A leading batch axis is added
# instead of reshaping to a hard-coded (1, 30, 108).
sample = X_test[2900][np.newaxis, ...]
pred = model_v2.predict(sample)
pred_label = label_encoder.inverse_transform([np.argmax(pred)])
true_label = label_encoder.inverse_transform([y_test[2900]])

print(" التوقع:", pred_label[0])
print(" الحقيقة:", true_label[0])


In [None]:

# Compare predicted and true labels for the first ten test samples.
# All samples go through one batched predict() call instead of ten separate
# single-sample calls, and no (1, 30, 108) shape is hard-coded.
num_preview = 10
preview_probs = model_v2.predict(X_test[:num_preview])
for i, probs in enumerate(preview_probs):
    print(f"{i}: التوقع:", label_encoder.inverse_transform([np.argmax(probs)]),
          "| الحقيقة:", label_encoder.inverse_transform([y_test[i]]) )


In [None]:
import joblib
# Load a label encoder from the current working directory and list its classes.
# NOTE(review): no cell in this notebook writes "label_encoder.joblib" to the
# working directory (the encoder is dumped under an absolute path), so this
# presumably relies on a file produced elsewhere — confirm the path.
label_encoder = joblib.load("label_encoder.joblib")

print(label_encoder.classes_)
