In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt


# Record the TensorFlow release used for this run next to the results.
print(f"TensorFlow Version: {tf.__version__}")

TensorFlow Version: 2.20.0


In [3]:
# --- Configuration -----------------------------------------------------------
# Root folder of the dataset. Set the HAGRID_DATASET_PATH environment variable
# to point the notebook at a different location; the original machine-specific
# path is kept as the default so existing runs are unaffected.
DATASET_PATH = os.environ.get(
    "HAGRID_DATASET_PATH",
    r"D:/Samvad_Setu_final/datasets/HaGRID dataset/hagrid-sample-30k-384p/hagrid_30k",
)
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE when loaded
BATCH_SIZE = 16   # reduced for CPU / Windows stability
SEED = 42         # seed for the train/validation split
NUM_CLASSES = 18  # width of the softmax classification head

In [4]:
# Seed the Python, NumPy and TensorFlow random generators so shuffling,
# augmentation and weight initialisation are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)


def _load_split(subset):
    """Load one side of the 80/20 train/validation split from DATASET_PATH.

    Args:
        subset: "training" or "validation".

    Returns:
        A batched ``tf.data.Dataset`` of (images, labels). No ``label_mode``
        is passed, so labels use the loader's default integer encoding.
    """
    # Both splits must share the same seed and validation_split, otherwise the
    # training and validation files could overlap.
    return tf.keras.utils.image_dataset_from_directory(
        DATASET_PATH,
        validation_split=0.2,
        subset=subset,
        seed=SEED,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
    )


train_ds = _load_split("training")
val_ds = _load_split("validation")

# Capture the label names now: the attribute is not available on the datasets
# returned by later cache()/shuffle()/prefetch() transformations, and the
# evaluation and inference cells need it.
class_names = train_ds.class_names
assert len(class_names) == NUM_CLASSES, (
    f"Expected {NUM_CLASSES} classes but found {len(class_names)} in {DATASET_PATH}"
)


Found 31833 files belonging to 18 classes.
Using 25467 files for training.
Found 31833 files belonging to 18 classes.
Using 6366 files for validation.


In [5]:
# Let tf.data pick the prefetch buffer size and overlap batch preparation
# with model execution for both splits.
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds = (split.prefetch(AUTOTUNE) for split in (train_ds, val_ds))


In [6]:

AUTOTUNE = tf.data.AUTOTUNE

# Training input pipeline: cache decoded batches, reshuffle them through a
# 1000-element buffer (elements are whole batches, since the dataset is
# already batched), then prefetch.
# NOTE(review): cache() is called without a filename, so the cached batches
# are held in memory - confirm the full training split fits in RAM.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation input pipeline: cached and prefetched, never shuffled here.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

In [7]:
# Random image perturbations, bundled as a model so they can be used as the
# first stage of a classifier. Keras applies these layers only in training
# mode; at inference they pass images through unchanged.
data_augmentation = models.Sequential(
    [
        layers.RandomFlip(mode="horizontal"),
        layers.RandomRotation(factor=0.15),
        layers.RandomZoom(height_factor=0.15),
        layers.RandomContrast(factor=0.2),
        layers.RandomBrightness(factor=0.2),
    ]
)

In [8]:
# ImageNet-pretrained EfficientNetB0 feature extractor without its classifier
# head.
# NOTE(review): the next cell rebinds `base_model` to EfficientNetB4, so this
# backbone is discarded on a top-to-bottom run - confirm which one is intended.
base_model = tf.keras.applications.EfficientNetB0(
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
)


In [8]:
# ImageNet-pretrained EfficientNetB4 feature extractor without its classifier
# head, built for IMG_SIZE x IMG_SIZE RGB inputs.
base_model = tf.keras.applications.EfficientNetB4(
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
)

# Freeze the backbone so the first training stage only fits the new head.
base_model.trainable = False

In [9]:
# Classifier: augmentation -> pretrained backbone -> pooled features -> head.
#
# * An explicit Input layer builds the model immediately, so model.summary()
#   can report output shapes and parameter counts.
# * No Lambda(preprocess_input) layer: for EfficientNet that function is a
#   pass-through (normalisation is built into the backbone itself), and Lambda
#   layers make the saved model harder to reload.
model = models.Sequential([
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    data_augmentation,
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.4),
    layers.Dense(NUM_CLASSES, activation="softmax"),
])

model.summary()

In [10]:
# Training setup for the frozen-backbone stage: AdamW with explicit weight
# decay, a sparse cross-entropy loss, and accuracy as the reported metric.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
    optimizer=tf.keras.optimizers.AdamW(
        weight_decay=1e-4,
        learning_rate=1e-3,
    ),
)

In [11]:
# The datasets are loaded without a `label_mode` argument, so targets are
# integer class ids. CategoricalCrossentropy expects one-hot targets and does
# not match them; use the sparse variant instead.
# NOTE(review): label smoothing is only offered by the one-hot loss. To bring
# it back, load the data with label_mode="categorical" and switch this loss
# (and the evaluation code) accordingly.
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
# Sanity check: print the image and label tensor shapes of one training batch.
for batch_images, batch_labels in train_ds.take(1):
    print(f"{batch_images.shape} {batch_labels.shape}")


In [12]:
# Functional-API classifier on top of the pretrained backbone.
# The input size comes from IMG_SIZE (not a literal 224) so it always matches
# the shape `base_model` was constructed with.
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Pass-through for EfficientNet (normalisation lives inside the backbone);
# kept so the graph stays valid if the backbone family is swapped.
x = tf.keras.applications.efficientnet.preprocess_input(inputs)

# training=False runs the backbone in inference mode on every call,
# regardless of whether the outer model is being trained.
x = base_model(x, training=False)

# Classification head: pooled features -> BN -> hidden layer -> softmax.
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)


In [None]:
# Stage 1: train with Adam at 1e-3 and a sparse cross-entropy loss, reporting
# accuracy.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

# Ten passes over the training split, validating after each epoch.
history = model.fit(train_ds, epochs=10, validation_data=val_ds)


Epoch 1/10


In [None]:
# --- Stage 2: fine-tune the top of the backbone ------------------------------
# This cell previously did not parse (unindented loop bodies and a stray
# Markdown code fence) and contained the same steps twice; it is now a single
# runnable fine-tuning setup.

# Make the backbone trainable, then re-freeze everything except its last 30
# layers so only the top of the network is updated.
base_model.trainable = True

for layer in base_model.layers[:-30]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen tail: the
# Keras EfficientNet fine-tuning guide warns that making them trainable
# sharply reduces accuracy right after unfreezing.
for layer in base_model.layers[-30:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Re-compile so the changed `trainable` flags take effect, with a 10x lower
# learning rate than stage 1. The sparse loss matches the integer labels the
# datasets yield (no `label_mode` is passed when loading them).
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
# Classifier: augmentation -> pretrained backbone -> pooled features -> head.
#
# * An explicit Input layer builds the model immediately, so model.summary()
#   can report output shapes and parameter counts.
# * No Lambda(preprocess_input) layer: for EfficientNet that function is a
#   pass-through (normalisation is built into the backbone itself), and Lambda
#   layers make the saved model harder to reload.
model = models.Sequential([
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    data_augmentation,
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.4),
    layers.Dense(NUM_CLASSES, activation="softmax"),
])

model.summary()


In [None]:
# Stage-1 training setup: AdamW (lr 1e-3, weight decay 1e-4), a sparse
# cross-entropy loss, and accuracy as the reported metric.
model.compile(
    metrics=["accuracy"],
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.AdamW(weight_decay=1e-4, learning_rate=1e-3),
)


In [None]:
# ImageNet-pretrained EfficientNetB0 feature extractor without its classifier
# head.
# NOTE(review): this rebinds `base_model` after `model` has already been
# built, so the new network is not the one inside `model`, and it is not
# frozen here. Confirm whether this cell should run before the model is
# constructed.
base_model = tf.keras.applications.EfficientNetB0(
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
)


In [None]:
import tensorflow as tf

# Silence TensorFlow's Python logger below ERROR level.
tf.get_logger().setLevel("ERROR")


In [None]:
# Stage 1: ten epochs on the training split, validating after each epoch.
history = model.fit(train_ds, epochs=10, validation_data=val_ds)

In [None]:
# --- Stage 2: fine-tune the top of the backbone ------------------------------
# Unfreeze the backbone that is actually part of `model`. The global
# `base_model` may have been rebound to a different network since `model` was
# built, in which case toggling it would leave `model` unchanged.
# The lookup relies on Keras' default "efficientnetb<N>" model names and falls
# back to `base_model` when no such layer is found.
backbone = next(
    (layer for layer in model.layers if layer.name.startswith("efficientnet")),
    base_model,
)
backbone.trainable = True

# Re-freeze everything except the last 30 layers.
for layer in backbone.layers[:-30]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen tail: the
# Keras EfficientNet fine-tuning guide warns that making them trainable
# sharply reduces accuracy right after unfreezing.
for layer in backbone.layers[-30:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Re-compile so the changed `trainable` flags take effect, with a 10x lower
# learning rate than stage 1.
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)


In [None]:
# Stage 2: fifteen more epochs with the settings from the latest compile()
# call, validating after each epoch.
history_fine = model.fit(train_ds, epochs=15, validation_data=val_ds)

In [None]:
# Per-class precision / recall / F1 on the validation split.
true_batches, pred_batches = [], []

# A single pass over the dataset keeps every prediction aligned with its
# label, whatever order the batches arrive in.
for images, labels in val_ds:
    batch_labels = labels.numpy()
    # The loader is called without `label_mode`, so labels are a 1-D vector of
    # class ids and argmax over axis 1 would fail. One-hot labels are still
    # decoded, in case the loader is switched to label_mode="categorical".
    if batch_labels.ndim > 1:
        batch_labels = batch_labels.argmax(axis=1)
    true_batches.append(batch_labels)

    # predict_on_batch avoids the per-call progress bar that predict() prints
    # for every batch.
    batch_probs = model.predict_on_batch(images)
    pred_batches.append(np.argmax(batch_probs, axis=1))

y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

# Passing `labels` pins the report rows to the full class list, so a class
# that is absent from this split does not break the `target_names` mapping.
print(
    classification_report(
        y_true,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
    )
)

In [None]:
# Persist the trained classifier to a single file in the working directory.
model.save(filepath="hagrid_gesture_efficientnet.h5")

In [None]:
# Classify one sample image and display it with the predicted label.
# The path is built from DATASET_PATH so it follows the configured location.
img_path = f"{DATASET_PATH}/train_val_rock/0a3aa1a5-b1c8-4936-ab74-999f1545e742.jpg"

# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message instead of inside cvtColor.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# OpenCV decodes to BGR; the model was trained on RGB images.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_resized = cv2.resize(img_rgb, (IMG_SIZE, IMG_SIZE))

# Add the batch axis and cast to float32, the dtype of the training batches.
input_img = np.expand_dims(img_resized, axis=0).astype("float32")
input_img = tf.keras.applications.efficientnet.preprocess_input(input_img)

prediction = model.predict(input_img)
class_id = int(np.argmax(prediction))

plt.imshow(img_rgb)
plt.title(f"Predicted Gesture: {class_names[class_id]}")
plt.axis("off")
plt.show()

In [None]:
# Live demo: classify webcam frames and overlay the predicted gesture.
# Press "q" in the preview window to stop.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam (device index 0).")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing, unplugged or stream ended).
            break

        # Same preparation as training data: IMG_SIZE x IMG_SIZE, RGB,
        # float32, with a leading batch axis.
        img = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_img = np.expand_dims(img_rgb, axis=0).astype("float32")
        input_img = tf.keras.applications.efficientnet.preprocess_input(input_img)

        # predict_on_batch skips the per-call setup predict() performs, which
        # matters when it is invoked once per frame.
        preds = model.predict_on_batch(input_img)
        class_id = int(np.argmax(preds))
        label = class_names[class_id]

        cv2.putText(frame, label, (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)

        cv2.imshow("HaGRID Gesture Recognition", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # is interrupted or an error is raised; otherwise the device stays locked
    # until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()