In [None]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image, UnidentifiedImageError

# Paths
train_dir = 'ASL_dataset/asl_alphabet_train/asl_alphabet_train'

# Image properties
img_width, img_height = 64, 64
batch_size = 32

# Data generator with validation split
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)



def clean_dataset(folder):
    for root, _, files in os.walk(folder):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img.verify()  # Verify image integrity
            except (UnidentifiedImageError, OSError):
                print(f"Deleting invalid image: {file_path}")
                os.remove(file_path)

# Use it on your training folder
clean_dataset('ASL_dataset/asl_alphabet_train/asl_alphabet_train')


# Training data (80%)
train_data = datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Validation data (20%)
val_data = datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=True
)

# Model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(train_data.num_classes, activation='softmax')
])

# Compile model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summary
model.summary()

# Train model
model.fit(
    train_data,
    validation_data=val_data,
    epochs=10
)

# Save model
model.save("asl_cnn_model2.keras", save_format="keras_v3")



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load CSV file
df = pd.read_csv("fer2013.csv")

# Emotion labels
emotion_labels = {
    0: 'Angry',
    1: 'Disgust',
    2: 'Fear',
    3: 'Happy',
    4: 'Sad',
    5: 'Surprise',
    6: 'Neutral'
}

# Preprocessing
def preprocess_data(df):
    pixels = df['pixels'].tolist()
    faces = []
    for pixel_sequence in pixels:
        face = [int(pixel) for pixel in pixel_sequence.split()]
        face = np.asarray(face).reshape(48, 48)
        faces.append(face)
    faces = np.asarray(faces)
    faces = np.expand_dims(faces, -1)
    faces = faces / 255.0  # normalize
    emotions = to_categorical(df['emotion'], num_classes=7)
    return faces, emotions

# Split data
train_df = df[df['Usage'] == 'Training']
test_df = df[df['Usage'] == 'PublicTest']  # or 'PrivateTest'

x_train, y_train = preprocess_data(train_df)
x_test, y_test = preprocess_data(test_df)

# Build CNN model
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(48, 48, 1)),
    MaxPooling2D(2, 2),
    
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax')  # 7 emotion classes
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train model
history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=64
)

# Save model
model.save("fer2013_emotion_model2.keras", save_format="keras_v3")
print("Model saved as fer2013_emotion_model.keras")

# Plot accuracy
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import cv2
import numpy as np
import tensorflow as tf
import pyttsx3
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import time

# Load models
asl_model = load_model("asl_cnn_model.keras")
emotion_model = load_model("fer2013_emotion_model.keras")

# Load label maps
asl_labels = sorted(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))  # Assuming 26 classes
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Initialize TTS engine
engine = pyttsx3.init()

# Set up webcam
cap = cv2.VideoCapture(0)

sentence = ""
frame_count = 0
predicted_char = ""

def get_emotion(face_img_gray):
    resized = cv2.resize(face_img_gray, (48, 48))
    resized = resized.reshape(1, 48, 48, 1) / 255.0
    prediction = emotion_model.predict(resized, verbose=0)
    return emotion_labels[np.argmax(prediction)]

def get_sign_language_char(hand_img_color):
    resized = cv2.resize(hand_img_color, (64, 64))
    resized = resized.reshape(1, 64, 64, 3) / 255.0
    prediction = asl_model.predict(resized, verbose=0)
    return asl_labels[np.argmax(prediction)]

def speak_text(text, emotion):
    if emotion == "Sad":
        engine.setProperty("rate", 130)
        engine.setProperty("volume", 0.6)
    elif emotion == "Happy":
        engine.setProperty("rate", 180)
        engine.setProperty("volume", 1.0)
    elif emotion == "Angry":
        engine.setProperty("rate", 160)
        engine.setProperty("volume", 1.0)
    else:
        engine.setProperty("rate", 150)
        engine.setProperty("volume", 0.9)

    engine.say(text)
    engine.runAndWait()

print("Press 's' to save predicted character to sentence")
print("Press 't' to speak the sentence")
print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Flip for natural interaction
    frame = cv2.flip(frame, 1)

    # Define regions
    hand_region = frame[100:300, 400:600]
    face_region = frame[50:250, 50:250]
    face_gray = cv2.cvtColor(face_region, cv2.COLOR_BGR2GRAY)

    # Predict sign every 30 frames
    if frame_count % 30 == 0:
        predicted_char = get_sign_language_char(hand_region)

    frame_count += 1

    # Display hand and face regions
    cv2.rectangle(frame, (400, 100), (600, 300), (255, 0, 0), 2)
    cv2.rectangle(frame, (50, 50), (250, 250), (0, 255, 0), 2)

    # Display current predictions
    cv2.putText(frame, f"Predicted Sign: {predicted_char}", (20, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    cv2.putText(frame, f"Sentence: {sentence}", (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 255, 50), 2)

    cv2.imshow("Sign Language to Speech", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('s'):
        sentence += predicted_char
    elif key == ord('t'):
        emotion = get_emotion(face_gray)
        print(f"Detected emotion: {emotion}")
        speak_text(sentence, emotion)
        sentence = ""
    elif key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
!pip uninstall protobuf


In [None]:
print("Hello")

In [None]:
!pip install tensorflow==2.10.1

In [None]:
import tensorflow as tf
import numpy as np

# Create dummy data
x = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([[2.0], [4.0], [6.0], [8.0]])

# Build simple model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=[1])
])

# Compile and train
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(x, y, epochs=5)

# Predict
print(model.predict([[5.0]]))


In [None]:
!pip install opencv-python


In [None]:
import cv2
import numpy as np
import pyttsx3
from tensorflow.keras.models import load_model

# Load models
asl_model = load_model("asl_cnn_model.keras")
emotion_model = load_model("fer2013_emotion_model.keras")

# Correct label mapping based on dataset
asl_labels = ['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I',
              'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q',
              'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Open webcam
cap = cv2.VideoCapture(0)

# Variables
sentence = ""
stable_char = ""
live_prediction = ""
frame_count = 0
last_pred = ""
same_count = 0
stable_threshold = 15

def get_emotion(face_img_gray):
    resized = cv2.resize(face_img_gray, (48, 48))
    resized = resized.reshape(1, 48, 48, 1) / 255.0
    prediction = emotion_model.predict(resized, verbose=0)
    return emotion_labels[np.argmax(prediction)]

def get_sign_language_char(hand_img_color):
    resized = cv2.resize(hand_img_color, (64, 64))
    resized = resized.reshape(1, 64, 64, 3) / 255.0
    prediction = asl_model.predict(resized, verbose=0)
    confidence = np.max(prediction)
    label_index = np.argmax(prediction)

    if label_index >= len(asl_labels):
        return "", 0.0
    return asl_labels[label_index], confidence

def speak_text(text, emotion):
    if emotion == "Sad":
        engine.setProperty("rate", 130)
        engine.setProperty("volume", 0.6)
    elif emotion == "Happy":
        engine.setProperty("rate", 180)
        engine.setProperty("volume", 1.0)
    elif emotion == "Angry":
        engine.setProperty("rate", 160)
        engine.setProperty("volume", 1.0)
    else:
        engine.setProperty("rate", 150)
        engine.setProperty("volume", 0.9)

    engine.say(text)
    engine.runAndWait()

print("Press 's' to save predicted character to sentence")
print("Press 't' to speak the sentence")
print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)
    hand_region = frame[100:300, 400:600]
    face_region = frame[50:250, 50:250]
    face_gray = cv2.cvtColor(face_region, cv2.COLOR_BGR2GRAY)

    # Predict every 10 frames
    if frame_count % 10 == 0:
        label, conf = get_sign_language_char(hand_region)
        live_prediction = label  # Always show current prediction live

        if label == last_pred:
            same_count += 1
        else:
            same_count = 0
        last_pred = label

        if same_count >= stable_threshold:
            stable_char = label
            same_count = 0

        print(f"[Live] {label} (Conf: {conf:.2f})")

    frame_count += 1

    # Draw ROIs
    cv2.rectangle(frame, (400, 100), (600, 300), (255, 0, 0), 2)
    cv2.rectangle(frame, (50, 50), (250, 250), (0, 255, 0), 2)

    # Display live prediction and sentence
    cv2.putText(frame, f"Live Sign: {live_prediction}", (20, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    cv2.putText(frame, f"Sentence: {sentence}", (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 255, 50), 2)

    cv2.imshow("Sign Language to Speech", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('s') and stable_char != "":
        if stable_char == "space":
            sentence += " "
        elif stable_char == "del" and len(sentence) > 0:
            sentence = sentence[:-1]
        elif stable_char not in ["nothing", "space", "del"]:
            sentence += stable_char
        print(f"Updated sentence: {sentence}")
    elif key == ord('t'):
        emotion = get_emotion(face_gray)
        print(f"Detected emotion: {emotion}")
        speak_text(sentence, emotion)
        sentence = ""
    elif key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
asl_labels = sorted(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
print(asl_labels)

In [1]:
import cv2
import numpy as np
import pyttsx3
from tensorflow.keras.models import load_model

# Load models
asl_model = load_model("asl_cnn_model.keras")
emotion_model = load_model("fer2013_emotion_model.keras")

asl_labels = ['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I',
              'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q',
              'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

engine = pyttsx3.init()

cap = cv2.VideoCapture(0)

# Variables
sentence = ""
live_prediction = ""
stable_char = ""
last_pred = ""
same_count = 0
frame_count = 0
cooldown = 0

stable_threshold = 20  # How many same predictions to stabilize
predict_interval = 15  # Predict every N frames
cooldown_frames = 20   # Cooldown after saving a letter

def get_emotion(face_img_gray):
    resized = cv2.resize(face_img_gray, (48, 48))
    resized = resized.reshape(1, 48, 48, 1) / 255.0
    prediction = emotion_model.predict(resized, verbose=0)
    return emotion_labels[np.argmax(prediction)]

def get_sign_language_char(hand_img_color):
    resized = cv2.resize(hand_img_color, (64, 64))
    resized = resized.reshape(1, 64, 64, 3) / 255.0
    prediction = asl_model.predict(resized, verbose=0)
    confidence = np.max(prediction)
    label_index = np.argmax(prediction)
    if label_index >= len(asl_labels):
        return "", 0.0
    return asl_labels[label_index], confidence

def speak_text(text, emotion):
    if emotion == "Sad":
        engine.setProperty("rate", 130)
        engine.setProperty("volume", 0.6)
    elif emotion == "Happy":
        engine.setProperty("rate", 180)
        engine.setProperty("volume", 1.0)
    elif emotion == "Angry":
        engine.setProperty("rate", 160)
        engine.setProperty("volume", 1.0)
    else:
        engine.setProperty("rate", 150)
        engine.setProperty("volume", 0.9)
    engine.say(text)
    engine.runAndWait()

print("Press 's' to save predicted character to sentence")
print("Press 't' to speak the sentence")
print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)
    hand_region = frame[100:300, 400:600]
    face_region = frame[50:250, 50:250]
    face_gray = cv2.cvtColor(face_region, cv2.COLOR_BGR2GRAY)

    if frame_count % predict_interval == 0 and cooldown == 0:
        label, conf = get_sign_language_char(hand_region)
        live_prediction = label

        if label == last_pred:
            same_count += 1
        else:
            same_count = 1
        last_pred = label

        if same_count >= stable_threshold:
            stable_char = label
            same_count = 0
            print(f"[Stable Prediction]: {stable_char}")

    frame_count += 1
    if cooldown > 0:
        cooldown -= 1

    # Draw ROIs
    cv2.rectangle(frame, (400, 100), (600, 300), (255, 0, 0), 2)
    cv2.rectangle(frame, (50, 50), (250, 250), (0, 255, 0), 2)

    # Display predictions
    cv2.putText(frame, f"Live Sign: {live_prediction}", (20, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    cv2.putText(frame, f"Sentence: {sentence}", (20, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 255, 50), 2)

    cv2.imshow("Sign Language to Speech", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('s') and stable_char != "":
        if stable_char == "space":
            sentence += " "qq
        elif stable_char == "del" and len(sentence) > 0:
            sentence = sentence[:-1]
        elif stable_char not in ["nothing", "space", "del"]:
            sentence += stable_char
        cooldown = cooldown_frames  # Start cooldown
        print(f"[Added]: {stable_char} -> {sentence}")

    elif key == ord('t'):
        emotion = get_emotion(face_gray)
        print(f"[Emotion]: {emotion}")
        speak_text(sentence, emotion)
        sentence = ""

    elif key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



SyntaxError: invalid syntax (280513042.py, line 111)