In [None]:
!pip install mediapipe==0.10.14 opencv-python-headless

In [None]:
import cv2
import numpy as np
import base64
import json

# --- CONFIGURATION: The signs we need ---
# Word -> colour of its text and border. The tuples are in BGR channel order
# (the order OpenCV drawing calls use), which is why "blue" is (255, 0, 0).
_SIGN_TEXT_COLORS = {
    "HELLO": (0, 255, 0),        # green
    "THANKS": (0, 255, 255),     # yellow
    "YES": (255, 0, 0),          # blue
    "NO": (0, 0, 255),           # red
    "PLEASE": (255, 0, 255),     # purple
    "LOVE": (203, 192, 255),     # pink
    "PEACE": (255, 255, 255),    # white
    "YOU": (100, 255, 100),
    "OK": (100, 100, 255),
}

# Ordered list of (label, colour) pairs consumed by the card generator.
SIGNS_TO_GENERATE = list(_SIGN_TEXT_COLORS.items())

def create_flashcard(text, color):
    """Render a 300x300 text flashcard and return it as a JPEG data URL.

    Args:
        text: Word printed in the middle of the card.
        color: Colour tuple used for the border and the word. OpenCV drawing
            calls interpret it in BGR order.

    Returns:
        str: A ``data:image/jpeg;base64,...`` URL usable as an ``<img>`` source.

    Raises:
        RuntimeError: If OpenCV fails to JPEG-encode the card.
    """
    card_size = 300
    margin = 10
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 3

    # 1. Create black background (card_size x card_size pixels)
    img = np.zeros((card_size, card_size, 3), dtype=np.uint8)

    # 2. Add a colored border
    cv2.rectangle(img, (margin, margin), (card_size - margin, card_size - margin), color, 4)

    # 3. Add the grey "SIGN FOR:" header
    cv2.putText(img, "SIGN FOR:", (75, 100), font, 0.8, (150, 150, 150), 2)

    # 4. Center the main text. Words too wide for the card are shrunk step by
    #    step so they stay inside the border instead of being cut off.
    scale = 1.5
    max_width = card_size - 4 * margin
    text_width = cv2.getTextSize(text, font, scale, thickness)[0][0]
    while text_width > max_width and scale > 0.5:
        scale = round(scale - 0.1, 1)  # round() keeps the steps free of float drift
        text_width = cv2.getTextSize(text, font, scale, thickness)[0][0]
    text_x = max(margin, (card_size - text_width) // 2)
    text_y = 170

    # 5. Draw the text
    cv2.putText(img, text, (text_x, text_y), font, scale, color, thickness)

    # 6. Convert to Browser Format (Base64)
    encoded_ok, buffer = cv2.imencode('.jpg', img)
    if not encoded_ok:
        raise RuntimeError(f"Could not JPEG-encode flashcard for {text!r}")
    img_str = base64.b64encode(buffer).decode('utf-8')
    return f"data:image/jpeg;base64,{img_str}"

# Build the offline image database: one generated flashcard per configured sign.
print("🚀 Generating Synthetic Database...")
final_db = {}

for label, color in SIGNS_TO_GENERATE:
    # Key is lowercase because speech recognition returns lowercase
    final_db[label.lower()] = create_flashcard(label, color)
    print(f"   ✅ Created Card: {label}")

# Convert to JSON for the App
json_db = json.dumps(final_db)
print("🎉 Database Ready! (100% Offline & Safe)")

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import math
import json
import os
import base64
import time
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

# --- 1. SETUP BRAIN ---
# A single MediaPipe Hands tracker is created once and reused for every frame.
mp_hands = mp.solutions.hands
_hand_tracker_options = {
    "static_image_mode": False,       # frames come from a continuous stream
    "max_num_hands": 1,               # only one hand is tracked
    "min_detection_confidence": 0.7,
    "min_tracking_confidence": 0.5,
}
hands = mp_hands.Hands(**_hand_tracker_options)

# --- 2. JAVASCRIPT ENGINE (With Sentence Display) ---
def video_stream(custom_db):
  """Inject the browser-side webcam / speech UI script into the notebook output.

  The script defines a global ``stream_frame(label, sentence_text,
  speak_command)`` function. Each call updates the on-page labels, optionally
  speaks ``speak_command`` and resolves to ``{'img': <JPEG data URL>}`` for the
  next webcam frame, or to ``''`` once the user has clicked to stop (the DOM
  and the media stream are torn down at that point).

  Args:
    custom_db: JSON text of an object mapping lowercase words to image
      sources. It is spliced verbatim into the script as the
      ``signLanguageDB`` object literal, so it must already be valid JSON.
  """
  js = Javascript(f'''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    var sentenceElement;
    var speechElement;
    var signContainer;

    var pendingResolve = null;
    var shutdown = false;

    var signLanguageDB = {custom_db};
    var synth = window.speechSynthesis;

    // Speech recognition is not implemented by every browser. Without this
    // guard the constructor call throws and the camera UI never loads.
    var SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
    var recognition = SpeechRecognitionImpl ? new SpeechRecognitionImpl() : null;

    if (recognition) {{
      recognition.continuous = false;
      recognition.lang = 'en-US';
      recognition.interimResults = false;

      // --- SPEECH TO SIGN IMAGES (Visual Sentence) ---
      recognition.onresult = function(event) {{
          // The result can arrive after the UI has been torn down.
          if (!speechElement || !signContainer) return;

          var transcript = event.results[0][0].transcript.toLowerCase();
          speechElement.innerText = "Heard: " + transcript;
          speechElement.style.color = "lime";

          // Clear previous images
          signContainer.innerHTML = "";

          // Split sentence into words and show the image of every known word
          var foundAny = false;
          transcript.split(" ").forEach(word => {{
              // Remove punctuation
              var cleanWord = word.replace(/[^a-zA-Z]/g, "");

              // Only words that are keys of our DB have an image
              if (!Object.prototype.hasOwnProperty.call(signLanguageDB, cleanWord)) return;

              var img = document.createElement("img");
              img.src = signLanguageDB[cleanWord];
              img.style.width = "100px";
              img.style.border = "2px solid lime";
              img.style.margin = "5px";
              img.style.borderRadius = "10px";
              signContainer.appendChild(img);
              foundAny = true;
          }});

          if (!foundAny) {{
              signContainer.innerHTML = "<span style='color:white; background:black; padding:5px;'>No sign images found for this sentence.</span>";
          }}
      }};

      // Surface recognition failures instead of leaving "Listening..." forever.
      recognition.onerror = function(event) {{
          if (!speechElement) return;
          speechElement.innerText = "Mic error: " + event.error;
          speechElement.style.color = "red";
      }};
    }}

    function speakText(text) {{
        // Skip when speech synthesis is missing or still busy with an utterance.
        if (!synth || synth.speaking) return;
        var utterThis = new SpeechSynthesisUtterance(text);
        utterThis.rate = 0.9;
        synth.speak(utterThis);
    }}

    function removeDom() {{
       // The stream was opened with audio as well as video: stop every track
       // so the microphone is released together with the camera.
       if (stream) stream.getTracks().forEach(track => track.stop());
       if (video) video.remove();
       if (div) div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
       sentenceElement = null;
       speechElement = null;
       signContainer = null;
    }}

    function onAnimationFrame() {{
      if (!shutdown) {{
        window.requestAnimationFrame(onAnimationFrame);
      }}
      if (pendingResolve) {{
        // An empty result tells stream_frame that a stop was requested.
        var result = "";
        if (!shutdown) {{
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }}
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }}
    }}

    async function createDom() {{
      if (div !== null) {{
        return stream;
      }}
      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      div.style.position = 'relative';
      document.body.appendChild(div);

      // 1. AI STATUS
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>AI Sees:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = '...';
      labelElement.style.fontWeight = 'bold';
      labelElement.style.color = 'blue';
      labelElement.style.fontSize = '20px';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      // 2. SENTENCE BUILDER (New!)
      const sentenceOut = document.createElement('div');
      sentenceOut.style.backgroundColor = "#eee";
      sentenceOut.style.padding = "5px";
      sentenceOut.style.marginTop = "5px";
      sentenceOut.innerHTML = "<span>📝 Sentence: </span>";
      sentenceElement = document.createElement('span');
      sentenceElement.innerText = '';
      sentenceElement.style.fontWeight = 'bold';
      sentenceElement.style.color = 'black';
      sentenceOut.appendChild(sentenceElement);
      div.appendChild(sentenceOut);

      // 3. MIC SECTION
      const speechOut = document.createElement('div');
      speechOut.style.marginTop = '5px';
      speechOut.innerHTML = "<span>Voice:</span>";
      speechElement = document.createElement('span');
      speechElement.innerText = '(Click mic to speak)';
      speechElement.style.fontWeight = 'bold';
      speechElement.style.color = 'gray';
      speechOut.appendChild(speechElement);
      div.appendChild(speechOut);

      const micBtn = document.createElement('button');
      micBtn.innerText = "🎤 Speak Back";
      micBtn.style.margin = "5px";
      micBtn.onclick = () => {{
          if (!recognition) {{
              speechElement.innerText = "Speech recognition is not supported in this browser.";
              speechElement.style.color = "red";
              return;
          }}
          speechElement.innerText = "Listening...";
          speechElement.style.color = "orange";
          try {{
              recognition.start();
          }} catch (err) {{
              // start() throws when recognition is already running (double
              // click); the running session keeps listening.
              console.warn(err);
          }}
      }};
      div.appendChild(micBtn);

      // 4. SIGN IMAGE CONTAINER
      signContainer = document.createElement('div');
      signContainer.style.position = 'absolute';
      signContainer.style.bottom = '10px';
      signContainer.style.right = '10px';
      signContainer.style.zIndex = '100';
      signContainer.style.display = 'flex'; // Allow multiple images
      div.appendChild(signContainer);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => {{ shutdown = true; }};

      stream = await navigator.mediaDevices.getUserMedia({{
          video: {{ facingMode: "environment"}},
          audio: true
      }});
      video.muted = true;

      div.appendChild(video);
      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML = '<span style="color: red;">CLICK VIDEO TO STOP</span>';
      div.appendChild(instruction);
      instruction.onclick = () => {{ shutdown = true; }};

      video.srcObject = stream;
      await video.play();
      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640;
      captureCanvas.height = 480;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }}

    async function stream_frame(label, sentence_text, speak_command) {{
      if (shutdown) {{
        removeDom();
        shutdown = false;
        return '';
      }}
      stream = await createDom();

      if (label != "") labelElement.innerHTML = label;
      // Always refresh the sentence so an emptied sentence also clears the page.
      sentenceElement.innerHTML = sentence_text;
      if (speak_command != "") speakText(speak_command);

      var result = await new Promise(function(resolve, reject) {{ pendingResolve = resolve; }});
      if (result === "") {{
        // Stop was clicked while this frame was pending: tear down now and
        // report the stop, rather than handing back an empty image.
        removeDom();
        shutdown = false;
        return '';
      }}
      return {{'img': result}};
    }}
    ''')
  display(js)

def video_frame(label, sentence_text, speak_command):
  """Call the page's ``stream_frame`` with the given UI state and return its reply.

  Every argument is JSON-encoded so it arrives in JavaScript as a properly
  quoted literal; the call itself goes through ``eval_js``.
  """
  js_args = ", ".join(json.dumps(value) for value in (label, sentence_text, speak_command))
  return eval_js(f"stream_frame({js_args})")

def js_to_image(js_reply):
  """Decode a browser data URL (``data:image/jpeg;base64,<payload>``) into an image.

  Args:
    js_reply: Data URL string as produced by ``canvas.toDataURL`` in the page.

  Returns:
    The decoded colour image as a NumPy array (OpenCV channel order), or
    ``None`` when the reply carries no payload or the payload cannot be
    decoded as an image.
  """
  if not js_reply:
    return None
  # Everything after the first comma is the base64 payload.
  _, _, payload = js_reply.partition(',')
  if not payload:
    return None
  image_bytes = b64decode(payload)
  jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
  if jpg_as_np.size == 0:
    # imdecode raises on an empty buffer instead of returning None.
    return None
  return cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR)

def get_dist(p1, p2):
    """Return the straight-line distance between two points, using only ``.x`` and ``.y``."""
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    return math.hypot(delta_x, delta_y)

# --- 3. DATABASE (Include your new words here!) ---
# Note: You can upload 'you.jpg' or 'ok.jpg' to use them!
# Remote images, used for any word that has no locally generated flashcard.
backup_db = {
    "hello": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Asl_hello.svg/200px-Asl_hello.svg.png",
    "thanks": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/eb/ASL_sign_Thank_You.jpg/220px-ASL_sign_Thank_You.jpg",
    "yes": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/05/Asl_y.svg/166px-Asl_y.svg.png",
    "no": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/77/Asl_n.svg/151px-Asl_n.svg.png",
    "please": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/69/ASL_sign_Please.jpg/220px-ASL_sign_Please.jpg",
    "love": "https://upload.wikimedia.org/wikipedia/commons/thumb/f/f0/I_Love_You_Hand_Symbol.svg/1200px-I_Love_You_Hand_Symbol.svg.png",
    "peace": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/63/Hand_V_sign.jpg/220px-Hand_V_sign.jpg",
    "you": "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a2/Asl_you.png/220px-Asl_you.png", # Placeholder
    "ok": "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/Asl_f9.svg/220px-Asl_f9.svg.png" # Placeholder
}

# Prefer the offline flashcards when the generator cell has run in this kernel.
# Reassigning json_db from backup_db alone would silently discard them.
try:
    offline_db = final_db
except NameError:
    offline_db = {}  # generator cell not run: fall back to the remote images only
json_db = json.dumps({**backup_db, **offline_db})

# --- 4. MAIN LOOP ---

# Gesture thresholds, expressed in MediaPipe's normalised landmark coordinates.
THUMB_OUT_MIN_DIST = 0.15   # thumb tip farther than this from the index PIP joint => thumb is "out"
OK_PINCH_MAX_DIST = 0.05    # thumb tip closer than this to the index tip => pinch (OK sign)
NO_GESTURE = "..."          # label used when no known gesture is seen


def classify_gesture(hand_landmarks):
    """Classify one detected hand into a gesture label.

    Args:
        hand_landmarks: One entry of ``result.multi_hand_landmarks``; its
            ``.landmark`` sequence is indexed with MediaPipe's hand landmark
            ids (4/8/12/16/20 are the finger tips, 6/10/14/18 the PIP joints).

    Returns:
        str: One of "OK", "YOU", "HELLO", "YES", "LOVE", "PEACE", "CLEAR",
        or ``NO_GESTURE`` when nothing matches.
    """
    points = hand_landmarks.landmark
    thumb_tip, index_tip, middle_tip, ring_tip, pinky_tip = (
        points[4], points[8], points[12], points[16], points[20]
    )
    index_pip, middle_pip, ring_pip, pinky_pip = (
        points[6], points[10], points[14], points[18]
    )

    # A finger counts as folded when its tip has a larger y than its PIP joint
    # (normalised image y grows downward, so "larger" means lower in the frame).
    index_folded = index_tip.y > index_pip.y
    middle_folded = middle_tip.y > middle_pip.y
    ring_folded = ring_tip.y > ring_pip.y
    pinky_folded = pinky_tip.y > pinky_pip.y
    fist = index_folded and middle_folded and ring_folded and pinky_folded

    thumb_up = thumb_tip.y < index_pip.y
    thumb_out = get_dist(thumb_tip, index_pip) > THUMB_OUT_MIN_DIST
    pinching = get_dist(thumb_tip, index_tip) < OK_PINCH_MAX_DIST

    # OK (Thumb touches Index, others UP). Checked first: when the index tip
    # still sits above its PIP joint this pose also satisfies the open-palm
    # test below, which would otherwise report it as HELLO.
    if pinching and not middle_folded and not ring_folded and not pinky_folded:
        return "OK"

    # YOU / POINT (Index UP, others DOWN)
    if not index_folded and middle_folded and ring_folded and pinky_folded:
        return "YOU"

    # HELLO (Palm Open)
    if not index_folded and not middle_folded and not ring_folded and not pinky_folded:
        return "HELLO"

    # Fist: thumb above the index knuckle => YES, otherwise CLEAR / STOP
    if fist:
        return "YES" if thumb_up else "CLEAR"

    # I LOVE YOU (index + pinky up, thumb out)
    if not index_folded and middle_folded and ring_folded and not pinky_folded and thumb_out:
        return "LOVE"

    # PEACE (index + middle up)
    if not index_folded and not middle_folded and ring_folded and pinky_folded:
        return "PEACE"

    return NO_GESTURE


print("Starting Sentence Mode AI...")
video_stream(json_db)

label_html = '...'
sentence_display = ""
current_sentence = []
last_gesture = NO_GESTURE
stable_frames = 0
SPEAK_THRESHOLD = 5              # consecutive identical frames before a gesture is accepted
AUTO_SPEAK_AFTER_SECONDS = 4.0   # idle time after which the whole sentence is spoken

# Timing for auto-clear
last_add_time = time.time()

# Speech queued during one iteration is delivered with the next frame request.
# A separate call just for speech would capture and discard a frame, and would
# swallow the "stop" reply so the next request reopens the camera.
speak_cmd = ""

while True:
    # Send data to JS and get the next frame back
    js_reply = video_frame(label_html, sentence_display, speak_cmd)
    speak_cmd = ""
    if not js_reply or not js_reply.get("img"):
        # '' (or an empty image) means the user clicked to stop.
        break

    img = js_to_image(js_reply["img"])
    if img is None:
        # Undecodable frame: skip it rather than crash the session.
        continue

    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_img)

    current_gesture = NO_GESTURE
    for hand_landmarks in result.multi_hand_landmarks or []:
        detected = classify_gesture(hand_landmarks)
        if detected != NO_GESTURE:
            current_gesture = detected

    # --- SENTENCE BUILDING LOGIC ---
    if current_gesture == last_gesture and current_gesture != NO_GESTURE:
        stable_frames += 1
        if stable_frames == SPEAK_THRESHOLD:
            if current_gesture == "CLEAR":
                # Fist -> clear the sentence. A non-empty marker guarantees the
                # on-page sentence is replaced even if the page ignores empty updates.
                current_sentence = []
                sentence_display = "[Cleared]"
                speak_cmd = "Cleared."
            else:
                # Add word only if it's not the same as the very last word (prevent "Hello Hello")
                if not current_sentence or current_sentence[-1] != current_gesture:
                    current_sentence.append(current_gesture)
                    speak_cmd = current_gesture  # Speak the word as you sign it
                sentence_display = " ".join(current_sentence)

            stable_frames = 0  # Reset
            last_add_time = time.time()
    else:
        stable_frames = 0
        last_gesture = current_gesture
        label_html = f'<span>Seeing: {current_gesture}</span>'

    # AUTO-SPEAK SENTENCE: If you stop signing for a while, say the whole thing
    if current_sentence and (time.time() - last_add_time > AUTO_SPEAK_AFTER_SECONDS):
        full_text = " ".join(current_sentence)
        speak_cmd = f"Sentence complete. {full_text}"
        current_sentence = []  # Reset after speaking
        sentence_display = "[Sent]"
        last_add_time = time.time()