In [1]:
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.12.0-py3-none-any.whl.metadata (30 kB)
Downloading SpeechRecognition-3.12.0-py3-none-any.whl (32.8 MB)
   ---------------------------------------- 0.0/32.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/32.8 MB ? eta -:--:--
    --------------------------------------- 0.5/32.8 MB 2.1 MB/s eta 0:00:16
    --------------------------------------- 0.8/32.8 MB 1.3 MB/s eta 0:00:25
    --------------------------------------- 0.8/32.8 MB 1.3 MB/s eta 0:00:25
   - -------------------------------------- 1.0/32.8 MB 825.2 kB/s eta 0:00:39
   - -------------------------------------- 1.0/32.8 MB 825.2 kB/s eta 0:00:39
   - -------------------------------------- 1.0/32.8 MB 825.2 kB/s eta 0:00:39
   - -------------------------------------- 1.0/32.8 MB 825.2 kB/s eta 0:00:39
   - -------------------------------------- 1.0/32.8 MB 825.2 kB/s eta 0:00:39
   - -------------------------------------- 1.3/32.8 MB 573.6 kB/s e

In [None]:
import cv2
import numpy as np
import time
import speech_recognition as sr
from sklearn.cluster import KMeans

# Function to recognize voice commands
def recognize_voice_command(timeout=None, phrase_time_limit=None):
    """Record one phrase from the default microphone and return it as lower-case text.

    Args:
        timeout: Maximum number of seconds to wait for speech to start.
            ``None`` (the default) waits indefinitely, as before.
        phrase_time_limit: Maximum number of seconds of speech to record.
            ``None`` (the default) records until the speaker pauses.

    Returns:
        The recognised phrase in lower case, or ``None`` when no speech
        started within ``timeout``, the audio could not be understood, or the
        recognition request failed.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            print("Listening for commands...")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(
                source, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
    except sr.WaitTimeoutError:
        # Only reachable when a timeout was requested and nobody spoke in time.
        print("No speech detected before the timeout.")
        return None

    try:
        command = recognizer.recognize_google(audio)
        print(f"Command received: {command}")
        return command.lower()
    except sr.UnknownValueError:
        print("Could not understand the command.")
        return None
    except sr.RequestError:
        print("Could not request results; check your internet connection.")
        return None

def create_background(cap, num_frames=30):
    """Build a static background image as the per-pixel median of several frames.

    Args:
        cap: Object whose ``read()`` returns ``(ok, frame)``, e.g. a
            ``cv2.VideoCapture``.
        num_frames: Number of read attempts; 0.1 s is slept after each one.

    Returns:
        The median of all successfully read frames, cast to ``uint8``.

    Raises:
        ValueError: If no frame could be read at all.
    """
    print("Capturing background. Please move out of frame.")
    captured = []
    for attempt in range(1, num_frames + 1):
        ok, image = cap.read()
        if not ok:
            print(f"Warning: Could not read frame {attempt}/{num_frames}")
        else:
            captured.append(image)
        time.sleep(0.1)

    # Guard clause: without a single frame there is nothing to take a median of.
    if not captured:
        raise ValueError("Could not capture any frames for background")

    median_image = np.median(captured, axis=0)
    return median_image.astype(np.uint8)

def create_mask(frame, lower_color, upper_color):
    """Return a cleaned-up mask of the pixels whose HSV value lies in a range.

    The frame is converted from BGR to HSV and thresholded with
    ``cv2.inRange``. The raw mask is then opened (two iterations) and dilated
    (one iteration) with a 3x3 kernel.

    Args:
        frame: BGR image.
        lower_color: Lower HSV bound passed to ``cv2.inRange``.
        upper_color: Upper HSV bound passed to ``cv2.inRange``.

    Returns:
        The mask after opening and dilation.
    """
    kernel = np.ones((3, 3), np.uint8)
    in_range = cv2.inRange(
        cv2.cvtColor(frame, cv2.COLOR_BGR2HSV), lower_color, upper_color
    )
    opened = cv2.morphologyEx(in_range, cv2.MORPH_OPEN, kernel, iterations=2)
    return cv2.morphologyEx(opened, cv2.MORPH_DILATE, kernel, iterations=1)

def apply_kmeans_color_quantization(frame, k=16):
    """Reduce a frame to ``k`` colours by K-means clustering of its pixels.

    Args:
        frame: Image array that can be reshaped to rows of 3 channel values.
        k: Number of clusters, i.e. colours in the result.

    Returns:
        An array with the shape of ``frame`` in which every pixel is replaced
        by its cluster centre, cast to ``uint8``.
    """
    original_shape = frame.shape
    pixels = frame.reshape((-1, 3))

    # random_state is fixed so the clustering is repeatable for a given frame.
    model = KMeans(n_clusters=k, random_state=0)
    assignments = model.fit_predict(pixels)

    palette = model.cluster_centers_.astype(np.uint8)
    return palette[assignments].reshape(original_shape)

def apply_cloak_effect_with_video(frame, mask, galaxy_video):
    """Replace the masked region of ``frame`` with the next frame of a video.

    Args:
        frame: Image to composite onto.
        mask: Mask for ``frame``; where it is set the video shows through,
            elsewhere the original pixels are kept. Assumed to be a binary
            0/255 mask such as the one ``create_mask`` returns.
        galaxy_video: Object with ``read()`` and ``set()`` like a
            ``cv2.VideoCapture``. It is rewound to frame 0 when a read fails,
            which loops the video once it reaches its end.

    Returns:
        The composited image. If the video yields no frame even after being
        rewound, an unmodified copy of ``frame`` is returned.
    """
    ret, galaxy_frame = galaxy_video.read()
    if not ret:
        # End of video (or a failed read): rewind and try once more.
        galaxy_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
        ret, galaxy_frame = galaxy_video.read()

    if not ret or galaxy_frame is None:
        # Passing None to cv2.resize fails with an opaque error, so skip the
        # effect for this frame instead of crashing the caller's display loop.
        return frame.copy()

    height, width = frame.shape[:2]
    galaxy_resized = cv2.resize(galaxy_frame, (width, height))
    mask_inv = cv2.bitwise_not(mask)
    fg = cv2.bitwise_and(frame, frame, mask=mask_inv)
    bg = cv2.bitwise_and(galaxy_resized, galaxy_resized, mask=mask)
    return cv2.add(fg, bg)

def main():
    """Run the voice-controlled cloak demo until 'q' is pressed in the window.

    Each camera frame is colour-quantised, the pixels inside the current cloak
    colour range are replaced by the current background video, and the result
    is displayed. Voice commands containing "blue", "red" or "green" change
    the cloak colour; "galaxy" or "forest" change the background video.

    Voice recognition runs on a worker thread because
    ``recognize_voice_command`` blocks until a phrase has been spoken; calling
    it inside the frame loop would freeze the video between commands.
    """
    # Imported locally so the block only depends on the cell's existing imports.
    import queue
    import threading

    # HSV (lower, upper) bounds for every cloak colour selectable by voice.
    color_ranges = {
        'blue': (np.array([90, 50, 50]), np.array([130, 255, 255])),
        'red': (np.array([0, 50, 50]), np.array([10, 255, 255])),
        'green': (np.array([35, 50, 50]), np.array([85, 255, 255])),
    }
    # Spoken keyword -> background video file.
    video_files = {'galaxy': 'galaxy.mp4', 'forest': 'forest.mp4'}

    print("OpenCV version:", cv2.__version__)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    galaxy_video_file = video_files['galaxy']
    galaxy_video = cv2.VideoCapture(galaxy_video_file)
    if not galaxy_video.isOpened():
        print("Error: Could not open galaxy video.")
        cap.release()
        return

    pending_commands = queue.Queue()
    stop_listening = threading.Event()

    def listen_for_commands():
        # Worker thread: keep turning speech into queued commands until told to stop.
        while not stop_listening.is_set():
            spoken = recognize_voice_command()
            if spoken:
                pending_commands.put(spoken)

    try:
        try:
            # The returned image is not needed (the video replaces the cloak);
            # the call is kept for its prompt and its check that the camera
            # actually delivers frames.
            create_background(cap)
        except ValueError as e:
            print(f"Error: {e}")
            return

        lower_color, upper_color = color_ranges['blue']

        print("Starting main loop. Press 'q' to quit.")

        # Daemon thread: it may still be blocked listening when the loop ends
        # and must not keep the interpreter alive.
        threading.Thread(target=listen_for_commands, daemon=True).start()

        while True:
            # Apply every command recognised since the previous frame.
            # Colours take precedence over videos, in the order listed above.
            while not pending_commands.empty():
                command = pending_commands.get_nowait()
                color = next((name for name in color_ranges if name in command), None)
                keyword = next((name for name in video_files if name in command), None)

                if color is not None:
                    lower_color, upper_color = color_ranges[color]
                    print(f"Switched cloak color to {color}.")
                elif keyword is not None:
                    requested_file = video_files[keyword]
                    if requested_file != galaxy_video_file:
                        # Open the new video first and only swap when that
                        # worked, releasing the old capture so handles do not
                        # pile up.
                        candidate = cv2.VideoCapture(requested_file)
                        if not candidate.isOpened():
                            candidate.release()
                            print(f"Error: Could not open {keyword} video.")
                            continue
                        galaxy_video.release()
                        galaxy_video = candidate
                        galaxy_video_file = requested_file
                    print(f"Switched to {keyword} background video.")
                else:
                    print("Command not recognized.")

            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                time.sleep(1)
                continue

            quantized_frame = apply_kmeans_color_quantization(frame)
            mask = create_mask(quantized_frame, lower_color, upper_color)
            result = apply_cloak_effect_with_video(quantized_frame, mask, galaxy_video)

            # Show the result
            cv2.imshow('Augmented Invisibility Cloak', result)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit, early return and exceptions alike.
        stop_listening.set()
        cap.release()
        galaxy_video.release()
        cv2.destroyAllWindows()

# Start the demo when this code is executed directly (a notebook cell or a
# script both run with __name__ == "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()


OpenCV version: 4.10.0
Capturing background. Please move out of frame.
Starting main loop. Press 'q' to quit.
Listening for commands...
Command received: hello hello
Command not recognized.
Listening for commands...
Could not understand the command.
Listening for commands...
Command received: invisible
Command not recognized.
Listening for commands...
Could not understand the command.
Listening for commands...
Could not understand the command.
Listening for commands...


In [3]:
!pip install pyaudio


Collecting pyaudio
  Downloading PyAudio-0.2.14-cp312-cp312-win_amd64.whl.metadata (2.7 kB)
Downloading PyAudio-0.2.14-cp312-cp312-win_amd64.whl (164 kB)
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [1]:
import cv2
import numpy as np
import time
import speech_recognition as sr
from sklearn.cluster import KMeans

def create_background(cap, num_frames=30):
    """Capture several frames and return their per-pixel median as the background.

    Args:
        cap: Frame source whose ``read()`` returns ``(ok, frame)``, e.g. a
            ``cv2.VideoCapture``.
        num_frames: How many reads to attempt, pausing 0.1 s after each.

    Returns:
        ``uint8`` array holding the median over the frames that were read.

    Raises:
        ValueError: When every read failed (or ``num_frames`` is 0).
    """
    print("Capturing background. Please move out of frame.")
    good_frames = []
    attempt = 0
    while attempt < num_frames:
        attempt += 1
        success, snapshot = cap.read()
        if success:
            good_frames.append(snapshot)
        else:
            print(f"Warning: Could not read frame {attempt}/{num_frames}")
        time.sleep(0.1)

    if len(good_frames) == 0:
        raise ValueError("Could not capture any frames for background")

    return np.median(good_frames, axis=0).astype(np.uint8)

def create_mask(frame, lower_color, upper_color):
    """Threshold a BGR frame in HSV space and tidy the resulting mask.

    Args:
        frame: BGR image.
        lower_color: Lower HSV bound for ``cv2.inRange``.
        upper_color: Upper HSV bound for ``cv2.inRange``.

    Returns:
        The in-range mask after a two-iteration morphological opening and a
        one-iteration dilation, both with a 3x3 kernel.
    """
    structuring_element = np.ones((3, 3), np.uint8)
    hsv_image = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    raw_mask = cv2.inRange(hsv_image, lower_color, upper_color)

    # Open first, then dilate, in the same order as the original pipeline.
    steps = ((cv2.MORPH_OPEN, 2), (cv2.MORPH_DILATE, 1))
    cleaned = raw_mask
    for operation, repeat in steps:
        cleaned = cv2.morphologyEx(
            cleaned, operation, structuring_element, iterations=repeat
        )
    return cleaned

def apply_kmeans_color_quantization(frame, k=16):
    """Quantise the colours of ``frame`` to ``k`` K-means cluster centres.

    Args:
        frame: Image array that can be reshaped to rows of 3 channel values.
        k: Number of clusters to fit.

    Returns:
        Array shaped like ``frame`` where each pixel holds the ``uint8``-cast
        centre of the cluster it was assigned to.
    """
    # One row per pixel, one column per channel.
    flat_pixels = frame.reshape((-1, 3))

    # Fixed random_state keeps the clustering repeatable for a given frame.
    clusterer = KMeans(n_clusters=k, random_state=0)
    cluster_of_pixel = clusterer.fit_predict(flat_pixels)

    centres = clusterer.cluster_centers_.astype(np.uint8)
    recoloured = centres[cluster_of_pixel]
    return recoloured.reshape(frame.shape)

def apply_cloak_effect_with_video(frame, mask, galaxy_video):
    """Replace the masked region of ``frame`` with the next frame of a video.

    Args:
        frame: Image to composite onto.
        mask: Mask for ``frame``; where it is set the video shows through,
            elsewhere the original pixels are kept. Assumed to be a binary
            0/255 mask such as the one ``create_mask`` returns.
        galaxy_video: Object with ``read()`` and ``set()`` like a
            ``cv2.VideoCapture``. It is rewound to frame 0 when a read fails,
            which loops the video once it reaches its end.

    Returns:
        The composited image. If the video yields no frame even after being
        rewound, an unmodified copy of ``frame`` is returned.
    """
    # Read a frame from the galaxy video
    ret, galaxy_frame = galaxy_video.read()
    if not ret:
        # Restart video if it ends
        galaxy_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
        ret, galaxy_frame = galaxy_video.read()

    if not ret or galaxy_frame is None:
        # Passing None to cv2.resize fails with an opaque error, so skip the
        # effect for this frame instead of crashing the caller's display loop.
        return frame.copy()

    # Resize galaxy frame to match the size of the input frame
    height, width = frame.shape[:2]
    galaxy_resized = cv2.resize(galaxy_frame, (width, height))

    # Create the inverse of the mask
    mask_inv = cv2.bitwise_not(mask)

    # Apply the mask to separate the foreground and background
    fg = cv2.bitwise_and(frame, frame, mask=mask_inv)
    bg = cv2.bitwise_and(galaxy_resized, galaxy_resized, mask=mask)

    # Combine the foreground and the new background
    return cv2.add(fg, bg)

def recognize_voice_command(timeout=None, phrase_time_limit=None):
    """Record one phrase from the default microphone and return it as lower-case text.

    Args:
        timeout: Maximum number of seconds to wait for speech to start.
            ``None`` (the default) waits indefinitely, as before.
        phrase_time_limit: Maximum number of seconds of speech to record.
            ``None`` (the default) records until the speaker pauses.

    Returns:
        The recognised phrase in lower case, or ``None`` when no speech
        started within ``timeout``, the audio could not be understood, or the
        recognition request failed.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            print("Listening for commands...")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(
                source, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
    except sr.WaitTimeoutError:
        # Only reachable when a timeout was requested and nobody spoke in time.
        print("No speech detected before the timeout")
        return None

    try:
        command = recognizer.recognize_google(audio).lower()
        print(f"Recognized command: {command}")
        return command
    except sr.UnknownValueError:
        print("Could not understand the audio")
    except sr.RequestError:
        print("Could not request results from Google Speech Recognition service")
    return None

def process_command(command):
    """Map a recognised phrase to an action keyword.

    Matching is by substring, and the first rule that matches wins, in the
    order of the branches below.

    Args:
        command: Lower-case phrase, or ``None`` / ``""`` when nothing was
            recognised.

    Returns:
        One of ``'blue'``, ``'galaxy'``, ``'deactivate'``, ``'activate'``,
        ``'pause'``, ``'play'``, ``'exit'``, or ``None`` when no rule matches.
    """
    if not command:
        return None

    if 'blue' in command:
        print("Changing cloak color to blue")
        return 'blue'
    elif 'galaxy' in command:
        print("Changing background to galaxy")
        return 'galaxy'
    # 'deactivate' contains 'activate', so it must be tested first; otherwise
    # "deactivate invisibility cloak" would be taken as an activation.
    elif 'deactivate' in command and ('cloak' in command or 'invisibility' in command):
        print("Deactivating cloak")
        return 'deactivate'
    elif 'activate' in command and 'invisibility' in command:
        print("Activating invisibility cloak")
        return 'activate'
    elif 'pause' in command and 'video' in command:
        print("Pausing background video")
        return 'pause'
    elif 'play' in command and 'video' in command:
        print("Resuming background video")
        return 'play'
    elif 'exit' in command or 'quit' in command:
        print("Exiting program")
        return 'exit'
    return None

def main():
    """Run the voice-controlled cloak demo until 'q' is pressed or "exit" is spoken.

    Recognised phrases are translated by ``process_command`` into actions:

    * ``'activate'`` / ``'deactivate'``: turn the cloak effect on or off. It
      starts off, and the plain camera frame is shown while it is off.
    * ``'pause'`` / ``'play'``: freeze or resume the background video.
    * ``'blue'``: select the blue cloak colour range.
    * ``'galaxy'``: select the galaxy background (the only one loaded).
    * ``'exit'``: leave the loop.

    Voice recognition runs on a worker thread because
    ``recognize_voice_command`` blocks until a phrase has been spoken; calling
    it inside the frame loop would freeze the window between commands.
    """
    # Imported locally so the block only depends on the cell's existing imports.
    import queue
    import threading

    print("OpenCV version:", cv2.__version__)

    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    # Load the galaxy video
    galaxy_video = cv2.VideoCapture('galaxy.mp4')
    if not galaxy_video.isOpened():
        print("Error: Could not open galaxy video.")
        cap.release()
        return

    pending_commands = queue.Queue()
    stop_listening = threading.Event()

    def listen_for_commands():
        # Worker thread: keep turning speech into queued commands until told to stop.
        while not stop_listening.is_set():
            spoken = recognize_voice_command()
            if spoken:
                pending_commands.put(spoken)

    try:
        try:
            # The returned image is not needed (the video replaces the cloak);
            # the call is kept for its prompt and its check that the camera
            # actually delivers frames.
            create_background(cap)
        except ValueError as e:
            print(f"Error: {e}")
            return

        # Define the color range for the cloak (e.g., blue)
        lower_color = np.array([90, 50, 50])
        upper_color = np.array([130, 255, 255])

        cloak_active = False
        # Frame index the background video is frozen at, or None while playing.
        paused_at = None

        print("Starting main loop. Press 'q' to quit.")

        # Daemon thread: it may still be blocked listening when the loop ends
        # and must not keep the interpreter alive.
        threading.Thread(target=listen_for_commands, daemon=True).start()

        running = True
        while running:
            # Apply every command recognised since the previous frame.
            while not pending_commands.empty():
                action = process_command(pending_commands.get_nowait())
                if action == 'blue':
                    lower_color = np.array([90, 50, 50])
                    upper_color = np.array([130, 255, 255])
                elif action == 'galaxy':
                    pass  # galaxy.mp4 is the only background loaded
                elif action == 'activate':
                    cloak_active = True
                elif action == 'deactivate':
                    cloak_active = False
                elif action == 'pause':
                    if paused_at is None:
                        # POS_FRAMES is the index of the next frame to decode,
                        # so the frame currently on screen is one before it.
                        paused_at = max(galaxy_video.get(cv2.CAP_PROP_POS_FRAMES) - 1, 0)
                elif action == 'play':
                    paused_at = None
                elif action == 'exit':
                    running = False
            if not running:
                break

            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                time.sleep(1)
                continue

            if cloak_active:
                # Apply K-means color quantization to the frame
                quantized_frame = apply_kmeans_color_quantization(frame)

                # Create a mask for the cloak
                mask = create_mask(quantized_frame, lower_color, upper_color)

                if paused_at is not None:
                    # apply_cloak_effect_with_video always reads the next frame;
                    # seeking back first makes it read the same one again.
                    galaxy_video.set(cv2.CAP_PROP_POS_FRAMES, paused_at)

                # Apply the augmented background overlay effect
                result = apply_cloak_effect_with_video(quantized_frame, mask, galaxy_video)
            else:
                result = frame

            # Show the result
            cv2.imshow('Augmented Invisibility Cloak', result)

            # Quit when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit, early return and exceptions alike.
        stop_listening.set()
        cap.release()
        galaxy_video.release()
        cv2.destroyAllWindows()

# Start the demo when this code is executed directly (a notebook cell or a
# script both run with __name__ == "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()


OpenCV version: 4.10.0
Capturing background. Please move out of frame.
Starting main loop. Press 'q' to quit.
Listening for commands...
Recognized command: activate invisibility
Activating invisibility cloak
Listening for commands...
Recognized command: change clock colour to blue
Changing cloak color to blue
Listening for commands...
Recognized command: activate invisibility
Activating invisibility cloak
Listening for commands...
Recognized command: play video
Resuming background video
Listening for commands...
Recognized command: pause video
Pausing background video
Listening for commands...
Could not understand the audio
Listening for commands...
Recognized command: activate invisibility
Activating invisibility cloak
Listening for commands...
Recognized command: play video
Resuming background video
Listening for commands...
Recognized command: play video
Resuming background video
Listening for commands...
Recognized command: play video
Resuming background video
Listening for command