In [5]:
# Main code (final) -- with all features -- with instructions

import os
import subprocess
import threading
import time
import wave

import cv2
import numpy as np
import pyaudio
import pyttsx3
import speech_recognition as sr

# Function to convert text to speech
def SpeakText(command):
    """Speak `command` aloud through a pyttsx3 engine.

    The text is queued on the engine and the engine is then run until it
    has processed its queue.
    """
    # An engine handle is requested on every call
    tts = pyttsx3.init()
    tts.say(command)
    tts.runAndWait()

# Shared module-level state read and written by the functions below
photo_count = 0  # Number used in the filename of the next captured photo
zoomed_frame = None  # Global variable to hold the zoomed frame
current_filter_index = 0  # Global variable to track the current filter index
recording = False  # True while a recording is in progress
paused = False  # True while the current recording is paused
paused_lock = threading.Lock()  # Lock for synchronizing pausing/resuming

# Audio recording parameters (used to open the input stream and to write the WAV header)
audio_format = pyaudio.paInt16
channels = 1
sample_rate = 44100
chunk_size = 1024

# Global variables for audio recording
# NOTE(review): audio_stream is never rebound through a `global` declaration
# in the visible code, so it appears to stay None -- confirm before relying on it.
audio_stream = None
audio_frames = []

# Flag to indicate whether audio recording is completed
audio_completed = threading.Event()

def capture_photo():
    """Save the current zoomed frame, with the active filter applied, as a JPEG.

    The image is written to ``captured_images/image_<photo_count>.jpg`` and
    ``photo_count`` is incremented only when the file was actually written.
    Success or failure is both spoken and printed.
    """
    global photo_count

    def _report_failure():
        SpeakText("Failed to capture photo")
        print("Failed to capture photo!")

    # Take one snapshot of the shared frame: another thread may replace
    # `zoomed_frame` between the None check and its use.
    frame = zoomed_frame
    if frame is None:
        _report_failure()
        return

    # Apply the current filter to the zoomed frame
    filtered_frame = apply_filters(frame)
    # Filename for saving the image
    filename = f"captured_images/image_{photo_count}.jpg"
    # cv2.imwrite returns False when the image could not be written, so the
    # success message must depend on its result.
    if not cv2.imwrite(filename, filtered_frame):
        _report_failure()
        return

    SpeakText("captured photo")
    print(f"Image captured and saved as '{filename}'")
    photo_count += 1

# Function for zooming in and out
scale = 1
def zoom_frame(img):
    """Return `img` zoomed around its centre according to the global ``scale``.

    A centred window covering ``scale`` of the width and height is cropped
    and resized back to the original frame size, so ``scale == 1`` shows the
    whole frame and smaller values zoom in.

    The window is clamped to the image bounds: a ``scale`` above 1 would
    otherwise produce negative slice indices and an empty crop, which makes
    ``cv2.resize`` raise. If the window is empty (``scale`` too small), a
    copy of the unzoomed frame is returned instead.
    """
    height, width = img.shape[:2]
    center_x, center_y = width / 2, height / 2
    radius_x, radius_y = int(scale * width / 2), int(scale * height / 2)

    # Clamp the crop window to the frame
    min_x = max(0, int(center_x - radius_x))
    max_x = min(width, int(center_x + radius_x))
    min_y = max(0, int(center_y - radius_y))
    max_y = min(height, int(center_y + radius_y))

    if max_x <= min_x or max_y <= min_y:
        # Nothing to crop: fall back to the full frame rather than crash
        return img.copy()

    cropped = img[min_y:max_y, min_x:max_x]
    return cv2.resize(cropped, (width, height))

def zoom_out():
    """Widen the view by one 0.1 step, never beyond the full frame (1.0).

    The result is rounded to one decimal and capped at 1.0. With a plain
    ``scale += 0.1`` the accumulated float error lets a value such as
    0.9999999999999999 pass the ``< 1`` test and overshoot to about 1.1,
    which makes the crop window larger than the frame.
    """
    global scale
    scale = min(1.0, round(scale + 0.1, 1))

def zoom_in():
    """Narrow the view by one 0.1 step, never below a scale of 0.2.

    The result is rounded to one decimal and floored at 0.2. With a plain
    ``scale -= 0.1`` the accumulated float error lets a value such as
    0.20000000000000015 pass the ``> 0.2`` test, so the zoom ends up near
    0.1 instead of stopping at the intended minimum.
    """
    global scale
    scale = max(0.2, round(scale - 0.1, 1))

# Function to apply different color space conversions
def apply_filters(frame):
    """Return `frame` with the currently selected filter applied.

    The filter is chosen by the module-level ``current_filter_index``:

    * 0      -- the frame is returned untouched
    * 1 to 9 -- colour-space conversions (gray, HSV, RGBA, LAB, YUV, HLS,
      XYZ, LUV, YCrCb)
    * 10     -- inverted colours
    * 11     -- colour overlay
    * 12     -- sepia tone

    Any other index returns the frame unchanged instead of ``None``, so the
    caller always gets an image back.
    """
    index = current_filter_index
    if not 1 <= index <= 12:
        return frame

    def _with_alpha(image):
        # A colour image with an alpha channel has shape (h, w, 4). Indexing
        # shape[3] can never succeed on a 3-D array, so test the channel
        # count instead. Anything else is treated as BGR and converted.
        if image.ndim == 3 and image.shape[2] == 4:
            return image
        return cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)

    def _color_overlay(image, intensity=0.2, blue=0, green=0, red=0):
        # Blend a solid colour over the image and return a BGR result
        image = _with_alpha(image)
        height, width = image.shape[:2]
        overlay = np.full((height, width, 4), (blue, green, red, 1), dtype='uint8')
        # No dst argument: the blend goes to a new array, so the input is
        # never modified in place.
        blended = cv2.addWeighted(overlay, intensity, image, 1.0, 0)
        return cv2.cvtColor(blended, cv2.COLOR_BGRA2BGR)

    if index == 10:
        return cv2.bitwise_not(frame)
    if index == 11:
        return _color_overlay(frame, intensity=.5, red=230, blue=10)
    if index == 12:
        # Sepia is a fixed brown overlay
        return _color_overlay(frame, intensity=0.5, blue=20, green=66, red=112)

    conversions = {
        1: cv2.COLOR_BGR2GRAY,
        2: cv2.COLOR_BGR2HSV,
        3: cv2.COLOR_BGR2RGBA,
        4: cv2.COLOR_BGR2LAB,
        5: cv2.COLOR_BGR2YUV,
        6: cv2.COLOR_BGR2HLS,
        7: cv2.COLOR_BGR2XYZ,
        8: cv2.COLOR_BGR2LUV,
        9: cv2.COLOR_BGR2YCrCb,
    }
    code = conversions.get(index)
    if code is None:
        return frame
    return cv2.cvtColor(frame, code)

# Function to listen for voice commands
def listen_for_commands():
    """Listen on the microphone and dispatch recognised phrases.

    Runs until the module-level ``running`` flag becomes false. Each phrase
    is transcribed with Google speech recognition and handed to
    ``handle_command``.

    ``listen`` is called with a one-second timeout so the loop re-checks
    ``running`` regularly instead of blocking until somebody speaks.
    Only the expected recognition errors are caught; a failure inside a
    command handler is reported as such rather than as "no command detected".
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for commands...")
        recognizer.adjust_for_ambient_noise(source)
        announce = True  # Avoid printing "Listening..." after every silent second
        while running:
            if announce:
                print("Listening...")
            try:
                # Give up after 1 second without speech
                audio = recognizer.listen(source, timeout=1)
            except sr.WaitTimeoutError:
                announce = False
                continue
            announce = True

            try:
                command = recognizer.recognize_google(audio)
            except sr.UnknownValueError:
                # Speech was captured but could not be understood
                print("no command detected")
                continue
            except sr.RequestError as error:
                print(f"speech recognition request failed: {error}")
                continue

            print("Command:", command)
            try:
                handle_command(command)
            except Exception as error:
                # Keep the listener alive, but do not hide what went wrong
                print(f"command '{command}' failed: {error}")

def handle_command(command):
    """Run the action matching a recognised voice command.

    The command is lower-cased and checked against the known phrases in a
    fixed order; the first phrase contained in it wins. Recording commands
    act only when they make sense for the current ``recording``/``paused``
    state. Text matching no phrase is ignored.
    """
    spoken = command.lower()

    if "capture" in spoken:
        capture_photo()
        return

    if "stop camera" in spoken:
        print("stopping camera")
        SpeakText("stopping camera")
        stop_camera()
        return

    if "zoom in" in spoken:
        SpeakText("zoomed in")
        zoom_in()
        return

    if "zoom out" in spoken:
        SpeakText("zoomed out")
        zoom_out()
        return

    if "change filter" in spoken:
        next_filter()
        return

    if "default view" in spoken:
        SpeakText("default view")
        set_default_view()
        return

    if "start recording" in spoken:
        if not recording:
            record_start()
        return

    if "stop recording" in spoken:
        if recording:
            record_stop()
        return

    if "pause recording" in spoken:
        if recording and not paused:
            record_pause()
        return

    if "resume recording" in spoken:
        if recording and paused:
            record_resume()

def next_filter():
    """Select the following filter, wrapping back to index 0 after index 12."""
    global current_filter_index
    following = current_filter_index + 1
    # 13 is the total number of filters; change it when filters are added above
    current_filter_index = following % 13

def set_default_view():
    """Reset the filter selection to index 0.

    ``apply_filters`` returns the frame unchanged for index 0, so this
    switches the view back to the unfiltered image.
    """
    global current_filter_index
    current_filter_index = 0

def record_audio():
    """Capture microphone audio into ``audio_frames`` while ``recording`` is set.

    Intended to run in its own thread. Chunks read while the recording is
    paused are discarded, so the saved audio contains only the unpaused
    parts. ``audio_completed`` is always set on exit, even when opening or
    reading the stream fails, so a waiting ``record_stop`` cannot hang.
    """
    global audio_frames
    audio_frames = []
    try:
        audio = pyaudio.PyAudio()
        stream = None
        try:
            stream = audio.open(format=audio_format, channels=channels,
                                rate=sample_rate, input=True,
                                frames_per_buffer=chunk_size)
            print("Audio recording started...")
            while recording:
                # Keep draining the device even while paused: the blocking
                # read paces the loop (no busy-wait) and the input buffer
                # cannot overflow. Overflow is tolerated rather than raised
                # because one lost chunk should not abort the recording.
                audio_data = stream.read(chunk_size, exception_on_overflow=False)
                with paused_lock:
                    is_paused = paused
                if not is_paused:
                    audio_frames.append(audio_data)
            print("Audio recording stopped...")
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            audio.terminate()
    finally:
        audio_completed.set()

def record_video():
    """Write the displayed camera view to ``captured_images/output.mp4``.

    Intended to run in its own thread while ``recording`` is set. Instead of
    reading the camera and driving the GUI itself, it records the frame the
    main camera loop most recently stored in the module-level
    ``zoomed_frame``, with the active filter applied. This avoids:

    * two threads competing for ``camera.read()`` frames, and
    * ``cv2.imshow``/``cv2.waitKey`` calls from a worker thread (the main
      loop already shows the window and handles the same keys).

    Frames are written at a steady 13 fps so the playback rate matches the
    rate declared to the writer. Nothing is written while paused, and the
    writer stays open across a pause. The loop also ends when ``running``
    is cleared, so stopping the camera does not leave this thread spinning.
    """
    fps = 13.0
    frame_interval = 1.0 / fps
    writer = None
    next_deadline = time.monotonic()
    try:
        while recording and running:
            with paused_lock:
                is_paused = paused
            frame = zoomed_frame
            if is_paused or frame is None:
                # Sleep instead of busy-waiting until resumed / a frame exists
                time.sleep(frame_interval)
                next_deadline = time.monotonic()
                continue

            output = apply_filters(frame)
            # The writer only accepts 3-channel frames; some filters return
            # 1 channel (gray) or 4 channels, which would be silently dropped.
            if output.ndim == 2:
                output = cv2.cvtColor(output, cv2.COLOR_GRAY2BGR)
            elif output.shape[2] == 4:
                # Drop the 4th channel and keep the channel order as displayed
                output = cv2.cvtColor(output, cv2.COLOR_BGRA2BGR)

            if writer is None:
                # Size the writer from the real frame instead of assuming
                # 640x480, and use a codec tag that matches the .mp4 container.
                height, width = output.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                writer = cv2.VideoWriter('captured_images/output.mp4', fourcc,
                                         fps, (width, height))
            writer.write(output)

            # Pace the loop to the declared frame rate
            next_deadline += frame_interval
            delay = next_deadline - time.monotonic()
            if delay > 0:
                time.sleep(delay)
            else:
                next_deadline = time.monotonic()
    finally:
        # Release the VideoWriter object
        if writer is not None:
            writer.release()

def record_start():
    """Start a new recording: one thread for audio and one for video.

    State left over from a previous recording is reset first. Otherwise a
    recording stopped while paused would start the next one paused, and the
    still-set ``audio_completed`` event would let the next ``record_stop``
    save the audio before the audio thread has finished.
    """
    global recording, paused
    with paused_lock:
        paused = False
    audio_completed.clear()

    recording = True
    SpeakText("recording started")
    print("Recording Started...")
    # Start record_audio() in a separate thread
    audio_thread = threading.Thread(target=record_audio)
    audio_thread.start()
    # Start record_video() in a separate thread
    video_thread = threading.Thread(target=record_video)
    video_thread.start()

def record_stop():
    """Stop the current recording and save the captured audio.

    Clears ``recording`` so the recording threads leave their loops, then
    waits for the audio thread to signal completion before writing the WAV
    file. The wait is bounded: if the audio thread died without setting
    ``audio_completed``, an unbounded wait would block the caller forever.
    """
    global recording
    recording = False
    SpeakText("recording stopped")
    print("Recording Stopped.")
    # Wait for audio recording to complete, for at most 5 seconds
    if not audio_completed.wait(timeout=5):
        print("Audio recording did not finish in time; saving what was captured")
    # Save recorded audio to file
    save_audio()

def save_audio():
    """Write the collected ``audio_frames`` to ``captured_images/audio.wav``.

    The WAV header uses the module-level ``channels``, ``sample_rate`` and
    the byte width of ``audio_format``.
    """
    # Module-level helper: no PyAudio instance has to be created (and then
    # left unterminated) just to look up the sample width.
    sample_width = pyaudio.get_sample_size(audio_format)
    # The context manager closes the file even if writing fails
    with wave.open("captured_images/audio.wav", 'wb') as wave_file:
        wave_file.setnchannels(channels)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(sample_rate)
        wave_file.writeframes(b''.join(audio_frames))

def record_pause():
    """Mark the recording as paused and announce it.

    Sets the module-level ``paused`` flag while holding ``paused_lock``,
    then speaks and prints a confirmation.
    """
    global paused
    with paused_lock:
        paused = True
    SpeakText("recording paused")
    print("Recording Paused...")

def record_resume():
    """Mark the recording as no longer paused and announce it.

    Clears the module-level ``paused`` flag while holding ``paused_lock``,
    then speaks and prints a confirmation.
    """
    global paused
    with paused_lock:
        paused = False
    SpeakText("recording resumed")
    print("Recording Resumed...")

def stop_camera():
    """Clear the module-level ``running`` flag.

    The camera loop and the voice-command loop both test ``running`` in
    their ``while`` conditions, so they end after their current iteration.
    """
    global running
    running = False

# Create a directory to save the captured images
# (exist_ok avoids the check-then-create race and the extra lookup)
os.makedirs("captured_images", exist_ok=True)

# Initialize camera
camera = cv2.VideoCapture(0)

# Flag to indicate whether the threads should continue running.
# It starts out false when the camera could not be opened, so neither the
# camera loop nor the voice listener runs against a dead device.
running = camera.isOpened()
if running:
    print("camera ON")
else:
    print("Failed to open camera")

# Start voice command thread (only useful while the camera is running)
listen_thread = threading.Thread(target=listen_for_commands)
if running:
    listen_thread.start()

def _build_instruction_panel(height, width):
    """Render the static help text on a white image of the given size."""
    panel = 255 * np.ones((height, width, 3), dtype=np.uint8)
    lines = [
        "               INSTRUCTIONS         ",
        "                             ",
        "Say 'capture' or press 'c' ",
        "                to capture photo",
        "Say 'stop camera' or press 'q'",
        "                to stop the camera",
        "Say 'zoom in' or press 'z'",
        "                to zoom in the screen",
        "Say 'zoom out'or press 'x' ",
        "                to zoom out the screen",
        "Say 'change filter' or press 'n'",
        "                to change the filter",
        "Say 'default view' or press 'd' ",
        "                 to set to default view",
        "Say 'start recording' or press 'r' ",
        "                 to start recording the video",
        "Say 'stop recording' or press 'r' ",
        "                 to stop the recording video",
        "Say 'pause recording' or press 'p' ",
        "                 to pause the recording",
        "Say 'resume recording' or press 'p' ",
        "                  to resume the recording"
    ]
    for i, line in enumerate(lines):
        org = (20, 50 + (i + 1) * 15)  # Position of the text, adjusted for smaller font
        cv2.putText(panel, line, org, cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1, cv2.LINE_AA)
    return panel


# The help panel never changes, so it is rendered once (and again only if
# the frame size changes) instead of on every frame.
instruction_panel = None

# Consecutive failed camera reads. Without a limit, a dead camera would keep
# the loop spinning forever with no way to quit, because no key is polled.
failed_reads = 0
MAX_FAILED_READS = 40

# Start camera loop
while running:
    ret, frame = camera.read()
    if not ret:
        failed_reads += 1
        if failed_reads >= MAX_FAILED_READS:
            print("Failed to read from camera")
            stop_camera()
        else:
            time.sleep(0.05)  # Give the device a moment before retrying
        continue
    failed_reads = 0

    panel_size = (frame.shape[0], frame.shape[1] // 2)
    if instruction_panel is None or instruction_panel.shape[:2] != panel_size:
        instruction_panel = _build_instruction_panel(*panel_size)

    # Display instructions
    cv2.imshow('Instructions', instruction_panel)

    # `zoomed_frame` is module-level state: capture_photo reads it
    zoomed_frame = zoom_frame(frame)
    filtered_frame = apply_filters(zoomed_frame)
    cv2.imshow("Camera", filtered_frame)

    key = cv2.waitKey(1)
    if key == ord('q'):
        print("stopping camera")
        SpeakText("stopping camera")
        stop_camera()
    elif key == ord('z'):
        zoom_in()
    elif key == ord('x'):
        zoom_out()
    elif key == ord('c'):
        capture_photo()
    elif key == ord('n'):
        next_filter()
    elif key == ord('d'):
        set_default_view()
    elif key == ord('r'):
        # 'r' toggles recording
        if not recording:
            record_start()
        else:
            record_stop()
    elif key == ord('p'):
        # 'p' toggles pause, but only during a recording
        if recording:
            if not paused:
                record_pause()
            else:
                record_resume()

# Finish a recording that is still active, so its threads stop and the audio
# file is written before the camera goes away.
if recording:
    record_stop()

# Release camera
camera.release()
cv2.destroyAllWindows()

# to merge audio(wav) with video(mp4)
def merge_video_audio(video_path,audio_path,output_path):
    """Mux a video file and an audio file into `output_path` with ffmpeg.

    The video stream is copied as-is and the audio is encoded as AAC.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not available.
        subprocess.CalledProcessError: if ffmpeg exits with an error, for
            example when `output_path` already exists. ffmpeg's captured
            error output is printed before the exception is re-raised.
    """
    command = ['ffmpeg', '-i', video_path, '-i', audio_path, '-c:v', 'copy', '-c:a', 'aac', output_path]
    try:
        # stdin is detached so ffmpeg cannot block on an interactive prompt
        # (such as the overwrite question) that nobody can answer.
        subprocess.run(command, capture_output=True, check=True,
                       stdin=subprocess.DEVNULL)
    except subprocess.CalledProcessError as error:
        # The captured stderr is not part of the traceback; show it
        if error.stderr:
            print(error.stderr.decode(errors="replace"))
        raise

# Paths to video and audio files
video_file = "captured_images/output.mp4"
audio_file = "captured_images/audio.wav"
output_file = "captured_images/final.mp4"

# Merge video and audio files, but only when a recording was actually made.
# A session without a recording has neither file, and running the merge
# anyway would end every such session with an ffmpeg error.
if os.path.exists(video_file) and os.path.exists(audio_file):
    merge_video_audio(video_file,audio_file,output_file)

    # The intermediate files are removed only after a successful merge
    os.remove(video_file)
    os.remove(audio_file)
else:
    print("No recording to merge")


camera ON
Listening for commands...
Listening...
Command: hello hello 123 mike testing
Listening...
no command detected
Listening...
Command: change filter
Listening...
Command: change filter
Listening...
Command: change filter
Listening...
Command: default view
Listening...
Command: zoom
Listening...
Command: zoom in
Listening...
Command: zoom in
Listening...
Command: zoom out
Listening...
Command: capture
Image captured and saved as 'captured_images/image_0.jpg'
Listening...
no command detected
Listening...
Command: start recording
Recording Started...
Listening...
Audio recording started...
Command: hello 1234
Listening...
no command detected
Listening...
Command: pause recording
Recording Paused...
Listening...
Command: resume recording
Recording Resumed...
Listening...
Command: zoom out
Listening...
Command: change filter
Listening...
Command: change filter
Listening...
Command: now I am changing filters using keys
Listening...
no command detected
Listening...
Command: capture
Ima