### Use this cell to install the required packages

In [4]:
!pip install playsound
!pip install pyttsx3
!pip install -U openai-whisper

### `FFmpeg` Installation

#### On Windows:

##### Download
Go to the [FFmpeg official website](https://ffmpeg.org/download.html) and download the latest build for Windows.

##### Extract
Extract the downloaded ZIP file to a directory, for example, C:\FFmpeg.

##### Environment Variable:
- Right-click on 'This PC' or 'Computer' on your desktop or File Explorer, and select 'Properties'.

- Click on 'Advanced system settings' and then 'Environment Variables'.

- Under 'System Variables', find and select 'Path', then click 'Edit'.

- Click 'New' and add the path to your FFmpeg bin directory, e.g., C:\FFmpeg\bin.

- Click 'OK' to close all dialog boxes.


#### On macOS:

You can install `ffmpeg` using Homebrew:

`brew install ffmpeg`

#### On Linux:
For Ubuntu and other Debian-based distributions, you can install ffmpeg from the apt repository:

`sudo apt update`

`sudo apt install ffmpeg`



### Use this cell to import any libraries

In [2]:
import os
import time
import warnings
import wave

import numpy as np
import openai
import pyaudio
import whisper
from dotenv import load_dotenv, find_dotenv
from openai import OpenAI #Only for testing purposes
from playsound import playsound

### Fetching the API Key and selecting the LLM Model

In [28]:
# Read the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# The OpenAI key is mandatory: stop here with a clear message when it is absent or empty.
openai_api_key = os.environ.get('OPENAI_API_KEY')
if not openai_api_key:
    raise ValueError("OpenAI API key is not set. Please set the 'OPENAI_API_KEY' environment variable in your .env file.")

# Pick the chat model by date to account for the deprecation of the dated snapshot.
import datetime

# Last day on which "gpt-3.5-turbo-0301" is still selected.
target_date = datetime.date(2024, 6, 12)
# Today's date according to the local clock.
current_date = datetime.datetime.now().date()

# Strictly after the cut-off -> "gpt-3.5-turbo"; otherwise the dated snapshot.
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"


### Main Code Cell
#### Recording Audio using `PyAudio`
#### Speech to Text using `Whisper`
#### GPT Model: `gpt-3.5-turbo`

In [22]:
def get_API():
    """Load the .env file and return the OpenAI API key.

    Returns:
    - str: The value of the 'OPENAI_API_KEY' environment variable.

    Raises:
    - ValueError: If the variable is unset or empty.
    """
    # Make the variables from the nearest .env file visible through os.environ.
    load_dotenv(find_dotenv())

    key = os.environ.get('OPENAI_API_KEY')
    if key:
        return key
    raise ValueError("OpenAI API key is not set. Please set the 'OPENAI_API_KEY' environment variable in your .env file.")

def get_Model():
    """Return the chat model name to use, chosen by today's date.

    Returns:
    - str: "gpt-3.5-turbo" when the local date is after 2024-06-12,
      otherwise "gpt-3.5-turbo-0301".
    """
    # Last day on which the dated snapshot is still selected.
    cutoff = datetime.date(2024, 6, 12)
    today = datetime.datetime.now().date()
    return "gpt-3.5-turbo" if today > cutoff else "gpt-3.5-turbo-0301"


def speech_to_text():
    """Record speech, transcribe it with Whisper, save the transcript and return it.

    NOTE(review): `speech_to_text` is defined a second time further down in
    this cell; that later definition replaces this one when the cell runs.

    Returns:
    - str: The transcribed text (the 'text' entry of Whisper's result).
    """
    # getAudio records from the microphone and returns the WAV file name.
    recording = getAudio()

    # Silence the "FP16 is not supported on CPU" UserWarning.
    warnings.filterwarnings(
        "ignore",
        category=UserWarning,
        message="FP16 is not supported on CPU; using FP32 instead",
    )

    # Load the "base" Whisper model and transcribe the recording in one go.
    transcript = whisper.load_model("base").transcribe(recording)['text']

    # Persist the transcript to disk before handing it back.
    write_to_file(transcript)
    return transcript

def list_audio_devices():
    """List the audio devices PyAudio can see.

    Returns:
    - list[tuple]: One `(index, name, maxInputChannels, defaultSampleRate)`
      tuple per device, in device-index order.
    """
    p = pyaudio.PyAudio()
    try:
        devices = []
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            devices.append((i, device_info['name'], device_info['maxInputChannels'], device_info['defaultSampleRate']))
        return devices
    finally:
        # Always release the PyAudio session, even if a device query raised.
        p.terminate()

def get_device_index_by_name(name):
    """Find the first audio device whose name contains `name`.

    The comparison is a case-insensitive substring match over the devices
    returned by `list_audio_devices()`, in listing order.

    Args:
    - name (str): Full or partial device name to look for.

    Returns:
    - The index of the first matching device, or None when nothing matches.
    """
    matches = (
        idx
        for idx, label, _, _ in list_audio_devices()
        if name.lower() in label.lower()
    )
    return next(matches, None)

# Whisper models that have already been loaded, keyed by model name.
_WHISPER_MODELS = {}


def _load_whisper_model(name="base"):
    """Return the Whisper model called `name`, loading it only on first use."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def speech_to_text():
    """Record speech from the microphone, transcribe it and save the transcript.

    The audio is recorded with `getAudio()`, transcribed with the "base"
    Whisper model, and the resulting text is written to disk via
    `write_to_file()`.

    Returns:
    - str: The transcribed text (the 'text' entry of Whisper's result).
    """
    audio = getAudio()
    # Suppress the FP16 warning
    warnings.filterwarnings("ignore", category=UserWarning, message="FP16 is not supported on CPU; using FP32 instead")

    # Reuse the already-loaded model: this function is called more than once
    # per run, and loading the model again each time is wasted work.
    model = _load_whisper_model("base")
    print('Processing...')
    # Transcribe the audio file
    result = model.transcribe(audio)
    print('Processed!')
    text = result['text']
    write_to_file(text)
    return text


def getAudio(output_filename="recorded_speech.wav", device_name="MacBook Pro Microphone", chunk_size=1024,
             format=pyaudio.paInt16, channels=1, rate=16000, silence_threshold=500, silence_duration=3):
    """
    Records audio until a period of silence is detected and saves it to a file.

    Recording also stops when the user interrupts the kernel or when the
    stream reports an error; in both cases a message is printed and whatever
    was captured so far (possibly nothing) is still written to the file.

    Args:
    - output_filename (str): Name of the output WAV file.
    - device_name (str): Name of the input audio device (case-insensitive substring match).
    - chunk_size (int): Number of frames per buffer.
    - format: Audio format (e.g., pyaudio.paInt16). NOTE: silence detection
      decodes every buffer as 16-bit integers, so it is only meaningful for
      pyaudio.paInt16.
    - channels (int): Number of audio channels.
    - rate (int): Sampling rate in Hz.
    - silence_threshold (int): Amplitude threshold for silence detection.
    - silence_duration (int): Duration of silence required to stop recording (in seconds).

    Returns:
    - str: The name of the saved audio file.

    Raises:
    - ValueError: If no audio device matches `device_name`.
    """
    device_index = get_device_index_by_name(device_name)
    if device_index is None:
        raise ValueError(f"Device '{device_name}' not found.")

    # Variables to store audio frames and silence detection
    audio_frames = []
    silent_chunks = 0
    # Number of consecutive silent buffers that add up to `silence_duration` seconds.
    max_silent_chunks = int(rate / chunk_size * silence_duration)

    def is_silent(data, threshold=silence_threshold):
        """Returns 'True' if below the silence threshold."""
        if data.size == 0:
            # An empty buffer carries no signal; count it as silence.
            return True
        # Widen before abs(): abs(-32768) overflows in int16 and stays
        # negative, which would make a clipped sample look quiet.
        max_amplitude = np.max(np.abs(data.astype(np.int32)))
        return max_amplitude < threshold

    def callback(in_data, frame_count, time_info, status):
        # Invoked by PyAudio for every captured buffer.
        nonlocal silent_chunks
        audio_frames.append(in_data)
        audio_data = np.frombuffer(in_data, dtype=np.int16)
        if is_silent(audio_data):
            silent_chunks += 1
        else:
            silent_chunks = 0
        if silent_chunks > max_silent_chunks:
            # Enough consecutive silence: tell PyAudio to finish the stream.
            return (None, pyaudio.paComplete)
        return (in_data, pyaudio.paContinue)

    # Initialize PyAudio
    p = pyaudio.PyAudio()
    stream = None

    try:
        # Open stream
        stream = p.open(format=format,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk_size,
                        stream_callback=callback,
                        input_device_index=device_index)

        print("Please start speaking. Recording...")
        stream.start_stream()

        # Keep the stream active while recording. Sleep between checks
        # instead of spinning so the wait does not occupy a CPU core.
        while stream.is_active():
            time.sleep(0.1)

    except KeyboardInterrupt:
        print("Recording interrupted by user.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Stop and close the stream on every exit path, then release PyAudio.
        if stream is not None:
            try:
                stream.stop_stream()
                stream.close()
            except Exception as e:
                print(f"An error occurred: {e}")
        p.terminate()

    # Save the recorded audio to a file
    try:
        with wave.open(output_filename, 'wb') as wf:
            wf.setnchannels(channels)
            # Module-level helper: does not rely on the terminated PyAudio instance.
            wf.setsampwidth(pyaudio.get_sample_size(format))
            wf.setframerate(rate)
            wf.writeframes(b''.join(audio_frames))
        print(f"Audio saved to {output_filename}")
    except Exception as e:
        print(f"Failed to save audio file: {e}")

    return output_filename
    
def text_to_speech(text):
    """Convert `text` to speech with the OpenAI TTS API and play it.

    The audio is generated with the "tts-1" model and the "onyx" voice,
    saved to 'response_voice.mp3' in the current working directory, and then
    played with `play_audio()`.

    Args:
    - text (str): The text to speak.
    """
    response = openai.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=text
    )
    file_path = 'response_voice.mp3'

    # Hide DeprecationWarnings only while saving the file (presumably raised
    # by `stream_to_file`), instead of silencing them for the whole session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=DeprecationWarning)
        response.stream_to_file(file_path)
    play_audio(file_path)
    
def write_to_file(text):
    '''
    Write the text to 'STT_file.txt' in the current working directory.

    The file is overwritten on every call and is written as UTF-8, matching
    the encoding `read_from_file` uses, so non-ASCII transcripts round-trip
    regardless of the platform's default encoding.

    Args:
    - text (str): The text to store.

    Returns:
    - str: The absolute path of the written file.
    '''
    with open('STT_file.txt', 'w', encoding='utf-8') as file:
        file.write(text)
    return os.path.join(os.getcwd(), 'STT_file.txt')

def read_from_file(file_path):
    """Return the entire contents of the UTF-8 text file at `file_path`.

    Args:
    - file_path (str): Path of the file to read.

    Returns:
    - str: The file's text.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()

def play_audio(file_path):
    """Play the audio file at `file_path` by handing it to `playsound`."""
    playsound(file_path)

def process_prompt(conversation_text):
    """Ask a spoken question about a conversation and speak the model's answer.

    Plays the intro prompt audio ('intro/intro_prompt_voice.mp3' under the
    current working directory), records and transcribes the user's question,
    sends it to the chat model together with `conversation_text` as context,
    and reads the reply aloud.

    Args:
    - conversation_text (str): Transcript of the conversation to use as context.

    Returns:
    - The model's reply text, or None when no question was recognized.
    """
    intro_path = os.path.join(os.getcwd(), 'intro', 'intro_prompt_voice.mp3')
    play_audio(intro_path)
    prompt = speech_to_text()
    print("Recognized Prompt:", prompt)  # Testing

    # A transcript that is empty or only whitespace is not a question;
    # skip the API call in that case.
    if not prompt or not prompt.strip():
        return None

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a memory assistant, listening to my conversations."},
            {"role": "user", "content": conversation_text},  # Provide the conversation context
            {"role": "user", "content": prompt}  # User's question
        ]
    )

    response_text = response.choices[0].message.content
    # Only speak when the model actually returned some text.
    if response_text:
        text_to_speech(response_text)
    return response_text

# Fail fast when the OpenAI API key is missing (get_API raises ValueError in that case).
api = get_API()
# Record the conversation from the microphone and transcribe it.
text = speech_to_text()
# Record a spoken question, answer it using the transcript as context, and speak the reply.
process_prompt(text)

Please start speaking. Recording...
Recording interrupted by user.
Audio saved to recorded_speech.wav
Processing...
Processed!
Please start speaking. Recording...
Recording interrupted by user.
Audio saved to recorded_speech.wav
Processing...
Processed!
Recognized Prompt:  Where did she go to school?
