In [3]:
import os
from openai import AzureOpenAI
import json
import requests
import speech_recognition as sr
import pyttsx3
import PyPDF2

# Load the service settings from the JSON config file in the working directory.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# All five entries are required; a missing one raises KeyError right here.
key = config['key']
endpoint_url = config['endpoint_url']
whisper_model = config['whisper_model']
chat_model = config['chat_model']
region = config['region']

# Audio-transcription URL for the Whisper deployment.
# NOTE(review): `final_url`, `headers` and `region` are not used by the code
# visible in this file; presumably kept for direct REST calls -- confirm.
final_url = f"{endpoint_url}/openai/deployments/{whisper_model}/audio/transcriptions?api-version=2023-09-01-preview"

headers = {
    "api-key": key,
}

# Chat client; the interactive loop below calls `client.chat.completions`.
client = AzureOpenAI(
    api_version="2024-02-01",
    azure_endpoint=endpoint_url,
    api_key=key
)

# Shared speech recognizer (used by get_speech_input) and text-to-speech
# engine (used to read the model's answers aloud).
recognizer = sr.Recognizer()
engine = pyttsx3.init()


def get_speech_input():
    """Record one utterance from the microphone and return its transcript.

    The audio is captured with the module-level ``recognizer`` and
    transcribed through ``recognizer.recognize_google``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed (a message is printed
        in both failure cases).
    """
    with sr.Microphone() as mic:
        print("Please say your prompt:")
        captured = recognizer.listen(mic)
        try:
            spoken_text = recognizer.recognize_google(captured)
        except sr.UnknownValueError:
            # The recognizer produced no usable transcript.
            print("Sorry, I could not understand the audio.")
            return None
        except sr.RequestError as err:
            # The recognition request itself failed.
            print(f"Could not request results; {err}")
            return None
        else:
            print(f"You said: {spoken_text}")
            return spoken_text


def get_text_input():
    """Ask for a prompt on the console and return the line the user typed."""
    return input("Please type your prompt: ")


def get_document_input():
    """Build a prompt from a PDF document plus a typed question.

    Asks for a document path, extracts the PDF's text with
    ``extract_text_from_pdf`` and then asks for a prompt about it.

    Returns:
        The document text, a blank line, and the typed prompt as one string,
        or ``None`` when the path is not a PDF or the file cannot be read
        (a message is printed in both failure cases).
    """
    file_path = input("Please provide the path to the document: ").strip()

    # Compare case-insensitively so a path such as "REPORT.PDF" is accepted.
    if not file_path.lower().endswith('.pdf'):
        # Only PDF extraction is implemented, so no other format is advertised.
        print("Unsupported document format. Please use PDF.")
        return None

    try:
        text = extract_text_from_pdf(file_path)
    except OSError as e:
        # Missing or unreadable file: report it and return None, matching how
        # get_speech_input signals a failed input, instead of crashing.
        print(f"Could not read the document; {e}")
        return None

    print("Document text extracted. Please provide a prompt related to the document:")
    prompt = input("Please type your prompt: ")
    return f"{text}\n\n{prompt}"


def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    The per-page results of ``extract_text()`` are concatenated in page
    order with no separator between pages.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated page text; an empty string for a PDF with no pages.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Join inside the `with` block: pages are read from the open file.
        return "".join(page.extract_text() for page in reader.pages)




# Interactive loop: pick an input method, read a prompt, send it to the chat
# deployment, then print the answer and read it aloud. The loop ends when no
# prompt was obtained or the prompt is one of the exit words.
input_readers = {
    'speech': get_speech_input,
    'text': get_text_input,
    'document': get_document_input,
}
exit_words = ('bye', 'end', 'ok bye', 'exit')

while True:
    input_method = input(
        "Would you like to give your prompt via speech, text, or document? (speech/text/document): "
    ).strip().lower()

    read_prompt = input_readers.get(input_method)
    if read_prompt is None:
        print("Invalid input method. Please choose 'speech', 'text', or 'document'.")
        continue
    user_prompt = read_prompt()

    # An empty or None prompt ends the session, as does any exit word.
    wants_exit = not user_prompt or user_prompt.lower() in exit_words
    if wants_exit:
        print("Exiting the process.")
        break

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(model=chat_model, messages=chat_messages)

    # Only the first choice is used.
    response = completion.choices[0].message.content
    print(response)
    engine.say(response)
    engine.runAndWait()

Hello! How can I assist you today?
Exiting the process.
