# In [None]:
import os
from datetime import datetime
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from openai import AzureOpenAI
import azure.cognitiveservices.speech as speechsdk
import httpx

# Service configuration.
#
# Subscription keys are read from environment variables so that no credential
# is stored in source control; each key is None when its variable is unset.
# Endpoints and the speech region keep their previous values as defaults and
# can be overridden the same way.
#
# NOTE: keys that were previously committed in this file should be treated as
# compromised and rotated in the Azure portal.

# Azure Vision API settings
VISION_ENDPOINT = os.environ.get(
    "VISION_ENDPOINT", "https://obgr56.cognitiveservices.azure.com/"
)
VISION_KEY = os.environ.get("VISION_KEY")

# Azure OpenAI API settings
OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
OPENAI_ENDPOINT = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://abill-m4abvruz-swedencentral.openai.azure.com/",
)
DEPLOYMENT_NAME = "gpt-4"

# Azure Speech service settings
SPEECH_KEY = os.environ.get("SPEECH_KEY")
SPEECH_REGION = os.environ.get("SPEECH_REGION", "eastus")

def initialize_vision_client():
    """Build the Azure Computer Vision client from the module settings.

    Returns:
        A ``ComputerVisionClient`` configured with ``VISION_ENDPOINT`` and
        ``VISION_KEY``, or ``None`` if construction raises (the error is
        printed rather than propagated).
    """
    try:
        vision_credentials = CognitiveServicesCredentials(VISION_KEY)
        vision_client = ComputerVisionClient(
            endpoint=VISION_ENDPOINT,
            credentials=vision_credentials,
        )
    except Exception as exc:
        print(f"Error initializing Vision client: {exc}")
        return None
    return vision_client

def initialize_openai_client():
    """Build the Azure OpenAI client from the module settings.

    An explicit ``httpx.Client`` is created and handed to ``AzureOpenAI``.
    If anything raises while the client is being built, that HTTP client is
    closed again so its connection pool is not leaked.

    Returns:
        An ``AzureOpenAI`` instance, or ``None`` if construction raises (the
        error is printed rather than propagated).
    """
    http_client = None
    try:
        # NOTE(review): presumably supplied explicitly to control how the
        # transport is constructed -- confirm whether it is still required.
        http_client = httpx.Client()
        return AzureOpenAI(
            api_key=OPENAI_API_KEY,
            api_version="2024-02-15-preview",
            azure_endpoint=OPENAI_ENDPOINT,
            http_client=http_client,
        )
    except Exception as e:
        # The half-built client never reaches the caller, so release the
        # transport here instead of leaving it to garbage collection.
        if http_client is not None:
            http_client.close()
        print(f"Error initializing OpenAI client: {str(e)}")
        return None

def analyze_image(vision_client, image_path):
    """Summarise an image as a caption plus any printed text found in it.

    Args:
        vision_client: Object providing ``describe_image_in_stream`` and
            ``recognize_printed_text_in_stream``; both are called with the
            opened image file.
        image_path: Path of the image, opened in binary mode.

    Returns:
        A string with up to two newline-separated parts: ``"Main caption: ..."``
        (first caption, when any caption is returned) and
        ``"Text found in image: ..."`` (all recognised words joined by spaces,
        when any text is found). The string is empty when neither is present.
        If anything raises, ``"Error analyzing image: <message>"`` is returned
        instead of propagating the exception.
    """
    try:
        with open(image_path, "rb") as stream:
            caption_result = vision_client.describe_image_in_stream(stream)
            # Rewind so the second call reads the file from the beginning.
            stream.seek(0)
            ocr_result = vision_client.recognize_printed_text_in_stream(stream)

        summary_parts = []

        captions = caption_result.captions
        if captions:
            summary_parts.append(f"Main caption: {captions[0].text}")

        if ocr_result:
            # One entry per recognised line, words separated by single spaces.
            ocr_lines = [
                " ".join(word.text for word in line.words)
                for region in ocr_result.regions
                for line in region.lines
            ]
            if ocr_lines:
                summary_parts.append("Text found in image: " + " ".join(ocr_lines))

        return "\n".join(summary_parts)
    except Exception as exc:
        return f"Error analyzing image: {exc}"

def generate_story(openai_client, image_analysis):
    """Ask the chat model for a short story inspired by an image analysis.

    Args:
        openai_client: Client exposing ``chat.completions.create``. A falsy
            value is treated as "not initialised".
        image_analysis: Text describing the image; embedded verbatim in the
            user prompt.

    Returns:
        The content of the first returned choice. On a falsy client the fixed
        string ``"Error: OpenAI client not properly initialized"`` is returned;
        if the request raises, ``"Error generating story: <message>"`` is
        returned instead of propagating the exception.
    """
    if not openai_client:
        return "Error: OpenAI client not properly initialized"

    user_prompt = f"""
    Based on the following image analysis, create an engaging short story:
    {image_analysis}

    Please create a creative and descriptive story that captures the essence of this image.
    The story should be 3-4 paragraphs long and include vivid details from the image description.
    """

    try:
        conversation = [
            {"role": "system", "content": "You are a creative storyteller who crafts engaging narratives based on visual descriptions."},
            {"role": "user", "content": user_prompt},
        ]
        completion = openai_client.chat.completions.create(
            model=DEPLOYMENT_NAME,
            messages=conversation,
            temperature=0.7,
            max_tokens=500,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error generating story: {exc}"

def text_to_speech_file(text, output_file=None):
    """Synthesize ``text`` with Azure Speech and write the audio to a file.

    Args:
        text: The text to speak.
        output_file: Destination path. When ``None``, a name of the form
            ``speech_output_<YYYYmmdd_HHMMSS>.wav`` is generated from the
            current local time.

    Returns:
        The output path when synthesis completes, otherwise ``None``.
        Failures (including any raised exception) are printed, not raised.
    """
    try:
        config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
        config.speech_synthesis_voice_name = "en-US-JennyNeural"

        if output_file is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"speech_output_{timestamp}.wav"

        synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=config,
            audio_config=speechsdk.audio.AudioOutputConfig(filename=output_file),
        )

        # Block until the asynchronous synthesis has finished.
        outcome = synthesizer.speak_text_async(text).get()
        reason = outcome.reason

        if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print(f"Speech synthesis completed successfully. Audio saved to: {output_file}")
            return output_file

        # Anything other than completion is reported as a failure.
        print(f"Error synthesizing audio: {reason}")
        if reason == speechsdk.ResultReason.Canceled:
            details = outcome.cancellation_details
            print(f"Speech synthesis canceled: {details.reason}")
            if details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {details.error_details}")
        return None

    except Exception as exc:
        print(f"Error in text-to-speech conversion: {exc}")
        return None

def _process_image(vision_client, openai_client, image_path, output_dir):
    """Run analysis -> story -> speech for one image, stopping at the first failure."""
    print("\nAnalyzing image...")
    image_analysis = analyze_image(vision_client, image_path)

    print("\nImage Analysis Results:")
    print(image_analysis)

    # analyze_image reports failures as a string with this prefix; do not feed
    # an error message to the story model.
    if image_analysis.startswith("Error analyzing image:"):
        print("\nSkipping story generation because image analysis failed.")
        return

    print("\nGenerating story...")
    story = generate_story(openai_client, image_analysis)

    print("\nGenerated Story:")
    print(story)

    # generate_story reports failures as strings with these prefixes, and the
    # returned content may be empty or None; none of these should be spoken.
    story_error_prefixes = (
        "Error generating story:",
        "Error: OpenAI client not properly initialized",
    )
    if not story or story.startswith(story_error_prefixes):
        print("\nSkipping speech synthesis because story generation failed.")
        return

    print("\nConverting story to speech...")
    output_file = os.path.join(output_dir, f"story_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav")
    result_file = text_to_speech_file(story, output_file)

    if result_file:
        print(f"\nAudio file size: {os.path.getsize(result_file) / 1024:.2f} KB")


def main():
    """Interactive loop: read an image path, narrate a generated story to a WAV file.

    Creates the ``speech_output`` directory, initialises the Vision and OpenAI
    clients, then repeatedly prompts for an image path until the user enters
    ``quit`` (case-insensitive) or closes/interrupts the prompt. Each existing
    path is analysed, turned into a story and synthesized to
    ``speech_output/story_<timestamp>.wav``; a failing stage stops the
    pipeline for that image only. Any other exception is printed and ends the
    loop.
    """
    print("Initializing clients...")
    try:
        # Create output directory
        output_dir = "speech_output"
        os.makedirs(output_dir, exist_ok=True)

        # Initialize clients
        vision_client = initialize_vision_client()
        openai_client = initialize_openai_client()

        if not vision_client or not openai_client:
            print("Failed to initialize one or more clients. Exiting...")
            return

        while True:
            # Get image path from user; end-of-input or Ctrl-C at the prompt
            # is treated as a request to quit rather than as an error.
            try:
                raw_path = input("\nEnter the path to your image (or 'quit' to exit): ")
            except (EOFError, KeyboardInterrupt):
                print()
                break

            # Surrounding whitespace is never part of the intended path.
            image_path = raw_path.strip()

            if image_path.lower() == 'quit':
                break

            if not os.path.exists(image_path):
                print(f"Error: File '{image_path}' does not exist.")
                continue

            _process_image(vision_client, openai_client, image_path, output_dir)

            print("\n" + "="*50 + "\n")

    except Exception as e:
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()