## Import Libraries

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import JSONLoader
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_chroma import Chroma
import time
import requests
import json
load_dotenv()

## Environment Variables

In [35]:
# Connection settings for the Azure AI service, read from the environment
# (None when a variable is not set).
AZURE_AI_ENDPOINT = os.getenv("AZURE_AI_ENDPOINT")
AZURE_AI_KEY = os.getenv("AZURE_AI_KEY")
API_VERSION = "2024-12-01-preview"

# Each analyzer name is paired with the URL of the content it is run against.
VIDEO_ANALYZER_NAME, VIDEO_SAS_URL = "video-analyzer", os.getenv("VIDEO_SAS_URL")
PDF_ANALYZER_NAME, PDF_SAS_URL = "pdf-analyzer", os.getenv("PDF_SAS_URL")

## Helper Functions

In [36]:
def delete_analyzer(analyzer_name, endpoint, api_key, api_version="2024-12-01-preview"):
    """
    Deletes an analyzer from the Azure AI Content Understanding service.

    Parameters:
        analyzer_name (str): The name of the analyzer to delete.
        endpoint (str): The Azure AI endpoint.
        api_key (str): The API key for the Azure AI service.
        api_version (str): The API version to use. Defaults to
            "2024-12-01-preview", the value that used to be hard-coded in the
            URL, so three-argument calls keep working unchanged.

    Returns:
        str: The response text from the API. The HTTP status code is not
        inspected, so an error body is returned the same way as a success body.
    """
    url = f"{endpoint}/contentunderstanding/analyzers/{analyzer_name}?api-version={api_version}"
    headers = {
        "Ocp-Apim-Subscription-Key": api_key,
        "Content-Type": "application/json"
    }

    # Send the DELETE request
    response = requests.delete(url, headers=headers)
    return response.text


def create_or_update_video_analyzer(analyzer_name, description, scenario, field_schema, endpoint, api_key, api_version):
    """
    Creates or updates an analyzer in the Azure AI Content Understanding service
    by sending a PUT request with the analyzer definition.

    Parameters:
        analyzer_name (str): The name of the analyzer to create or update.
        description (str): A description of the analyzer.
        scenario (str): The scenario associated with the analyzer (e.g., 'videoShot').
        field_schema (dict): The schema of the fields for the analyzer.
        endpoint (str): The Azure AI endpoint.
        api_key (str): The API key for the Azure AI service.
        api_version (str): The API version to use.

    Returns:
        dict: "status_code" and "response_text" taken from the HTTP response.
    """
    # Serialize the analyzer definition that forms the request body
    payload = json.dumps({
        "description": description,
        "scenario": scenario,
        "fieldSchema": field_schema,
    })

    put_response = requests.put(
        f"{endpoint}/contentunderstanding/analyzers/{analyzer_name}?api-version={api_version}",
        headers={
            "Ocp-Apim-Subscription-Key": api_key,
            "Content-Type": "application/json",
        },
        data=payload,
    )

    # The status is handed back as-is; no exception is raised on HTTP errors
    return dict(
        status_code=put_response.status_code,
        response_text=put_response.text,
    )

def create_or_update_document_analyzer(analyzer_name, description, scenario, field_schema, config, endpoint, api_key, api_version):
    """
    Creates or updates an analyzer in the Azure AI Content Understanding service.
    Unlike the video variant, the request body also carries a "config" entry.

    Parameters:
        analyzer_name (str): The name of the analyzer.
        description (str): A description of the analyzer.
        scenario (str): The scenario type for the analyzer.
        field_schema (dict): The field schema definition.
        config (dict): Configuration settings for the analyzer.
        endpoint (str): The Azure AI endpoint.
        api_key (str): The API key for the Azure AI service.
        api_version (str): The API version to use.

    Returns:
        dict: "status_code" and "response_text" taken from the HTTP response.
    """
    analyzer_definition = {
        "description": description,
        "scenario": scenario,
        "config": config,
        "fieldSchema": field_schema,
    }
    auth_headers = {
        "Ocp-Apim-Subscription-Key": api_key,
        "Content-Type": "application/json",
    }
    target = f"{endpoint}/contentunderstanding/analyzers/{analyzer_name}?api-version={api_version}"

    # PUT the JSON-encoded definition to the analyzer resource
    outcome = requests.put(target, headers=auth_headers, data=json.dumps(analyzer_definition))

    summary = {}
    summary["status_code"] = outcome.status_code
    summary["response_text"] = outcome.text
    return summary
  

def list_analyzers(endpoint, api_key, api_version):
    """
    Lists all analyzers in the Azure AI Content Understanding service.

    Parameters:
        endpoint (str): The Azure AI endpoint.
        api_key (str): The API key for the Azure AI service.
        api_version (str): The API version to use.

    Returns:
        dict: "status_code" and "response_text" taken from the HTTP response.
    """
    # GET the analyzers collection
    listing = requests.get(
        f"{endpoint}/contentunderstanding/analyzers?api-version={api_version}",
        headers={
            "Ocp-Apim-Subscription-Key": api_key,
            "Content-Type": "application/json",
        },
    )

    return {"status_code": listing.status_code, "response_text": listing.text}

def analyze_content(analyzer_name, content_url, endpoint, api_key, api_version):
    """
    Submits content to a named analyzer in the Azure AI Content Understanding
    service via a POST to the analyzer's ":analyze" action.

    Parameters:
        analyzer_name (str): The name of the analyzer to use.
        content_url (str): The URL of the content to analyze.
        endpoint (str): The Azure AI endpoint.
        api_key (str): The API key for the Azure AI service.
        api_version (str): The API version to use.

    Returns:
        dict: "status_code", "headers" (the response's headers object, returned
        unconverted) and "response_text" taken from the HTTP response.
    """
    analyze_url = f"{endpoint}/contentunderstanding/analyzers/{analyzer_name}:analyze?api-version={api_version}"
    auth_headers = {
        "Ocp-Apim-Subscription-Key": api_key,
        "Content-Type": "application/json",
    }

    # The body only names the location of the content
    body = json.dumps({"url": content_url})
    submission = requests.post(analyze_url, headers=auth_headers, data=body)

    return dict(
        status_code=submission.status_code,
        headers=submission.headers,
        response_text=submission.text,
    )

def get_result_details(operation_location, api_key, poll_interval=5, timeout=300):
    """
    Retrieves the result details from the Azure AI Content Understanding service,
    polling until the operation is complete or a timeout occurs.

    Parameters:
        operation_location (str): The URL for the operation location.
        api_key (str): The API key for the Azure AI service.
        poll_interval (int): The time in seconds between polls (default is 5 seconds).
        timeout (int): The maximum time in seconds to wait for the operation to complete (default is 300 seconds).

    Returns:
        dict: One of the following shapes:
            - {"status_code", "json_response", "response_text"} once the
              operation reports a terminal (or unrecognized) status.
            - {"status_code", "error"} when the operation location is missing
              (status_code is None), a poll returns a non-200 response, the
              body is not valid JSON, or the wait exceeds `timeout`
              (status_code is 408).
    """
    if not operation_location:
        # A lookup such as headers.get("Operation-Location") yields None when
        # the header is absent; report that clearly instead of letting the
        # HTTP client fail on an empty URL.
        return {
            "status_code": None,
            "error": "No operation location provided; the analyze request may have failed."
        }

    headers = {
        "Ocp-Apim-Subscription-Key": api_key
    }

    # Statuses that mean "keep polling". A queued operation can be reported as
    # "NotStarted" before it becomes "Running"; compare case-insensitively.
    in_progress_states = {"notstarted", "running"}

    # Monotonic clock: elapsed time is unaffected by system clock adjustments.
    start_time = time.monotonic()

    while True:
        print("Running analysis...")  # Indicate that the analysis is in progress

        # Send the GET request to check the operation status
        response = requests.get(operation_location, headers=headers)

        if response.status_code != 200:
            return {
                "status_code": response.status_code,
                "error": f"Failed to retrieve result details. Response: {response.text}"
            }

        # Parse the response JSON; a 200 with a non-JSON body is reported as an
        # error rather than raised.
        try:
            result = response.json()
        except ValueError:
            return {
                "status_code": response.status_code,
                "error": f"Failed to parse result details as JSON. Response: {response.text}"
            }

        status = result.get("status") if isinstance(result, dict) else None
        state = status.lower() if isinstance(status, str) else None

        if state in in_progress_states:
            # The deadline applies to every in-progress state, not only "Running"
            if time.monotonic() - start_time > timeout:
                print("Operation timed out.")
                return {
                    "status_code": 408,
                    "error": "Operation timed out after waiting for completion."
                }

            time.sleep(poll_interval)
            continue

        if state == "succeeded":
            print("Analysis succeeded.")
        elif state == "failed":
            print("Analysis failed.")
        else:
            # Handle unexpected statuses
            print(f"Unexpected status: {status}")

        # Every non-polling outcome hands back the same payload shape
        return {
            "status_code": response.status_code,
            "json_response": result,
            "response_text": response.text
        }


## Delete Existing Analyzers If They Exist

In [None]:
# Issue a delete for each analyzer name this notebook uses and print the raw
# response text of every call.
for existing_analyzer in (VIDEO_ANALYZER_NAME, PDF_ANALYZER_NAME):
    print(delete_analyzer(existing_analyzer, AZURE_AI_ENDPOINT, AZURE_AI_KEY))

## Create Video Analyzer

In [38]:
# Fields the video analyzer should produce for the analyzed content: a
# free-text description and a sentiment restricted to three labels.
VIDEO_ANALYZER_FIELD_SCHEMA = {
    "fields": {
        "Description": {
            "type": "string",
            "description": "Detailed summary of the video segment, focusing on scene characteristics, lighting, and color palette."
        },
        "Sentiment": {
            "type": "string",
            "method": "classify",
            "enum": ["Positive", "Neutral", "Negative"]
        }
    }
}

result = create_or_update_video_analyzer(
    VIDEO_ANALYZER_NAME,
    "video analyzer",
    "videoShot",
    VIDEO_ANALYZER_FIELD_SCHEMA,
    AZURE_AI_ENDPOINT,
    AZURE_AI_KEY,
    API_VERSION
)

# Show the status code and response body of the create/update call; without
# this a rejected request would pass unnoticed until a later cell fails.
print(result)

## List Analyzers (Confirm the Video Analyzer Exists)

In [None]:
# Fetch the analyzer listing and show the status code and raw body.
result = list_analyzers(
    endpoint=AZURE_AI_ENDPOINT,
    api_key=AZURE_AI_KEY,
    api_version=API_VERSION,
)
print(result)

## Analyze Video

In [None]:
# Submit the video at VIDEO_SAS_URL to the video analyzer; the returned dict
# carries the status code, response headers and response text.
result = analyze_content(
    analyzer_name=VIDEO_ANALYZER_NAME,
    content_url=VIDEO_SAS_URL,
    endpoint=AZURE_AI_ENDPOINT,
    api_key=AZURE_AI_KEY,
    api_version=API_VERSION,
)
print(result)

In [None]:
# Pull the Operation-Location header out of the analyze response; it is the
# URL passed to get_result_details. The value is None when the header is absent.
response_headers = result["headers"]
operation_location = response_headers.get("Operation-Location")
print(operation_location)

## Get Results

In [None]:
# Poll the operation URL (default interval and timeout) and show the outcome.
result = get_result_details(
    operation_location=operation_location,
    api_key=AZURE_AI_KEY,
)
print(result)

## Write Data to JSON File

In [None]:
# Persist the polling result for the video so it can be loaded from disk later.
filename = "video_data.json"

with open(filename, "w") as out_handle:
    json.dump(result, out_handle, indent=4)

print(f"Data has been written to {filename}")

## Create PDF Analyzer

In [None]:
description = "pdf analyzer"
scenario = "document"
config = {"returnDetails": False}

# One row per field: (field name, value type, description sent to the service).
# Every field uses the "extract" method.
_pdf_field_specs = [
    ("Overview", "string", "Provide an overview of the service mentioned in the document"),
    ("Capabilities", "string", "the capabilities the service has"),
    ("Service_Name", "string", "the name of the service"),
    ("Date_Published", "date", "the date the document was published"),
    ("Use_Cases", "string", "summary of use cases for this technology"),
]

field_schema = {
    "fields": {
        field_name: {
            "type": value_type,
            "method": "extract",
            "description": field_description,
        }
        for field_name, value_type, field_description in _pdf_field_specs
    }
}

# Create (or update) the PDF analyzer and show the status code and body.
result = create_or_update_document_analyzer(
    analyzer_name=PDF_ANALYZER_NAME,
    description=description,
    scenario=scenario,
    field_schema=field_schema,
    config=config,
    endpoint=AZURE_AI_ENDPOINT,
    api_key=AZURE_AI_KEY,
    api_version=API_VERSION,
)

print(result)

## List Analyzers (Confirm the PDF Analyzer Exists)

In [None]:
# Fetch the analyzer listing again and show the status code and raw body.
result = list_analyzers(
    endpoint=AZURE_AI_ENDPOINT,
    api_key=AZURE_AI_KEY,
    api_version=API_VERSION,
)
print(result)

## Analyze PDF

In [None]:
# Submit the PDF at PDF_SAS_URL to the PDF analyzer; the returned dict carries
# the status code, response headers and response text.
result = analyze_content(
    analyzer_name=PDF_ANALYZER_NAME,
    content_url=PDF_SAS_URL,
    endpoint=AZURE_AI_ENDPOINT,
    api_key=AZURE_AI_KEY,
    api_version=API_VERSION,
)
print(result)

In [None]:
# Pull the Operation-Location header out of the analyze response; it is the
# URL passed to get_result_details. The value is None when the header is absent.
response_headers = result["headers"]
operation_location = response_headers.get("Operation-Location")
print(operation_location)

In [None]:
# Poll the operation URL (default interval and timeout) and show the outcome.
result = get_result_details(
    operation_location=operation_location,
    api_key=AZURE_AI_KEY,
)
print(result)

## Write Data to JSON File

In [None]:
# Persist the polling result for the PDF so it can be loaded from disk later.
filename = "pdf_data.json"

with open(filename, "w") as out_handle:
    json.dump(result, out_handle, indent=4)

print(f"Data has been written to {filename}")

In [53]:
# Load the saved PDF result, selecting the `contents` entry nested under
# json_response.result. text_content=False presumably allows non-string
# content here -- confirm against the JSONLoader documentation.
loader = JSONLoader(
    file_path="./pdf_data.json",
    jq_schema=".json_response.result.contents",
    text_content=False,
)

pdf_data = loader.load()

In [None]:
# Inspect what the loader produced from pdf_data.json.
print(pdf_data)

In [55]:
# Load the saved video result, selecting the `contents` entry nested under
# json_response.result. text_content=False presumably allows non-string
# content here -- confirm against the JSONLoader documentation.
loader = JSONLoader(
    file_path="./video_data.json",
    jq_schema=".json_response.result.contents",
    text_content=False,
)

video_data = loader.load()

In [None]:
# Inspect what the loader produced from video_data.json.
print(video_data)

## Initialize OpenAI Connections

In [57]:
# Embedding client: endpoint and key come from the environment; no deployment
# or API version is passed explicitly here.
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

# Chat client: same endpoint and key, plus the chat deployment name from the
# environment and a pinned API version.
chat = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.getenv("AZURE_OPENAI_CHAT_MODEL_NAME"),
    api_version="2024-08-01-preview",
)

## Initialize ChromaDB

In [58]:
# Build one vector store from both document lists so that vector_db explicitly
# covers the PDF and the video content; the questions below target both
# sources (one asks about the video, one about the document's publish date).
vector_db = Chroma.from_documents(pdf_data + video_data, embeddings)

## Question #1

In [None]:
# Retrieve the documents most similar to the question and show the content of
# the top match.
query = "What part in the video shows a sleepy animal?"
docs = vector_db.similarity_search(query)
top_match = docs[0]
print(top_match.page_content)

In [None]:
# Ask the chat model to answer the question using the retrieved documents.
prompt = f"answer the users question: {query} based on the following information: {docs}"
chat.invoke(prompt)

## Question #2

In [None]:
# Retrieve the documents most similar to the question and show the content of
# the top match.
query = "What date was content understanding published?"
docs = vector_db.similarity_search(query)
top_match = docs[0]
print(top_match.page_content)

In [None]:
# Ask the chat model to answer the question using the retrieved documents.
prompt = f"answer the users question: {query} based on the following information: {docs}"
chat.invoke(prompt)