# Language Detection

Reference: [Azure AI Language – Language detection overview](https://learn.microsoft.com/en-us/azure/ai-services/language-service/language-detection/overview?wt.mc_id=MVP_322781)

## Install Library

In [None]:
%pip install azure-ai-textanalytics

## Load Azure Configurations

In [1]:
import os

# Read the Azure AI Language credentials from the process environment.
# AZURE_AI_LANGUAGE_KEY and AZURE_AI_LANGUAGE_ENDPOINT must be exported before
# this cell runs; a variable that is not set yields None here.
language_key = os.getenv('AZURE_AI_LANGUAGE_KEY')
language_endpoint = os.getenv('AZURE_AI_LANGUAGE_ENDPOINT')

## Create a Text Analytics Client

In [2]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Authenticate the client using Azure Key and Endpoint
def authenticate_client():
    """
    Build a Text Analytics client from the module-level key and endpoint.

    Reads the module-level ``language_key`` and ``language_endpoint`` values
    and checks that both are present before handing them to the SDK, so a
    missing setting is reported by name instead of surfacing as an opaque
    error from inside the SDK.

    Returns:
        TextAnalyticsClient: A client configured with the key and endpoint.

    Raises:
        ValueError: If ``language_key`` or ``language_endpoint`` is None or
            empty.
    """
    # The environment lookup yields None for unset variables; fail early with
    # a message that names the variable the user has to set.
    if not language_key:
        raise ValueError(
            "AZURE_AI_LANGUAGE_KEY is not set; export it before creating the client."
        )
    if not language_endpoint:
        raise ValueError(
            "AZURE_AI_LANGUAGE_ENDPOINT is not set; export it before creating the client."
        )

    return TextAnalyticsClient(
        endpoint=language_endpoint,
        credential=AzureKeyCredential(language_key),
    )

# Build the client once at cell level; this same instance is passed to
# detect_language() in a later cell.
client = authenticate_client()

## Detect Language function

In [29]:
def detect_language(client, documents):
    """
    Detect and print the primary language of each document.

    Sends ``documents`` to the service in one ``client.detect_language`` call
    and prints one line per returned result: the detected language name and
    its ISO 639-1 code for successful results, or the error code and message
    for results flagged with ``is_error``.

    Args:
        client (TextAnalyticsClient): The authenticated Azure Text Analytics client.
        documents (list): A list of documents to analyze.

    Returns:
        None
    """
    results = client.detect_language(documents)

    # Number by position in the response rather than by position among the
    # successful results, so "Document #N" keeps pointing at the N-th returned
    # item even when an earlier document failed.
    for idx, doc in enumerate(results, start=1):
        if doc.is_error:
            # Report failures instead of dropping them silently. getattr keeps
            # this tolerant of error objects that lack code/message details.
            error = getattr(doc, "error", None)
            code = getattr(error, "code", "unknown error")
            message = getattr(error, "message", "")
            print(f"Document #{idx} could not be processed: {code} - {message}\n")
            continue

        language = doc.primary_language
        # A real "\n" (not an escaped backslash) leaves a blank line between
        # entries in the printed output.
        print(
            f"Document #{idx} is in '{language.name}', "
            f"which has ISO639-1 name '{language.iso6391_name}'\n"
        )

In [30]:
# Sample inputs for language detection. Each entry is a triple-quoted string,
# so the text sent to the service includes the surrounding newlines and the
# four-space indentation.
documents = [
    # English
    """
    This document is written in English.
    """,
    
    # French
    """
    Ce document est rédigé en Français.
    """,

    # Spanish
    """
    Este documento está escrito en español.
    """,

    # German
    """
    Dieses Dokument ist auf Deutsch verfasst.
    """,

    # Japanese
    """
    これは日本語で書かれたドキュメントです。
    """,

    # Arabic
    """
    هذا مستند مكتوب باللغة العربية.
    """,

    # Mostly English sentence containing two French words
    """
    I love visiting Paris because the architecture is magnifique and the food is always délicieux.
    """,

    # Mixed input: the same English sentence followed by a full French sentence
    """
    I love visiting Paris because the architecture is magnifique and the food is always délicieux. 
    La Seine est particulièrement belle au coucher du soleil.
    """,
]

In [31]:
# Run language detection on the sample documents with the client created
# earlier; detect_language() prints its results rather than returning them.
detect_language(client, documents)

Document #1 is in 'English', which has ISO639-1 name 'en'\n
Document #2 is in 'French', which has ISO639-1 name 'fr'\n
Document #3 is in 'Spanish', which has ISO639-1 name 'es'\n
Document #4 is in 'German', which has ISO639-1 name 'de'\n
Document #5 is in 'Japanese', which has ISO639-1 name 'ja'\n
Document #6 is in 'Arabic', which has ISO639-1 name 'ar'\n
Document #7 is in 'English', which has ISO639-1 name 'en'\n
Document #8 is in 'French', which has ISO639-1 name 'fr'\n
