# Custom Text Classification

## Load Azure Configurations

In [2]:
import os

# Azure AI Language settings are read from the environment: export
# AZURE_AI_LANGUAGE_KEY and AZURE_AI_LANGUAGE_ENDPOINT before starting the kernel.
# Either value is None when its variable is not set.
language_key = os.getenv('AZURE_AI_LANGUAGE_KEY')
language_endpoint = os.getenv('AZURE_AI_LANGUAGE_ENDPOINT')

# Project and model names referenced by the authoring and classification cells below
project_name, model_name = "Movies01", "movieclassifiermodel"

## Import Project Job

In [33]:
import json

# Path to the JSON file holding the payload that is later passed to import_project
file_path = "movieLabels.json"

# JSON text is UTF-8 by specification. State the encoding explicitly so the load
# does not depend on the platform's locale default (e.g. cp1252 on Windows),
# which would garble or reject non-ASCII characters in the labeled text.
with open(file_path, "r", encoding="utf-8") as file:
    json_data = json.load(file)

In [34]:
import requests
import json

# Submit an import job for the project
def import_project(request_body, timeout=30):
    """
    Submits an asynchronous import job for the project named by ``project_name``.

    Args:
        request_body (dict): JSON-serializable import payload.
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        str | None: The ``Operation-Location`` URL used to track the import job,
        or None if the response does not carry that header.

    Raises:
        requests.HTTPError: If the service answers with an error (4xx/5xx) status.
    """
    url = f"{language_endpoint}/language/authoring/analyze-text/projects/{project_name}/:import?api-version=2022-05-01"
    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # json= serializes the body and sets Content-Type: application/json, which
    # data=json.dumps(...) left unset. The timeout keeps an unreachable endpoint
    # from hanging the kernel indefinitely.
    response = requests.post(url, headers=headers, json=request_body, timeout=timeout)

    # Surface failures here, together with the service's error body, instead of
    # silently handing None to the status-polling step.
    if not response.ok:
        raise requests.HTTPError(
            f"Error: {response.status_code} - {response.text}", response=response
        )

    # A successful response includes an Operation-Location header containing the
    # URL for tracking the status of this asynchronous operation.
    return response.headers.get("Operation-Location")

# Start the import; the value returned is the Operation-Location URL of the import job
import_result = import_project(json_data)
print(f"Import result: {import_result}")

Import result: https://ziggylanguagedemocomplete.cognitiveservices.azure.com/language/authoring/analyze-text/projects/Movies01/import/jobs/a134841a-603b-4132-8241-3a4df37cdfd8_638803584000000000?api-version=2022-05-01


## Get Import Job Status

In [49]:
def import_project_status(import_result, timeout=30):
    """
    Fetches the state of an import job and prints it.

    Args:
        import_result (str): Job status URL (the ``Operation-Location`` value
            returned by ``import_project``).
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        None: The JSON status (or an error line for a non-200 reply) is printed.

    Raises:
        ValueError: If ``import_result`` is None or empty, i.e. there is no
            job URL to query.
    """
    # Fail with a clear message instead of letting requests reject the literal
    # URL 'None' when the import step produced no job URL.
    if not import_result:
        raise ValueError(
            "import_result is empty: the import request returned no Operation-Location URL"
        )

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # The timeout keeps an unreachable endpoint from hanging the kernel indefinitely.
    response = requests.get(import_result, headers=headers, timeout=timeout)

    if response.status_code == 200:  # Success
        # Parse the JSON response and pretty print it with indentation
        response_data = response.json()
        print(json.dumps(response_data, indent=4))
    else:
        print(f"Error: {response.status_code} - {response.text}")


In [50]:
# Print the current state of the import job identified by its status URL
import_project_status(import_result)

{
    "jobId": "a134841a-603b-4132-8241-3a4df37cdfd8_638803584000000000",
    "createdDateTime": "2025-04-16T12:46:52Z",
    "lastUpdatedDateTime": "2025-04-16T12:46:53Z",
    "expirationDateTime": "2025-04-23T12:46:52Z",
    "status": "succeeded"
}


## Train your model

In [37]:
# Request body for the training job: the label for the trained model, the
# training configuration version, and a percentage-based train/test split.
train_body = {
    "modelLabel": model_name,
    "trainingConfigVersion": "2022-05-01",
    "evaluationOptions": {
        "kind": "percentage",
        "trainingSplitPercentage": 80,
        "testingSplitPercentage": 20,
    },
}

In [None]:
import requests
import json

# Submit a training job for the project
def train_model(request_body, timeout=30):
    """
    Submits an asynchronous training job for the project named by ``project_name``.

    Args:
        request_body (dict): JSON-serializable training configuration.
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        str | None: The ``Operation-Location`` URL used to track the training
        job, or None if the response does not carry that header.

    Raises:
        requests.HTTPError: If the service answers with an error (4xx/5xx) status.
    """
    url = f"{language_endpoint}/language/authoring/analyze-text/projects/{project_name}/:train?api-version=2022-05-01"

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # json= serializes the body and sets Content-Type: application/json, which
    # data=json.dumps(...) left unset. The timeout keeps an unreachable endpoint
    # from hanging the kernel indefinitely.
    response = requests.post(url, headers=headers, json=request_body, timeout=timeout)

    # Surface failures here, together with the service's error body, instead of
    # silently handing None to the status-polling step.
    if not response.ok:
        raise requests.HTTPError(
            f"Error: {response.status_code} - {response.text}", response=response
        )

    # A successful response includes an Operation-Location header containing the
    # URL for tracking the status of this asynchronous operation.
    return response.headers.get("Operation-Location")



In [None]:
# Start training; the value returned is the Operation-Location URL of the training job
train_result = train_model(train_body)
print(f"Train result: {train_result}")

## Get training job status

In [47]:
def train_model_status(train_result, timeout=30):
    """
    Fetches the state of a training job and prints it.

    Args:
        train_result (str): Job status URL (the ``Operation-Location`` value
            returned by ``train_model``).
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        None: The JSON status (or an error line for a non-200 reply) is printed.

    Raises:
        ValueError: If ``train_result`` is None or empty, i.e. there is no
            job URL to query.
    """
    # Fail with a clear message instead of letting requests reject the literal
    # URL 'None' when the training step produced no job URL.
    if not train_result:
        raise ValueError(
            "train_result is empty: the training request returned no Operation-Location URL"
        )

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # The timeout keeps an unreachable endpoint from hanging the kernel indefinitely.
    response = requests.get(train_result, headers=headers, timeout=timeout)

    if response.status_code == 200:  # Success
        # Parse the JSON response and pretty print it with indentation
        response_data = response.json()
        print(json.dumps(response_data, indent=4))
    else:
        print(f"Error: {response.status_code} - {response.text}")

In [56]:
# Print the current state of the training job (training and evaluation progress)
train_model_status(train_result)

{
    "result": {
        "modelLabel": "movieclassifiermodel",
        "trainingConfigVersion": "2022-05-01",
        "trainingStatus": {
            "percentComplete": 100,
            "startDateTime": "2025-04-16T12:55:38.1310105Z",
            "endDateTime": "2025-04-16T13:02:59.1926631Z",
            "status": "succeeded"
        },
        "evaluationStatus": {
            "percentComplete": 100,
            "startDateTime": "2025-04-16T13:03:03.1852857Z",
            "endDateTime": "2025-04-16T13:03:33.1877571Z",
            "status": "succeeded"
        }
    },
    "jobId": "e7c9a348-fa35-4efb-8cd5-8f140be7c907_638803584000000000",
    "createdDateTime": "2025-04-16T12:47:59Z",
    "lastUpdatedDateTime": "2025-04-16T13:04:03Z",
    "expirationDateTime": "2025-04-23T12:47:59Z",
    "status": "succeeded"
}


## View your text classification model's evaluation and details

In [55]:
def view_model_details(timeout=30):
    """
    Fetches the evaluation summary of the trained model and prints it.

    The project and model are taken from the notebook-level ``project_name``
    and ``model_name`` settings.

    Args:
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        None: The JSON summary (or an error line for a non-200 reply) is printed.
    """
    url = f"{language_endpoint}/language/authoring/analyze-text/projects/{project_name}/models/{model_name}/evaluation/summary-result?api-version=2022-05-01"

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # The timeout keeps an unreachable endpoint from hanging the kernel indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 200:  # Success
        # Parse the JSON response and pretty print it with indentation
        response_data = response.json()
        print(json.dumps(response_data, indent=4))
    else:
        print(f"Error: {response.status_code} - {response.text}")

In [57]:
# Print the evaluation summary (per-class metrics) of the trained model
view_model_details()

{
    "projectKind": "CustomMultiLabelClassification",
    "customMultiLabelClassificationEvaluation": {
        "classes": {
            "Drama": {
                "f1": 0.8484848141670227,
                "precision": 0.875,
                "recall": 0.8235294222831726,
                "truePositiveCount": 28,
                "trueNegativeCount": 4,
                "falsePositiveCount": 4,
                "falseNegativeCount": 6
            },
            "Comedy": {
                "f1": 0.5909090638160706,
                "precision": 0.48148149251937866,
                "recall": 0.7647058963775635,
                "truePositiveCount": 13,
                "trueNegativeCount": 11,
                "falsePositiveCount": 14,
                "falseNegativeCount": 4
            },
            "Action": {
                "f1": 0.5945945978164673,
                "precision": 0.5789473652839661,
                "recall": 0.6111111044883728,
                "truePositiveCount": 11,
       

## Deploy your model

In [61]:
def deploy_model(timeout=30):
    """
    Submits an asynchronous deployment job for the trained model.

    The deployment is named after ``model_name`` and points at the trained model
    with that same label.

    Args:
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        str | None: The ``Operation-Location`` URL used to track the deployment
        job, or None if the response does not carry that header.

    Raises:
        requests.HTTPError: If the service answers with an error (4xx/5xx) status.
    """
    url = f"{language_endpoint}/language/authoring/analyze-text/projects/{project_name}/deployments/{model_name}?api-version=2022-05-01"

    body = {
        "trainedModelLabel": model_name
    }

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # json= serializes the body and sets Content-Type: application/json, which
    # data=json.dumps(...) left unset. The timeout keeps an unreachable endpoint
    # from hanging the kernel indefinitely.
    response = requests.put(url, headers=headers, json=body, timeout=timeout)

    # Surface failures here, together with the service's error body, instead of
    # silently handing None to the status-polling step.
    if not response.ok:
        raise requests.HTTPError(
            f"Error: {response.status_code} - {response.text}", response=response
        )

    # A successful response includes an Operation-Location header containing the
    # URL for tracking the status of this asynchronous operation.
    return response.headers.get("Operation-Location")

In [62]:
# Start the deployment; the value returned is the Operation-Location URL of the deployment job
deploy_result = deploy_model()
print(f"Deploy result: {deploy_result}")

Deploy result: https://ziggylanguagedemocomplete.cognitiveservices.azure.com/language/authoring/analyze-text/projects/Movies01/deployments/movieclassifiermodel/jobs/ee6c35dc-3a98-4eee-ae32-c27c88e82091_638803584000000000?api-version=2022-05-01


## Get deployment job status

In [63]:
def view_deployment_status(deploy_result, timeout=30):
    """
    Fetches the state of a deployment job and prints it.

    Args:
        deploy_result (str): Job status URL (the ``Operation-Location`` value
            returned by ``deploy_model``).
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        None: The JSON status (or an error line for a non-200 reply) is printed.

    Raises:
        ValueError: If ``deploy_result`` is None or empty, i.e. there is no
            job URL to query.
    """
    # Fail with a clear message instead of letting requests reject the literal
    # URL 'None' when the deployment step produced no job URL.
    if not deploy_result:
        raise ValueError(
            "deploy_result is empty: the deployment request returned no Operation-Location URL"
        )

    headers = {
        "Ocp-Apim-Subscription-Key": language_key,
    }

    # The timeout keeps an unreachable endpoint from hanging the kernel indefinitely.
    response = requests.get(deploy_result, headers=headers, timeout=timeout)

    if response.status_code == 200:  # Success
        # Parse the JSON response and pretty print it with indentation
        response_data = response.json()
        print(json.dumps(response_data, indent=4))
    else:
        print(f"Error: {response.status_code} - {response.text}")

In [64]:
# Print the current state of the deployment job identified by its status URL
view_deployment_status(deploy_result)

{
    "jobId": "ee6c35dc-3a98-4eee-ae32-c27c88e82091_638803584000000000",
    "createdDateTime": "2025-04-16T13:10:01Z",
    "lastUpdatedDateTime": "2025-04-16T13:10:03Z",
    "expirationDateTime": "2025-04-23T13:10:01Z",
    "status": "succeeded"
}


## Perform Classification

In [5]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Authenticate the client using Azure Key and Endpoint
def authenticate_client():
    """
    Builds a Text Analytics client from the notebook's key and endpoint settings.

    Returns:
        TextAnalyticsClient: A client constructed with ``language_endpoint`` and
        an ``AzureKeyCredential`` wrapping ``language_key``.
    """
    return TextAnalyticsClient(
        credential=AzureKeyCredential(language_key),
        endpoint=language_endpoint,
    )

# Initialize the notebook-level client that multi_label_classify uses for its requests
text_analytics_client = authenticate_client()

In [8]:
document = [
    """
        Ethan Hunt and his IMF team are back, and this time they're up against their most formidable foe yet: a rogue AI known as "The Entity." 
        The adventure kicks off with a bang when a next-generation Russian stealth submarine, the Sevastopol, is tricked by the AI into sinking 
        itself with its own torpedo. The AI, now sentient and mischievous, is out to control the world’s defense and financial systems.
        
        Ethan, ever the daredevil, must retrieve a two-piece cruciform key that can control the Entity. The key is scattered across the globe, 
        leading to a series of high-octane chases and explosive stunts. Picture Ethan skydiving into the Arabian Desert, only to land in the 
        middle of a camel race, narrowly avoiding a collision with a particularly grumpy camel named Humphrey.
        
        Meanwhile, Ethan's team, including the tech-savvy Benji (Simon Pegg) and the ever-reliable Luther (Ving Rhames), 
        provide comic relief amidst the chaos. Benji's attempts to hack into the Entity's systems often result in hilarious mishaps, 
        like accidentally ordering 100 pizzas to the IMF headquarters.
        
        The action ramps up as Ethan fakes the death of MI6 agent Ilsa Faust (Rebecca Ferguson) to protect her from a $50 million bounty. 
        The team infiltrates high-security locations, including a thrilling sequence where Ethan and Benji disguise themselves as clowns 
        to sneak into a circus-themed gala. The plan goes awry when Benji's oversized shoes trip an alarm, leading to a wild chase through 
        a maze of funhouse mirrors.
        
        As the stakes get higher, Ethan faces off against the Entity in a final showdown atop a speeding bullet train. With explosions, fistfights, and a heart-stopping moment where Ethan dangles from the train by a single hand, the climax is nothing short of spectacular.
        In the end, Ethan and his team manage to outsmart the Entity, saving the world once again. The film concludes with a light-hearted scene of the team celebrating their victory with a well-deserved pizza party, courtesy of Benji's earlier mishap.
    """            
]

In [10]:
def multi_label_classify(document):
    """
    Runs multi-label classification on the given documents and prints the outcome.

    The request goes through the notebook-level ``text_analytics_client`` and
    targets ``project_name`` with ``model_name`` as the deployment name.

    Args:
        document (list): The documents (movie plots) to classify.

    Returns:
        None: For each document, either its categories with confidence scores
        or its error code and message are printed; nothing is returned.
    """
    # Submit the documents and wait for the long-running operation to finish
    document_results = text_analytics_client.begin_multi_label_classify(
        document,
        project_name=project_name,
        deployment_name=model_name
    ).result()

    # Pair every input document with its result
    for plot, outcome in zip(document, document_results):
        if outcome.kind != "CustomDocumentClassification":
            # Other result kinds are reported only when flagged as an error
            if outcome.is_error is True:
                print("Movie plot '{}' has an error with code '{}' and message '{}'".format(
                    plot, outcome.error.code, outcome.error.message
                ))
            continue

        genres = outcome.classifications
        print(f"\nThe movie plot '{plot}' was classified as the following genres:\n")
        # One line per predicted category with its confidence score
        for genre in genres:
            print("'{}' with confidence score {}.".format(
                genre.category, genre.confidence_score
            ))

In [11]:
# Classify the sample plot and print its predicted genres with confidence scores
multi_label_classify(document)


The movie plot '
        Ethan Hunt and his IMF team are back, and this time they're up against their most formidable foe yet: a rogue AI known as "The Entity." 
        The adventure kicks off with a bang when a next-generation Russian stealth submarine, the Sevastopol, is tricked by the AI into sinking 
        itself with its own torpedo. The AI, now sentient and mischievous, is out to control the world’s defense and financial systems.
        
        Ethan, ever the daredevil, must retrieve a two-piece cruciform key that can control the Entity. The key is scattered across the globe, 
        leading to a series of high-octane chases and explosive stunts. Picture Ethan skydiving into the Arabian Desert, only to land in the 
        middle of a camel race, narrowly avoiding a collision with a particularly grumpy camel named Humphrey.
        
        Meanwhile, Ethan's team, including the tech-savvy Benji (Simon Pegg) and the ever-reliable Luther (Ving Rhames), 
        provide com