### Custom text classification with the Azure Cognitive Services API

Based on [this Microsoft Learn exercise](https://microsoftlearning.github.io/mslearn-ai-language/Instructions/Exercises/04-text-classification.html).

In [None]:
%pip install azure-ai-textanalytics==5.3.0

In [1]:
from dotenv import load_dotenv
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

# Get Configuration Settings
# load_dotenv() loads variables from a local .env file into the process
# environment so they can be read with os.getenv below.
load_dotenv()
ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
ai_key = os.getenv('AI_SERVICE_KEY')
project_name = os.getenv('PROJECT')
deployment_name = os.getenv('DEPLOYMENT')

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# which would otherwise only surface later as an obscure error from the SDK.
missing_settings = [
    name
    for name, value in (
        ('AI_SERVICE_ENDPOINT', ai_endpoint),
        ('AI_SERVICE_KEY', ai_key),
        ('PROJECT', project_name),
        ('DEPLOYMENT', deployment_name),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_settings)}"
    )

# Only non-secret settings are echoed; the key is never printed so it cannot
# end up in the saved cell output.
print(f'Project: {project_name} - Deployment: {deployment_name}')
print(f'Endpoint: {ai_endpoint}')

Project: ClassifyLab - Deployment: articles
Endpoint: https://ai-language423.cognitiveservices.azure.com/


In [2]:
# Read each text file in the articles folder
batchedDocuments = []
articles_folder = os.path.join(os.getcwd(), "data", "articles")

# os.listdir returns entries in arbitrary order and may include directories
# (e.g. .ipynb_checkpoints). Keep regular files only and sort them so that
# `files` and `batchedDocuments` line up deterministically on every run.
files = sorted(
    entry
    for entry in os.listdir(articles_folder)
    if os.path.isfile(os.path.join(articles_folder, entry))
)

for file_name in files:
    # Read the file contents; the context manager closes the handle promptly
    with open(os.path.join(articles_folder, file_name), encoding='utf8') as article_file:
        batchedDocuments.append(article_file.read())
    print(f'Processing {file_name}')

print(f'-------------\nProcessed {len(batchedDocuments)} documents')
# Echo the loaded texts so the reader can see exactly what will be classified
print(batchedDocuments)

Processing test1.txt
Processing test2.txt
-------------
Processed 2 documents
['Investigating the potential for life around the stars\n\nWhen the world’s most powerful telescope launches into space this year, scientists will learn whether Earth-sized planets in our \'solar neighborhood\' have a key prerequisite for life — an atmosphere.\n\nThese planets orbit an M-dwarf, the smallest and most common type of star in the galaxy. Scientists do not currently know how common it is for Earth-like planets around this type of star to have characteristics that would make them habitable.\n\n"As a starting place, it is important to know whether small, rocky planets orbiting M-dwarfs have atmospheres," said Sydney Mattos, a doctoral student in Bellows College’s Department of Earth and Planetary Sciences. "If so, it opens up our search for life outside our solar system."', "League best, worst XIs: Man United stars Pogba, Maguire had season to forget; Kane, Son shone for Spurs\n\nAfter a final day o

In [3]:
# Build the Text Analytics client: the service key is wrapped in a credential
# object, which is handed to the client together with the service endpoint.
credential = AzureKeyCredential(ai_key)
ai_client = TextAnalyticsClient(
    credential=credential,
    endpoint=ai_endpoint,
)

In [23]:
# Get Classifications
# Submit every loaded document to the custom single-label classification
# project/deployment configured above.
operation = ai_client.begin_single_label_classify(
    batchedDocuments,
    project_name=project_name,
    deployment_name=deployment_name
)
# operation.result() waits for the long-running job to finish. The pageable it
# returns can only be iterated once, so it is materialized into a list: the
# display cell can then be re-run without submitting the job again.
document_results = list(operation.result())

In [24]:
# Display the results
# Pair each file name with its result and report either the predicted
# category or the error returned for that document.
for article_name, outcome in zip(files, document_results):
    if outcome.kind == "CustomDocumentClassification":
        # Only the first classification entry of the result is reported
        top_label = outcome.classifications[0]
        category, score = top_label.category, top_label.confidence_score
        print(f"{article_name} was classified as '{category}' with confidence score {score}.")
        continue
    if outcome.is_error is True:
        failure = outcome.error
        print(f"{article_name} has an error with code '{failure.code}' and message '{failure.message}'")

test1.txt was classified as 'Entertainment' with confidence score 0.28.
test2.txt was classified as 'Sports' with confidence score 0.33.
