In [None]:
pip install --upgrade azure-cognitiveservices-vision-contentmoderator

In [1]:
import os.path
from pprint import pprint
import time
from io import BytesIO
from random import random
import uuid

from azure.cognitiveservices.vision.contentmoderator import ContentModeratorClient
import azure.cognitiveservices.vision.contentmoderator.models
from msrest.authentication import CognitiveServicesCredentials

In [2]:
# Connection settings come from the environment so that no credential is ever
# stored in (or committed with) the notebook. Set these before starting Jupyter:
#   CONTENT_MODERATOR_ENDPOINT          optional; falls back to the URL below
#   CONTENT_MODERATOR_SUBSCRIPTION_KEY  required
CONTENT_MODERATOR_ENDPOINT = os.environ.get(
    "CONTENT_MODERATOR_ENDPOINT",
    "https://my-cog-services-1.cognitiveservices.azure.com/",
)

# SECURITY: a literal key used to be hardcoded on this line. Any key that has
# appeared in a saved notebook must be treated as leaked and rotated.
subscription_key = os.environ.get("CONTENT_MODERATOR_SUBSCRIPTION_KEY")
if not subscription_key:
    # Surface the problem in this cell instead of as an opaque authentication
    # failure on the first service call further down.
    print(
        "CONTENT_MODERATOR_SUBSCRIPTION_KEY is not set; "
        "export it and restart the kernel before running the cells below."
    )

In [3]:
# Build the Content Moderator client used by every cell below, authenticating
# with the subscription key against the configured endpoint.
client = ContentModeratorClient(
    endpoint=CONTENT_MODERATOR_ENDPOINT,
    credentials=CognitiveServicesCredentials(subscription_key),
)

In [4]:
# Folder holding the sample text files: a "text_files" directory next to the
# resolved location of "text_moderation.txt" (a relative name, so it is resolved
# against the current working directory).
TEXT_FOLDER = os.path.join(
    os.path.split(os.path.realpath("text_moderation.txt"))[0],
    "text_files",
)

In [5]:
# Send the sample text to the text moderation endpoint with autocorrection and
# PII detection both enabled, then pretty-print the whole response.
# The file is opened in binary mode and handed to the SDK as a stream.
with open(os.path.join(TEXT_FOLDER, 'text_moderation.txt'), "rb") as text_fd:
    screen = client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=text_fd,
        language="eng",
        autocorrect=True,
        pii=True,
    )

pprint(screen.as_dict())

{'auto_corrected_text': 'Is this a garbage email abcdef@abcd.com, phone: '
                        '4255550111, IP: 255.255.255.255, 1234 Main Boulevard, '
                        'Pentapolis WA 96555.\r\n'
                        'Crap is the profanity here. Is this information PII? '
                        'phone 2065550111',
 'language': 'eng',
 'normalized_text': '   grabage email abcdef@abcd.com, phone: 4255550111, IP: '
                    '255.255.255.255, 1234 Main Boulevard, Panapolis WA '
                    '96555.\r\n'
                    'Crap   profanity .   information PII? phone 2065550111',
 'original_text': 'Is this a grabage email abcdef@abcd.com, phone: 4255550111, '
                  'IP: 255.255.255.255, 1234 Main Boulevard, Panapolis WA '
                  '96555.\r\n'
                  'Crap is the profanity here. Is this information PII? phone '
                  '2065550111',
 'pii': {'address': [{'index': 81,
                      'text': '1234 Main Boulevar

In [6]:
# Re-point TEXT_FOLDER for the custom term list section: a "text_files"
# directory next to the resolved location of "custom_term_list.txt" (a relative
# name, so it is resolved against the current working directory).
TEXT_FOLDER = os.path.join(
    os.path.split(os.path.realpath("custom_term_list.txt"))[0],
    "text_files",
)

In [7]:
# Create a new custom term list, show what the service returned, and keep its
# id in `list_id` for the cells that follow.
print("\nCreating list")
custom_list = client.list_management_term_lists.create(
    content_type="application/json",
    body={"name": "Term list name", "description": "Term list description"},
)
print("List created:")
pprint(custom_list.as_dict())

list_id = custom_list.id


Creating list
List created:
{'description': 'Term list description', 'id': 832, 'name': 'Term list name'}


In [8]:
# Rename the term list and replace its description, then show the updated record.
print("\nUpdating details for list {}".format(list_id))
updated_list = client.list_management_term_lists.update(
    list_id=list_id,
    content_type="application/json",
    body={"name": "New Term list name", "description": "New list description"},
)
pprint(updated_list.as_dict())


Updating details for list 832
{'description': 'New list description', 'id': 832, 'name': 'New Term list name'}


In [9]:
# Register two English terms ("term1" and "term2") on the custom list.
print("\nAdding terms to list {}".format(list_id))
client.list_management_term.add_term(list_id=list_id, term="term1", language="eng")
# Kept as the cell's final bare expression so the notebook displays its result.
client.list_management_term.add_term(list_id=list_id, term="term2", language="eng")


Adding terms to list 832


{'ContentId': '182966',
 'AdditionalInfo': [{'Key': 'Source', 'Value': '832'}],
 'Status': {'Code': 3000, 'Description': 'OK', 'Exception': None},
 'TrackingId': 'b0f9cc7e-215b-44c8-ae19-c2661b98dd15'}

In [10]:
# Fetch every English term currently stored on the list and print the `data`
# portion of the response.
print("\nGetting all term IDs for list {}".format(list_id))
terms = client.list_management_term.get_all_terms(
    list_id=list_id,
    language="eng",
)
terms_data = terms.data
pprint(terms_data.as_dict())


Getting all term IDs for list 832
{'language': 'eng',
 'status': {'code': 3000, 'description': 'OK'},
 'terms': [{'term': 'term1'}, {'term': 'term2'}],
 'tracking_id': 'b4d45862-acfd-40ac-b89a-591d1548236d'}


In [11]:
# Ask the service to rebuild the search index for this list and print the
# response it returns.
print("\nRefreshing the search index for list {}".format(list_id))
refresh_index = client.list_management_term_lists.refresh_index_method(
    list_id=list_id,
    language="eng",
)
pprint(refresh_index.as_dict())


Refreshing the search index for list 832
{'advanced_info': [],
 'content_source_id': '832',
 'is_update_success': True,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': 'be94a6dd-bbb7-4f76-845d-0cb246fcb7df'}


In [12]:
# Screen the sample file against the custom term list only: autocorrection and
# PII detection are switched off, and `list_id` selects the list to match against.
with open(os.path.join(TEXT_FOLDER, 'custom_term_list.txt'), "rb") as text_fd:
    screen = client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=text_fd,
        language="eng",
        autocorrect=False,
        pii=False,
        list_id=list_id,
    )

pprint(screen.as_dict())

{'language': 'eng',
 'normalized_text': ' text contains  terms "term1"  "term2".',
 'original_text': 'This text contains the terms "term1" and "term2".',
 'status': {'code': 3000, 'description': 'OK'},
 'terms': [{'index': 23, 'list_id': 832, 'original_index': 30, 'term': 'term1'},
           {'index': 32,
            'list_id': 832,
            'original_index': 42,
            'term': 'term2'}],
 'tracking_id': 'ba05033c-35ec-435b-905e-3c3e572753d3'}


In [13]:
# Delete a single term ("term1") from the custom list.
term_to_remove = "term1"
print("\nRemove term {} from list {}".format(term_to_remove, list_id))
client.list_management_term.delete_term(list_id=list_id, term=term_to_remove, language="eng")


Remove term term1 from list 832


In [14]:
# Remove every remaining English term from the custom term list.
# The message previously said "image list", but this cell operates on the term
# list created above, so the wording is corrected here.
print("\nDelete all terms in the term list {}".format(list_id))
client.list_management_term.delete_all_terms(list_id=list_id, language="eng")


Delete all terms in the image list 832


In [15]:
# Clean up: delete the custom term list itself.
print("\nDelete the term list {}".format(list_id))
client.list_management_term_lists.delete(
    list_id=list_id,
)


Delete the term list 832


In [16]:
# Publicly hosted sample images used by the image moderation cells below.
IMAGE_LIST = [
    # [0]: passed to the OCR (text extraction) call
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample2.jpg",
    # [1]: passed to the adult/racy evaluation and face detection calls
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png",
]

In [17]:
# Print one line per sample image URL; no service call is made in this cell.
for image_url in IMAGE_LIST:
    banner = "\nEvaluate image {}".format(image_url)
    print(banner)


Evaluate image https://moderatorsampleimages.blob.core.windows.net/samples/sample2.jpg

Evaluate image https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png


In [18]:
# Adult / racy content detection on the second sample image, referenced by URL.
print("\nEvaluate for adult and racy content.")
evaluation = client.image_moderation.evaluate_url_input(
    content_type="application/json",
    data_representation="URL",
    value=IMAGE_LIST[1],
    cache_image=True,
)
pprint(evaluation.as_dict())


Evaluate for adult and racy content.
{'adult_classification_score': 0.001438833656720817,
 'advanced_info': [{'key': 'ImageDownloadTimeInMs', 'value': '297'},
                   {'key': 'ImageSizeInBytes', 'value': '2278902'}],
 'cache_id': 'd1727060-7fb0-47b5-8390-5efab73df232_637668530553834032',
 'is_image_adult_classified': False,
 'is_image_racy_classified': False,
 'racy_classification_score': 0.004629917559213936,
 'result': False,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '6842c02f-4ba0-43f9-a6fa-72cc9bb29ae8'}


In [19]:
# OCR: detect and extract English text from the first sample image, referenced by URL.
print("\nDetect and extract text.")
evaluation = client.image_moderation.ocr_url_input(
    language="eng",
    content_type="application/json",
    cache_image=True,
    data_representation="URL",
    value=IMAGE_LIST[0],
)
pprint(evaluation.as_dict())


Detect and extract text.
{'candidates': [],
 'language': 'eng',
 'metadata': [{'key': 'ImageDownloadTimeInMs', 'value': '172'},
              {'key': 'ImageSizeInBytes', 'value': '273405'}],
 'status': {'code': 3000, 'description': 'OK'},
 'text': 'IF WE DID \n'
         'ALL \n'
         'THE THINGS \n'
         'WE ARE \n'
         'CAPABLE \n'
         'OF DOING, \n'
         'WE WOULD \n'
         'LITERALLY \n'
         'ASTOUND \n'
         'OURSELVES. \n',
 'tracking_id': '91c88ae4-4a11-414d-b510-b0b1ba3dd6e8'}


In [20]:
# Face detection on the second sample image, referenced by URL.
print("\nDetect faces.")
evaluation = client.image_moderation.find_faces_url_input(
    content_type="application/json",
    data_representation="URL",
    value=IMAGE_LIST[1],
    cache_image=True,
)
pprint(evaluation.as_dict())


Detect faces.
{'advanced_info': [{'key': 'ImageDownloadTimeInMs', 'value': '232'},
                   {'key': 'ImageSizeInBytes', 'value': '2278902'}],
 'count': 6,
 'faces': [{'bottom': 633, 'left': 297, 'right': 531, 'top': 399},
           {'bottom': 503, 'left': 1228, 'right': 1453, 'top': 278},
           {'bottom': 595, 'left': 47, 'right': 257, 'top': 385},
           {'bottom': 619, 'left': 966, 'right': 1168, 'top': 417},
           {'bottom': 590, 'left': 589, 'right': 781, 'top': 398},
           {'bottom': 578, 'left': 807, 'right': 978, 'top': 407}],
 'result': True,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '5d52b719-0b6b-417a-8781-80da100644be'}
