## Comprehend Custom Classification Example
### Spam Detection Example
### [Spam Text Messages Dataset](https://www.kaggle.com/datasets/team-ai/spam-text-message-classification)

In [None]:
import pandas as pd
import numpy as np

# Load the spam dataset from the working directory and preview its first rows.
df = pd.read_csv("spam.csv")
df.head(n=5)

In [None]:
# Number of rows in the dataset.
print(df.shape[0])

### Boto3 Clients

In [None]:
import boto3
# S3 resource: used below to create the bucket and upload the dataset.
s3 = boto3.resource(service_name="s3")
# Amazon Comprehend client: used for training, endpoint management and inference.
comprehend = boto3.client(service_name="comprehend")

### Create Bucket For Dataset

In [None]:
# Create the S3 bucket that will hold the training data.
# Replace this with your unique bucket name.
bucket_request = {"Bucket": "custom-comprehend-example-mars"}
# S3 rejects CreateBucket outside us-east-1 unless the request names the
# target region, so pass along the region the S3 client is configured for.
s3_region = s3.meta.client.meta.region_name
if s3_region not in (None, "us-east-1", "aws-global"):
    bucket_request["CreateBucketConfiguration"] = {"LocationConstraint": s3_region}
s3.create_bucket(**bucket_request)

In [None]:
# Upload the local CSV into the bucket under the same object key.
# Replace Bucket with your unique bucket name.
s3.meta.client.upload_file(
    Filename='spam.csv',
    Bucket='custom-comprehend-example-mars',
    Key='spam.csv',
)

In [None]:
# Name given to the custom classifier; the endpoint name is derived from it later.
document_classifier_name = "SpamClassifierOne"

In [None]:
# Create a document classifier trained on the CSV uploaded to S3.
# Create an IAM role that gives Comprehend data access and paste its ARN into DataAccessRoleArn.
create_response = comprehend.create_document_classifier(
    InputDataConfig={
        # Must point at the bucket the dataset was uploaded to
        # (replace with your unique bucket name).
        'S3Uri': 's3://custom-comprehend-example-mars/spam.csv'
    },
    DataAccessRoleArn='Enter IAM role arn here',
    DocumentClassifierName=document_classifier_name,
    LanguageCode='en'
)
# print() does not apply %-formatting to extra arguments, so interpolate explicitly.
print(f"Create response: {create_response}\n")

In [None]:
# Check the status of the classifier
describe_response = comprehend.describe_document_classifier(
    DocumentClassifierArn=create_response['DocumentClassifierArn'])
# print() does not apply %-formatting to extra arguments, so interpolate explicitly.
print(f"Describe response: {describe_response}\n")

In [None]:
# Display the classifier status as of the most recent describe call.
describe_response['DocumentClassifierProperties']['Status']

### Track Model Training

In [None]:
import time

# Statuses in which training has not finished yet. A new classifier starts out
# as SUBMITTED, so waiting only on TRAINING would stop polling immediately.
pending_statuses = ('SUBMITTED', 'TRAINING')

describe_response = comprehend.describe_document_classifier(
    DocumentClassifierArn=create_response['DocumentClassifierArn'])
while describe_response['DocumentClassifierProperties']['Status'] in pending_statuses:
    # Sleep before re-polling so no minute is wasted after the final status arrives.
    time.sleep(60)
    describe_response = comprehend.describe_document_classifier(
        DocumentClassifierArn=create_response['DocumentClassifierArn'])
    print(describe_response['DocumentClassifierProperties']['Status'])
# Display the final describe response (terminal status and properties).
describe_response

### Grab Document Classifier ARN

In [None]:
# Keep the ARN of the classifier created above in its own variable and display it.
DocumentClassifierArn = create_response["DocumentClassifierArn"]
DocumentClassifierArn

### Create Real-Time Endpoint

In [None]:
# The endpoint name is derived from the classifier name.
realtime_endpoint_name = document_classifier_name + '-comprehend-ep'

response = comprehend.create_endpoint(
    EndpointName=realtime_endpoint_name,
    # Use the classifier ARN captured earlier rather than a hand-pasted placeholder.
    ModelArn=DocumentClassifierArn,
    DesiredInferenceUnits=10)
# Keep the endpoint ARN for the status, inference and clean-up calls.
endpoint_arn = response['EndpointArn']

In [None]:
# Fetch the current description of the endpoint and display it.
response = comprehend.describe_endpoint(EndpointArn=endpoint_arn)
response

In [None]:
# Display the endpoint status as of the most recent describe call.
response['EndpointProperties']['Status']

In [None]:
import time

# Poll the endpoint every 30 seconds for as long as it reports CREATING,
# printing each status that is observed.
describe_response = comprehend.describe_endpoint(EndpointArn=endpoint_arn)
endpoint_status = describe_response['EndpointProperties']['Status']
while endpoint_status == 'CREATING':
    describe_response = comprehend.describe_endpoint(EndpointArn=endpoint_arn)
    endpoint_status = describe_response['EndpointProperties']['Status']
    print(endpoint_status)
    time.sleep(30)
# Display the last describe response received.
describe_response

### Invocation

In [None]:
# First message of the dataset; copy to next cell for text
df.loc[0, 'Message']

In [None]:
# Classify a sample message with the real-time endpoint.
response = comprehend.classify_document(
        Text='Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...',
        # This parameter takes the endpoint ARN (not a role ARN); reuse the one
        # returned by create_endpoint.
        EndpointArn=endpoint_arn
    )
response

## Clean Up

In [None]:
# Delete the real-time endpoint created above so it stops running.
# This parameter takes the endpoint ARN (not a role ARN); reuse the one
# returned by create_endpoint.
comprehend.delete_endpoint(EndpointArn=endpoint_arn)