### Get the Personalize boto3 Client

In [39]:
import boto3

import json
import numpy as np
import pandas as pd
import time

# Two boto3 clients are used throughout the notebook: the control-plane client
# ("personalize") and the runtime client ("personalize-runtime").
personalize = boto3.client(service_name="personalize")
personalize_runtime = boto3.client(service_name="personalize-runtime")

### Specify a Bucket and Data Output Location

In [40]:
# S3 bucket the CSV is uploaded to and that the dataset import job later reads from.
bucket = "personalizedemozixiqi"       # name of S3 bucket
# NOTE: this single value is used twice by the upload cell -- as the local path of the
# CSV file and as the S3 object key -- and again to build the s3:// data location.
filename = "tripadvisor_data/tripadvisor.csv"  # local path of the CSV, reused as the S3 object key

In [41]:
# Upload the local CSV to s3://<bucket>/<filename>; `filename` is both the local
# source path and the destination object key.
boto3.Session().resource("s3").Object(bucket, filename).upload_file(filename)

### Create Schema

In [42]:
# Column name / Avro type pairs, in the order they are declared in the schema.
interaction_columns = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("OVERALL_RATING", "int"),
    ("RATING1", "int"),
    ("RATING2", "int"),
    ("RATING3", "int"),
    ("RATING4", "int"),
    ("TIMESTAMP", "long"),
]

# Avro record describing one row of the interactions dataset.
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": column_name, "type": avro_type}
        for column_name, avro_type in interaction_columns
    ],
    "version": "1.0",
}

# Register the schema (sent as a JSON string) and keep its ARN for dataset creation.
create_schema_response = personalize.create_schema(
    name="TRIPADVISOR",
    schema=json.dumps(schema),
)
schema_arn = create_schema_response["schemaArn"]

print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:990615222287:schema/TRIPADVISOR",
  "ResponseMetadata": {
    "RequestId": "4602157d-5b93-4bc2-ad89-1576fe526210",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:02:36 GMT",
      "x-amzn-requestid": "4602157d-5b93-4bc2-ad89-1576fe526210",
      "content-length": "77",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


### Create and Wait for Dataset Group

#### Create Dataset Group

In [43]:
# Create the dataset group and remember its ARN; later cells attach the dataset
# and the solutions to it.
create_dataset_group_response = personalize.create_dataset_group(name="tripadvisor-dataset-group")
dataset_group_arn = create_dataset_group_response["datasetGroupArn"]

print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:990615222287:dataset-group/tripadvisor-dataset-group",
  "ResponseMetadata": {
    "RequestId": "1c0155e6-1d48-48b1-a466-b4ea8bd6bff3",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:02:52 GMT",
      "x-amzn-requestid": "1c0155e6-1d48-48b1-a466-b4ea8bd6bff3",
      "content-length": "104",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Wait for Dataset Group to Have ACTIVE Status

In [44]:
# Poll the dataset group once a minute, for at most 3 hours, until it is ACTIVE.
# A CREATE FAILED status or an expired deadline raises, so that "Run All" stops here
# instead of letting later cells run against a dataset group that is not usable.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        # failureReason is only expected on failed resources, hence .get with a fallback
        raise RuntimeError("DatasetGroup creation failed: {}".format(
            describe_dataset_group_response["datasetGroup"].get("failureReason", "no failureReason returned")
        ))

    time.sleep(60)
else:
    # the while condition became false without a break: the deadline passed
    raise TimeoutError("DatasetGroup did not reach ACTIVE status within 3 hours")

DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE


### Create Dataset

In [45]:
# Create the interactions dataset inside the dataset group, bound to the schema
# registered earlier, and keep its ARN for the import job.
dataset_type = "INTERACTIONS"

create_dataset_response = personalize.create_dataset(
    schemaArn=schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type,
    name="tripadvisor-dataset",
)
dataset_arn = create_dataset_response["datasetArn"]

print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:us-east-1:990615222287:dataset/tripadvisor-dataset-group/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "cd614d66-b9ad-4856-bd11-5170cbc5b054",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:04:08 GMT",
      "x-amzn-requestid": "cd614d66-b9ad-4856-bd11-5170cbc5b054",
      "content-length": "106",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


### Prepare, Create, and Wait for Dataset Import Job

#### Attach Policy to S3 Bucket

In [46]:
s3 = boto3.client("s3")

# Single statement granting the Personalize service principal read access
# (GetObject + ListBucket) on the bucket itself and on every object in it.
personalize_read_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": [
        "arn:aws:s3:::{}".format(bucket),
        "arn:aws:s3:::{}/*".format(bucket),
    ],
}

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [personalize_read_statement],
}

# Left as the trailing expression so the API response is shown as the cell output.
s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy))

{'ResponseMetadata': {'RequestId': '52E68D18EBD2FB22',
  'HostId': 'JgyBspyjWM+q0NlgU8k0G4mMo1mZC21p1FXS+3DqaCEIs8VDo6ei/2SbCc+JeuwUDMUf7vvErBk=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'JgyBspyjWM+q0NlgU8k0G4mMo1mZC21p1FXS+3DqaCEIs8VDo6ei/2SbCc+JeuwUDMUf7vvErBk=',
   'x-amz-request-id': '52E68D18EBD2FB22',
   'date': 'Tue, 03 Dec 2019 20:04:16 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

#### Create Personalize Role

In [47]:
iam = boto3.client("iam")

role_name = "PersonalizeRoleTripAdvisor"

# Trust policy: lets the Personalize service assume this role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "personalize.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

create_role_response = iam.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
)
role_arn = create_role_response["Role"]["Arn"]

# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes "personalize" or "Personalize"
# if you would like to use a bucket with a different name, please consider creating and attaching a new policy
# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn)

# Pause for a minute so the policy attachment can propagate before the role is used.
time.sleep(60)

print(role_arn)

arn:aws:iam::990615222287:role/PersonalizeRoleTripAdvisor


#### Create Dataset Import Job

In [48]:
# Start importing the uploaded CSV from S3 into the interactions dataset, using the
# role created above; the job ARN is needed to poll for completion.
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="tripadvisor-dataset-import-job",
    datasetArn=dataset_arn,
    dataSource={"dataLocation": "s3://{}/{}".format(bucket, filename)},
    roleArn=role_arn,
)
dataset_import_job_arn = create_dataset_import_job_response["datasetImportJobArn"]

print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:990615222287:dataset-import-job/tripadvisor-dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "00183d67-a520-427b-9529-4f20c8d23bab",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:06:23 GMT",
      "x-amzn-requestid": "00183d67-a520-427b-9529-4f20c8d23bab",
      "content-length": "118",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Wait for Dataset Import Job to Have ACTIVE Status

In [49]:
# Poll the dataset import job once a minute, for at most 3 hours, until it is ACTIVE.
# A CREATE FAILED status or an expired deadline raises, so that "Run All" stops here
# instead of training a solution on data that was never imported.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        # failureReason is only expected on failed resources, hence .get with a fallback
        raise RuntimeError("DatasetImportJob creation failed: {}".format(
            describe_dataset_import_job_response["datasetImportJob"].get("failureReason", "no failureReason returned")
        ))

    time.sleep(60)
else:
    # the while condition became false without a break: the deadline passed
    raise TimeoutError("DatasetImportJob did not reach ACTIVE status within 3 hours")

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


### Select Recipe

In [50]:
# List the recipes; the response is only displayed as the cell output for reference.
# The recipe ARN below is hard-coded rather than picked from that response.
list_recipes_response = personalize.list_recipes()
recipe_arn = "arn:aws:personalize:::recipe/aws-popularity-count" # aws-popularity-count selected for demo purposes
list_recipes_response

{'recipes': [{'name': 'aws-hrnn',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 65000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-coldstart',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-coldstart',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-metadata',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-metadata',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 20, 0, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-personalized-ranking',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-personalized-ranking',
   'stat

### Create and Wait for Solution

#### Create Solution

In [51]:
# Create a solution for the dataset group using the recipe selected above, and keep
# its ARN for creating a solution version.
create_solution_response = personalize.create_solution(
    recipeArn=recipe_arn,
    datasetGroupArn=dataset_group_arn,
    name="tripadvisor-solution",
)
solution_arn = create_solution_response["solutionArn"]

print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution",
  "ResponseMetadata": {
    "RequestId": "2e7bb862-96fa-44cf-8c9f-02467761a86c",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:21:29 GMT",
      "x-amzn-requestid": "2e7bb862-96fa-44cf-8c9f-02467761a86c",
      "content-length": "90",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Create Solution Version

In [52]:
# Request a new version of the solution; its ARN is used for polling, metrics and
# the campaign.
create_solution_version_response = personalize.create_solution_version(solutionArn=solution_arn)
solution_version_arn = create_solution_version_response["solutionVersionArn"]

print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution/e3f02b7d",
  "ResponseMetadata": {
    "RequestId": "c2706962-5e91-45f0-8367-30571fe5aa20",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 20:21:36 GMT",
      "x-amzn-requestid": "c2706962-5e91-45f0-8367-30571fe5aa20",
      "content-length": "106",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Wait for Solution Version to Have ACTIVE Status

In [53]:
# Poll the solution version once a minute, for at most 3 hours, until it is ACTIVE.
# A CREATE FAILED status or an expired deadline raises, so that "Run All" stops here
# instead of requesting metrics or a campaign for a version that is not usable.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        # failureReason is only expected on failed resources, hence .get with a fallback
        raise RuntimeError("SolutionVersion creation failed: {}".format(
            describe_solution_version_response["solutionVersion"].get("failureReason", "no failureReason returned")
        ))

    time.sleep(60)
else:
    # the while condition became false without a break: the deadline passed
    raise TimeoutError("SolutionVersion did not reach ACTIVE status within 3 hours")

SolutionVersion: CREATE PENDING
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGR

#### Get Metrics of Solution

In [54]:
# Fetch the metrics reported for the solution version and pretty-print the whole response.
get_solution_metrics_response = personalize.get_solution_metrics(solutionVersionArn=solution_version_arn)
print(json.dumps(get_solution_metrics_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution/e3f02b7d",
  "metrics": {
    "coverage": 0.0015,
    "mean_reciprocal_rank_at_25": 0.0,
    "normalized_discounted_cumulative_gain_at_10": 0.0,
    "normalized_discounted_cumulative_gain_at_25": 0.0,
    "normalized_discounted_cumulative_gain_at_5": 0.0,
    "precision_at_10": 0.0,
    "precision_at_25": 0.0,
    "precision_at_5": 0.0
  },
  "ResponseMetadata": {
    "RequestId": "00efea09-5e65-405f-9037-c0af4cf0a249",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 21:02:54 GMT",
      "x-amzn-requestid": "00efea09-5e65-405f-9037-c0af4cf0a249",
      "content-length": "383",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


### Create and Wait for Campaign

#### Create Campaign

In [55]:
# Deploy the solution version as a campaign with a minimum provisioned TPS of 1;
# the campaign ARN is what the runtime client queries for recommendations.
create_campaign_response = personalize.create_campaign(
    minProvisionedTPS=1,
    solutionVersionArn=solution_version_arn,
    name="DEMO-campaign",
)
campaign_arn = create_campaign_response["campaignArn"]

print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-east-1:990615222287:campaign/DEMO-campaign",
  "ResponseMetadata": {
    "RequestId": "f52f0dcf-2f9a-4e67-97ae-433621194da6",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 21:03:05 GMT",
      "x-amzn-requestid": "f52f0dcf-2f9a-4e67-97ae-433621194da6",
      "content-length": "83",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


#### Wait for Campaign to Have ACTIVE Status

In [56]:
# Poll the campaign once a minute, for at most 3 hours, until it is ACTIVE.
# A CREATE FAILED status or an expired deadline raises, so that "Run All" stops here
# instead of querying a campaign that cannot serve recommendations.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        # failureReason is only expected on failed resources, hence .get with a fallback
        raise RuntimeError("Campaign creation failed: {}".format(
            describe_campaign_response["campaign"].get("failureReason", "no failureReason returned")
        ))

    time.sleep(60)
else:
    # the while condition became false without a break: the deadline passed
    raise TimeoutError("Campaign did not reach ACTIVE status within 3 hours")

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


### Get Recommendations

#### Call GetRecommendations

In [58]:
# IDs are kept as integers here and converted to strings at the call site.
user_id = 0
item_id = 0

# NOTE(review): both userId and itemId are sent with the request -- confirm which of
# the two the selected recipe actually uses.
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn=campaign_arn,
    userId=str(user_id),
    itemId=str(item_id),
)
item_list = get_recommendations_response["itemList"]

print("Recommendations: {}".format(json.dumps(item_list, indent=2)))

Recommendations: [
  {
    "itemId": "11981"
  },
  {
    "itemId": "17078"
  },
  {
    "itemId": "12222"
  },
  {
    "itemId": "1238"
  },
  {
    "itemId": "17097"
  },
  {
    "itemId": "1663"
  },
  {
    "itemId": "11925"
  },
  {
    "itemId": "17219"
  },
  {
    "itemId": "4322"
  },
  {
    "itemId": "4567"
  },
  {
    "itemId": "12252"
  },
  {
    "itemId": "17110"
  },
  {
    "itemId": "2695"
  },
  {
    "itemId": "12115"
  },
  {
    "itemId": "12158"
  },
  {
    "itemId": "12022"
  },
  {
    "itemId": "17223"
  },
  {
    "itemId": "17074"
  },
  {
    "itemId": "14626"
  },
  {
    "itemId": "4243"
  },
  {
    "itemId": "12059"
  },
  {
    "itemId": "12073"
  },
  {
    "itemId": "15501"
  },
  {
    "itemId": "6363"
  },
  {
    "itemId": "11897"
  }
]


### Solution Creation With AutoML

In [61]:
# Create a second solution on the same dataset group; no recipe ARN is passed and
# performAutoML is switched on instead.
create_solution_response = personalize.create_solution(
    performAutoML=True,
    datasetGroupArn=dataset_group_arn,
    name="tripadvisor-solution-automl",
)
automl_solution_arn = create_solution_response["solutionArn"]

print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution-automl",
  "ResponseMetadata": {
    "RequestId": "1462618c-b0de-41c9-b676-8e3dcf9082cb",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 22:34:51 GMT",
      "x-amzn-requestid": "1462618c-b0de-41c9-b676-8e3dcf9082cb",
      "content-length": "97",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [63]:
# Request a version of the AutoML solution and keep its ARN for polling and metrics.
create_solution_version_response = personalize.create_solution_version(solutionArn=automl_solution_arn)
automl_solution_version_arn = create_solution_version_response["solutionVersionArn"]

print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution-automl/34e23e30",
  "ResponseMetadata": {
    "RequestId": "1e3d36b0-5ffd-4ca7-a94b-5240a46e8afa",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Tue, 03 Dec 2019 22:36:07 GMT",
      "x-amzn-requestid": "1e3d36b0-5ffd-4ca7-a94b-5240a46e8afa",
      "content-length": "113",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [65]:
# Poll the AutoML solution version once a minute, for at most 3 hours, until it is ACTIVE.
# A CREATE FAILED status or an expired deadline raises, so that "Run All" stops here
# instead of requesting metrics for a version that is not usable.
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = automl_solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))

    if status == "ACTIVE":
        break
    if status == "CREATE FAILED":
        # failureReason is only expected on failed resources, hence .get with a fallback
        raise RuntimeError("SolutionVersion creation failed: {}".format(
            describe_solution_version_response["solutionVersion"].get("failureReason", "no failureReason returned")
        ))

    time.sleep(60)
else:
    # the while condition became false without a break: the deadline passed
    raise TimeoutError("SolutionVersion did not reach ACTIVE status within 3 hours")

SolutionVersion: ACTIVE


In [67]:
# Fetch the metrics reported for the AutoML solution version and pretty-print the whole response.
get_solution_metrics_response = personalize.get_solution_metrics(solutionVersionArn=automl_solution_version_arn)
print(json.dumps(get_solution_metrics_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:990615222287:solution/tripadvisor-solution-automl/34e23e30",
  "metrics": {
    "coverage": 0.3684,
    "mean_reciprocal_rank_at_25": 0.1558,
    "normalized_discounted_cumulative_gain_at_10": 0.1633,
    "normalized_discounted_cumulative_gain_at_25": 0.1668,
    "normalized_discounted_cumulative_gain_at_5": 0.1603,
    "precision_at_10": 0.0173,
    "precision_at_25": 0.0075,
    "precision_at_5": 0.0329
  },
  "ResponseMetadata": {
    "RequestId": "b75235ca-f36c-433e-a23f-189863520c39",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 04 Dec 2019 00:50:41 GMT",
      "x-amzn-requestid": "b75235ca-f36c-433e-a23f-189863520c39",
      "content-length": "411",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}
