## Import the libraries

In [1]:
import time
from time import sleep
import json
from datetime import datetime
import boto3
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED


##  Fetch the dataset

In [9]:
import pandas as pd
import boto3
import io
bucket='crs-f13'
file_key = 'user_interactions.csv'
s3_client = boto3.client('s3')
obj = s3_client.get_object(Bucket=bucket, Key=file_key)
df = pd.read_csv(io.BytesIO(obj['Body'].read()))
df.head()

Unnamed: 0,USER_ID,ITEM_ID,EVENT_TYPE,TIMESTAMP
0,812353,4429430,view,1603045800
1,239494,8391177,click,1611945000
2,774544,5758296,view,1636914600
3,255308,1784357,view,1548441000
4,348298,3921147,view,1561573800


In [10]:
original_data = df
print(original_data.info())
original_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   USER_ID     20000 non-null  object
 1   ITEM_ID     20000 non-null  int64 
 2   EVENT_TYPE  20000 non-null  object
 3   TIMESTAMP   20000 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 625.1+ KB
None


Unnamed: 0,USER_ID,ITEM_ID,EVENT_TYPE,TIMESTAMP
0,812353,4429430,view,1603045800
1,239494,8391177,click,1611945000
2,774544,5758296,view,1636914600
3,255308,1784357,view,1548441000
4,348298,3921147,view,1561573800


## Prepare the data

In [16]:
watched_df = original_data.copy()
watched_df = watched_df[['USER_ID', 'ITEM_ID', 'TIMESTAMP']]
watched_df['EVENT_TYPE']='view'

clicked_df = original_data.copy()
clicked_df = clicked_df[['USER_ID', 'ITEM_ID', 'TIMESTAMP']]
clicked_df['EVENT_TYPE']='click'

interactions_df = clicked_df.copy()
# interactions_df = interactions_df.append(watched_df)
interactions_df = pd.concat([interactions_df, watched_df])
interactions_df.sort_values("TIMESTAMP", axis = 0, ascending = True, inplace = True, na_position ='last')

In [17]:
interactions_df.rename(columns = {'USER_ID':'USER_ID', 'ITEM_ID':'ITEM_ID', 'TIMESTAMP':'TIMESTAMP'}, inplace = True) 
interactions_filename = "interactions.csv"
interactions_df.to_csv((interactions_filename), index=False, float_format='%.0f')

## Create the dataset group

In [19]:
# Configure the SDK to Personalize:
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

create_dataset_group_response = personalize.create_dataset_group(
    name = "personalize-demo-crs1"
)

dataset_group_arn = create_dataset_group_response['datasetGroupArn']
print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:ap-south-1:327664300428:dataset-group/personalize-demo-crs1",
  "ResponseMetadata": {
    "RequestId": "0b10c41e-8975-4a19-9512-c7c16cdd263a",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 16 Nov 2023 19:26:15 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "101",
      "connection": "keep-alive",
      "x-amzn-requestid": "0b10c41e-8975-4a19-9512-c7c16cdd263a"
    },
    "RetryAttempts": 0
  }
}


In [20]:
%%time
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE
CPU times: user 31.5 ms, sys: 4.48 ms, total: 36 ms
Wall time: 1min


## Create the schema and dataset

In [22]:
interactions_schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "EVENT_TYPE",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

create_schema_response = personalize.create_schema(
    name = "personalize-demo-int",
    schema = json.dumps(interactions_schema)
)

interaction_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

dataset_type = "INTERACTIONS"
create_dataset_response = personalize.create_dataset(
    name = "personalize-demo-crs1",
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = interaction_schema_arn
)

interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "schemaArn": "arn:aws:personalize:ap-south-1:327664300428:schema/personalize-demo-int",
  "ResponseMetadata": {
    "RequestId": "749c50a8-edde-4c5b-8d87-b04724d543f2",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 16 Nov 2023 19:29:01 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "87",
      "connection": "keep-alive",
      "x-amzn-requestid": "749c50a8-edde-4c5b-8d87-b04724d543f2"
    },
    "RetryAttempts": 0
  }
}
{
  "datasetArn": "arn:aws:personalize:ap-south-1:327664300428:dataset/personalize-demo-crs1/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "2541ee38-ea4d-4fdc-b9ad-3d584705acb5",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Thu, 16 Nov 2023 19:29:01 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "103",
      "connection": "keep-alive",
      "x-amzn-requestid": "2541ee38-ea4d-4fdc-b9ad-3d584705acb5"
    },
    "RetryAttempts": 0
  }
}


## Create Amazon S3 bucket and upload data

In [23]:
session = boto3.session.Session()
region = session.region_name
s3 = boto3.client('s3')
account_id = boto3.client('sts').get_caller_identity().get('Account')
bucket_name = account_id + "-" + region + "-" + "crs-f13"
print(bucket_name)
if region == "us-east-1":
    s3.create_bucket(Bucket=bucket_name)
else:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={'LocationConstraint': region}
        )

327664300428-ap-south-1-crs-f13


In [24]:
interactions_file_path = data_dir + "/" + interactions_filename
boto3.Session().resource('s3').Bucket(bucket_name).Object(interactions_filename).upload_file(interactions_file_path)
interactions_s3DataPath = "s3://"+bucket_name+"/"+interactions_filename

## Configure S3 bucket policy

In [25]:
policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicy",
            "Effect": "Allow",
            "Principal": {
                "Service": "personalize.amazonaws.com"
            },
            "Action": [
                "s3:*Object",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::{}".format(bucket_name),
                "arn:aws:s3:::{}/*".format(bucket_name)
            ]
        }
    ]
}

s3.put_bucket_policy(Bucket=bucket_name, Policy=json.dumps(policy))

{'ResponseMetadata': {'RequestId': 'Y5EXG9D45Q8KERDH',
  'HostId': 'Itj2x/LSjr8xtgTt2rqmgTvnjfJHAsC0ZqVqVdW4l0VTcIASHoWmuRTjZrZk+ZixI1chTSP2C7MZUbyC9v1OAQ==',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'Itj2x/LSjr8xtgTt2rqmgTvnjfJHAsC0ZqVqVdW4l0VTcIASHoWmuRTjZrZk+ZixI1chTSP2C7MZUbyC9v1OAQ==',
   'x-amz-request-id': 'Y5EXG9D45Q8KERDH',
   'date': 'Thu, 16 Nov 2023 19:30:20 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

In [None]:
iam = boto3.client("iam")

role_name = "PersonalizeRolePOC"
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

create_role_response = iam.create_role(
    RoleName = role_name,
    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
)

# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes "personalize" or "Personalize" 
# if you would like to use a bucket with a different name, please consider creating and attaching a new policy
# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(
    RoleName = role_name,
    PolicyArn = policy_arn
)

# Now add S3 support
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
    RoleName=role_name
)
time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate

role_arn = create_role_response["Role"]["Arn"]
print(role_arn)

## Import the dataset into Amazon Personalize

In [15]:
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "personalize-demo-import1",
    datasetArn = interactions_dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket_name, interactions_filename)
    },
    roleArn = role_arn
)

dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:ap-south-1:327664300428:dataset-import-job/personalize-demo-import1",
  "ResponseMetadata": {
    "RequestId": "de56bc1a-78d1-436a-97da-24f20d2f417f",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Sun, 12 Nov 2023 19:56:29 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "113",
      "connection": "keep-alive",
      "x-amzn-requestid": "de56bc1a-78d1-436a-97da-24f20d2f417f"
    },
    "RetryAttempts": 0
  }
}


In [16]:
%%time
max_time = time.time() + 6*60*60 # 6 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE
CPU times: user 79.5 ms, sys: 19.2 ms, total: 98.8 ms
Wall time: 3min


 # Solution

## Choose a recipe

In [17]:
# aws-user-personalization selected for demo purposes
recipe_arn = "arn:aws:personalize:::recipe/aws-user-personalization"

## Configure the solution

In [18]:
create_solution_response = personalize.create_solution(
    name = "personalize-demo-soln-user-personalization",
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:ap-south-1:327664300428:solution/personalize-demo-soln-user-personalization",
  "ResponseMetadata": {
    "RequestId": "f7387a08-27e4-4c61-9a1f-40df0ae19662",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Sun, 12 Nov 2023 20:04:11 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "113",
      "connection": "keep-alive",
      "x-amzn-requestid": "f7387a08-27e4-4c61-9a1f-40df0ae19662"
    },
    "RetryAttempts": 0
  }
}


## Create a solution version (train a model)

In [19]:
create_solution_version_response = personalize.create_solution_version(
    solutionArn = solution_arn
)

solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:ap-south-1:327664300428:solution/personalize-demo-soln-user-personalization/46aa2171",
  "ResponseMetadata": {
    "RequestId": "55e90994-d061-41ef-bd71-b2094d7415ce",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Sun, 12 Nov 2023 20:04:42 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "129",
      "connection": "keep-alive",
      "x-amzn-requestid": "55e90994-d061-41ef-bd71-b2094d7415ce"
    },
    "RetryAttempts": 0
  }
}


In [20]:
%%time
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

SolutionVersion: CREATE PENDING
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: ACTIVE
CPU times: user 552 ms, sys: 28.8 ms, total: 581 ms
Wall time: 18min 2s


## Evaluate the solution version

In [21]:
get_solution_metrics_response = personalize.get_solution_metrics(
    solutionVersionArn = solution_version_arn
)

print(json.dumps(get_solution_metrics_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:ap-south-1:327664300428:solution/personalize-demo-soln-user-personalization/46aa2171",
  "metrics": {
    "coverage": 0.0563,
    "mean_reciprocal_rank_at_25": 0.0527,
    "normalized_discounted_cumulative_gain_at_10": 0.0604,
    "normalized_discounted_cumulative_gain_at_25": 0.1109,
    "normalized_discounted_cumulative_gain_at_5": 0.0381,
    "precision_at_10": 0.0179,
    "precision_at_25": 0.0193,
    "precision_at_5": 0.0143
  },
  "ResponseMetadata": {
    "RequestId": "5c92a5c0-d729-44a6-84b7-a68e34b94c7c",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Sun, 12 Nov 2023 20:25:15 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "427",
      "connection": "keep-alive",
      "x-amzn-requestid": "5c92a5c0-d729-44a6-84b7-a68e34b94c7c"
    },
    "RetryAttempts": 0
  }
}


## Create your campaign

In [22]:
create_campaign_response = personalize.create_campaign(
    name = "personalize-demo-camp",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1,
    campaignConfig = {
        "itemExplorationConfig": {
            "explorationWeight": "0.3",
	"explorationItemAgeCutOff": "30"
        }
    }
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

{
  "campaignArn": "arn:aws:personalize:ap-south-1:327664300428:campaign/personalize-demo-camp",
  "ResponseMetadata": {
    "RequestId": "18c34616-9652-4a23-93c0-918683303411",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Sun, 12 Nov 2023 20:26:14 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "92",
      "connection": "keep-alive",
      "x-amzn-requestid": "18c34616-9652-4a23-93c0-918683303411"
    },
    "RetryAttempts": 0
  }
}
Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


## Delete everything

In [None]:
# Delete the campaign
personalize.delete_campaign(campaignArn=campaign_arn)
time.sleep(300)
print("delete_campaign done")
# Delete the solution
personalize.delete_solution(solutionArn=solution_arn)
time.sleep(60)
print("delete_solution done")
# Delete the interaction dataset
personalize.delete_dataset(datasetArn=interactions_dataset_arn)
time.sleep(60)
print("delete_dataset done")
# Delete the schema
personalize.delete_schema(schemaArn=interaction_schema_arn)
time.sleep(60)
print("delete_schema done")
# Delete the dataset group
personalize.delete_dataset_group(datasetGroupArn = dataset_group_arn)
time.sleep(60)
print("delete_dataset_group done")

In [None]:
# Empty S3 Bucket
boto3.Session().resource('s3').Bucket(bucket_name).Object(interactions_filename).delete()
# IAM policies should also be removed
iam = boto3.client("iam")
iam.detach_role_policy(PolicyArn="arn:aws:iam::aws:policy/AmazonS3FullAccess", RoleName=role_name)
iam.detach_role_policy(PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess",RoleName=role_name)
iam.detach_role_policy(PolicyArn="arn:aws:iam::aws:policy/service-role/IAMFullAccess",RoleName=role_name)
iam.delete_role(RoleName=role_name)

In [None]:
# go to sagemaker console
# stop and delete the notebook instance