## Download the dataset

In [1]:
data_dir = "Untitled"
!mkdir $data_dir
!cd $data_dir && wget http://files.grouplens.org/datasets/hetrec2011/hetrec2011-lastfm-2k.zip
!cd $data_dir && unzip hetrec2011-lastfm-2k.zip

mkdir: cannot create directory ‘Untitled’: File exists
--2023-02-06 19:45:23--  http://files.grouplens.org/datasets/hetrec2011/hetrec2011-lastfm-2k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2589075 (2.5M) [application/zip]
Saving to: ‘hetrec2011-lastfm-2k.zip’


2023-02-06 19:45:24 (3.08 MB/s) - ‘hetrec2011-lastfm-2k.zip’ saved [2589075/2589075]

Archive:  hetrec2011-lastfm-2k.zip
  inflating: user_friends.dat        
  inflating: user_taggedartists.dat  
  inflating: user_taggedartists-timestamps.dat  
  inflating: artists.dat             
  inflating: readme.txt              
  inflating: tags.dat                
  inflating: user_artists.dat        


In [2]:
# List the contents of the data directory to confirm the archive was extracted.
!ls $data_dir

artists.dat		  tags.dat	    user_taggedartists.dat
hetrec2011-lastfm-2k.zip  user_artists.dat  user_taggedartists-timestamps.dat
readme.txt		  user_friends.dat


## Prepare Data

In [3]:
import json
import time
from datetime import datetime, timezone
from time import sleep

import boto3
import numpy as np
import pandas as pd

In [4]:
# First attempt: read the file without specifying a delimiter.
# The preview shows each row collapsed into one tab-joined column, which is why
# the following cell re-reads the file with an explicit tab delimiter.
original_data = pd.read_csv(data_dir + '/user_taggedartists-timestamps.dat')
original_data.head(5)

Unnamed: 0,userID\tartistID\ttagID\ttimestamp
0,2\t52\t13\t1238536800000
1,2\t52\t15\t1238536800000
2,2\t52\t18\t1238536800000
3,2\t52\t21\t1238536800000
4,2\t52\t41\t1238536800000


In [5]:
# Re-read the same file as tab-separated values so that userID, artistID,
# tagID and timestamp are parsed into separate columns.
original_data = pd.read_csv(data_dir + '/user_taggedartists-timestamps.dat', delimiter='\t')
original_data.head(5)

Unnamed: 0,userID,artistID,tagID,timestamp
0,2,52,13,1238536800000
1,2,52,15,1238536800000
2,2,52,18,1238536800000
3,2,52,21,1238536800000
4,2,52,41,1238536800000


In [6]:
# Summary statistics for every column.
# NOTE(review): the reported minimum timestamp is negative -- confirm whether
# such rows should be filtered out before training.
original_data.describe()

Unnamed: 0,userID,artistID,tagID,timestamp
count,186479.0,186479.0,186479.0,186479.0
mean,1035.600137,4375.845328,1439.582913,1239204000000.0
std,622.461272,4897.789595,2775.340279,42990910000.0
min,2.0,1.0,1.0,-428720400000.0
25%,488.0,686.0,79.0,1209593000000.0
50%,1021.0,2203.0,195.0,1243807000000.0
75%,1624.0,6714.0,887.0,1275343000000.0
max,2100.0,18744.0,12647.0,1304941000000.0


In [7]:
# Show column dtypes and non-null counts for the raw frame.
original_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186479 entries, 0 to 186478
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   userID     186479 non-null  int64
 1   artistID   186479 non-null  int64
 2   tagID      186479 non-null  int64
 3   timestamp  186479 non-null  int64
dtypes: int64(4)
memory usage: 5.7 MB


In [8]:
# Divide the timestamp column by 1000 (true division, so the column becomes
# float64); the next cell interprets the result as epoch seconds.
# Caution: this overwrites the column, so running the cell twice divides twice.
original_data['timestamp'] = original_data['timestamp'].div(1000)
original_data.head(5)

Unnamed: 0,userID,artistID,tagID,timestamp
0,2,52,13,1238537000.0
1,2,52,15,1238537000.0
2,2,52,18,1238537000.0
3,2,52,21,1238537000.0
4,2,52,41,1238537000.0


In [9]:
# Sanity-check the rescaled timestamps: take an arbitrary row (position 50)
# and render its value as a human-readable UTC date.
arb_time_stamp = original_data.iloc[50]['timestamp']
print(arb_time_stamp)
# `datetime.utcfromtimestamp` is deprecated in recent Python releases; passing
# an explicit UTC timezone yields the same formatted string.
print(datetime.fromtimestamp(arb_time_stamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S'))

1235862000.0
2009-02-28 23:00:00


In [10]:
# Keep only the three columns used for the interactions dataset. The trailing
# `.copy()` yields a frame that is independent of `original_data`.
interaction_columns = ['userID', 'artistID', 'timestamp']
interactions_df = original_data[interaction_columns].copy()
interactions_df.head()

Unnamed: 0,userID,artistID,timestamp
0,2,52,1238537000.0
1,2,52,1238537000.0
2,2,52,1238537000.0
3,2,52,1238537000.0
4,2,52,1238537000.0


In [11]:
# NOTE(review): this cell rebuilds `interactions_df` exactly as the preceding
# cell does; it is harmless to run but could be removed.
interactions_df = original_data.loc[:, ['userID', 'artistID', 'timestamp']].copy()
interactions_df.head()

Unnamed: 0,userID,artistID,timestamp
0,2,52,1238537000.0
1,2,52,1238537000.0
2,2,52,1238537000.0
3,2,52,1238537000.0
4,2,52,1238537000.0


In [12]:
# Inspect the column dtypes; timestamp is float64 after the earlier division.
interactions_df.dtypes

userID         int64
artistID       int64
timestamp    float64
dtype: object

In [13]:
# Persist the integer conversion of the timestamp column. Previously the result
# of `astype` was only displayed and then discarded, leaving the column as
# float64. Values are rounded first so the integers match what the later CSV
# export (float_format='%.0f') would have written for the float column.
interactions_df = interactions_df.assign(
    timestamp=interactions_df['timestamp'].round().astype('int64')
)
interactions_df.dtypes

userID       int64
artistID     int64
timestamp    int64
dtype: object

In [14]:
# Rename the columns to the upper-case field names declared in the
# interactions schema later in this notebook.
column_renames = {
    'userID': 'USER_ID',
    'artistID': 'ITEM_ID',
    'timestamp': 'TIMESTAMP',
}
interactions_df = interactions_df.rename(columns=column_renames)

In [15]:
# Write the interactions frame to a CSV file inside the data directory.
# The row index is omitted and float values are written without decimals.
interactions_filename = "interactions.csv"
interactions_csv_path = "/".join([data_dir, interactions_filename])
interactions_df.to_csv(interactions_csv_path, index=False, float_format='%.0f')

## Create dataset groups and the interactions dataset

In [16]:
# Create the two boto3 clients used throughout the notebook:
# `personalize` for resource management calls (dataset groups, schemas,
# solutions, campaigns) and `personalize_runtime` for the ranking request.
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

### Create the dataset group

In [17]:
# Create the dataset group and keep its ARN; later cells pass the ARN when
# creating the dataset and the solution.
create_dataset_group_response = personalize.create_dataset_group(
    name = "personalize-ranking-dsg-3"
)

dataset_group_arn = create_dataset_group_response['datasetGroupArn']
# Echo the raw API response for reference.
print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:325347478438:dataset-group/personalize-ranking-dsg-3",
  "ResponseMetadata": {
    "RequestId": "042e5d16-37ee-4a41-9048-9e54deb4f991",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:46:03 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "104",
      "connection": "keep-alive",
      "x-amzn-requestid": "042e5d16-37ee-4a41-9048-9e54deb4f991"
    },
    "RetryAttempts": 0
  }
}


In [18]:
# Poll the dataset group once a minute until it reports a terminal status,
# giving up after three hours.
terminal_statuses = ("ACTIVE", "CREATE FAILED")
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn=dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup:", status)

    if status in terminal_statuses:
        break

    time.sleep(60)

DatasetGroup: CREATE IN_PROGRESS
DatasetGroup: ACTIVE


### Create the dataset

In [19]:
# Describe the interactions dataset: one (name, type) pair per field, expanded
# below into the list of field dictionaries that the schema carries.
interaction_fields = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("TIMESTAMP", "long"),
]

interactions_schema = schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in interaction_fields
    ],
    "version": "1.0"
}

# Register the schema (serialised as JSON) and remember its ARN.
schema_json = json.dumps(interactions_schema)
create_schema_response = personalize.create_schema(
    name="personalize-ranking-interactions-3",
    schema=schema_json
)

schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:325347478438:schema/personalize-ranking-interactions-3",
  "ResponseMetadata": {
    "RequestId": "729483ce-3c0d-4e80-ba68-1b711f014b50",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:47:05 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "100",
      "connection": "keep-alive",
      "x-amzn-requestid": "729483ce-3c0d-4e80-ba68-1b711f014b50"
    },
    "RetryAttempts": 0
  }
}


In [20]:
# Create the interactions dataset inside the dataset group, bound to the
# schema registered above, and keep its ARN for the import job.
dataset_type = "INTERACTIONS"
create_dataset_response = personalize.create_dataset(
    name = "personalize-ranking-ds",
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = schema_arn
)

interactions_dataset_arn = create_dataset_response['datasetArn']
# Echo the raw API response for reference.
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:us-east-1:325347478438:dataset/personalize-ranking-dsg-3/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "23bf4960-74bd-4c65-a421-31b71ea50806",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:47:05 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "106",
      "connection": "keep-alive",
      "x-amzn-requestid": "23bf4960-74bd-4c65-a421-31b71ea50806"
    },
    "RetryAttempts": 0
  }
}


## Configure an S3 bucket and an IAM role

In [21]:
# Read the notebook's resource metadata file and take the region from the
# fourth colon-separated component of its resource ARN.
with open('/opt/ml/metadata/resource-metadata.json') as notebook_info:
    data = json.load(notebook_info)

resource_arn = data['ResourceArn']
region = resource_arn.split(':')[3]
print(region)

us-east-1


In [22]:
# Create an S3 bucket whose name ends in a few random digits.
s3 = boto3.client('s3')
suffix = str(np.random.uniform())[4:9]
bucket_name = "personalize-ranking-demo-" + suffix  # replace with the name of your S3 bucket
print(bucket_name)

# A location constraint is passed for every region except us-east-1.
create_bucket_kwargs = {"Bucket": bucket_name}
if region != "us-east-1":
    create_bucket_kwargs["CreateBucketConfiguration"] = {'LocationConstraint': region}
s3.create_bucket(**create_bucket_kwargs)

personalize-ranking-demo-08877


### Upload data to S3

In [23]:
# Upload the local interactions CSV to the bucket, using the file name as the
# object key, and record the resulting S3 URI.
interactions_file_path = "/".join([data_dir, interactions_filename])
s3_resource = boto3.Session().resource('s3')
s3_resource.Bucket(bucket_name).Object(interactions_filename).upload_file(interactions_file_path)
interactions_s3DataPath = "s3://{}/{}".format(bucket_name, interactions_filename)

### Set the S3 bucket policy

In [24]:
# Bucket policy that lets the Personalize service principal list the bucket
# and perform the `s3:*Object` actions on the objects inside it.
bucket_arn = "arn:aws:s3:::{}".format(bucket_name)

policy_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:*Object", "s3:ListBucket"],
    "Resource": [bucket_arn, bucket_arn + "/*"],
}

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [policy_statement],
}

s3.put_bucket_policy(Bucket=bucket_name, Policy=json.dumps(policy))

{'ResponseMetadata': {'RequestId': '02EEJ69J5BYD8P5Q',
  'HostId': '8c3KT1vvd3KKxk/li61Yk3Sz7yhBwUQ/BN5S6gQBNZ1DQf24LdMy/S9RCSQqghQLRU5Fjh0ANK8=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': '8c3KT1vvd3KKxk/li61Yk3Sz7yhBwUQ/BN5S6gQBNZ1DQf24LdMy/S9RCSQqghQLRU5Fjh0ANK8=',
   'x-amz-request-id': '02EEJ69J5BYD8P5Q',
   'date': 'Mon, 06 Feb 2023 19:49:31 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

### Create an IAM role

In [26]:
# Create the IAM role that Amazon Personalize assumes for the import job.
#
# SECURITY: credentials are resolved through boto3's default credential chain
# (environment variables, shared config/credentials files, or the role attached
# to the notebook). Access keys must never be written into a notebook; any key
# that was previously embedded here should be treated as compromised, revoked
# and rotated. The identity running this cell needs permission to create roles
# and attach policies.
iam = boto3.client("iam")

role_name = "PersonalizeRoleRanking2"
# Trust policy: allow the Personalize service to assume the role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

create_role_response = iam.create_role(
    RoleName = role_name,
    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
)

# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes "personalize" or "Personalize"
# if you would like to use a bucket with a different name, please consider creating and attaching a new policy
# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(
    RoleName = role_name,
    PolicyArn = policy_arn
)

# Now add S3 support
# NOTE(review): AmazonS3FullAccess is broader than the read access the comment
# above recommends -- consider a narrower policy.
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
    RoleName=role_name
)
time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate

role_arn = create_role_response["Role"]["Arn"]
print(role_arn)

arn:aws:iam::325347478438:role/PersonalizeRoleRanking2


### Import the interactions data

In [27]:
# Start a dataset import job that loads the uploaded CSV from S3 into the
# interactions dataset, using the IAM role created above.
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "personalize-ranking-example",
    datasetArn = interactions_dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket_name, interactions_filename)
    },
    roleArn = role_arn
)

# Keep the job ARN so the next cell can poll its status.
dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:325347478438:dataset-import-job/personalize-ranking-example",
  "ResponseMetadata": {
    "RequestId": "3a7c5840-e268-4979-9a05-0a0dce1de92f",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:51:14 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "115",
      "connection": "keep-alive",
      "x-amzn-requestid": "3a7c5840-e268-4979-9a05-0a0dce1de92f"
    },
    "RetryAttempts": 0
  }
}


In [28]:
%%time

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE
CPU times: user 48.4 ms, sys: 577 µs, total: 49 ms
Wall time: 3min


## Create solutions

In [29]:
# Display the recipes returned by the service (name, ARN, status, dates).
personalize.list_recipes()

{'recipes': [{'name': 'aws-ecomm-customers-who-viewed-x-also-viewed',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-ecomm-customers-who-viewed-x-also-viewed',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2022, 12, 14, 15, 7, 41, 686000, tzinfo=tzlocal()),
   'domain': 'ECOMMERCE'},
  {'name': 'aws-ecomm-frequently-bought-together',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-ecomm-frequently-bought-together',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2022, 12, 14, 15, 7, 41, 686000, tzinfo=tzlocal()),
   'domain': 'ECOMMERCE'},
  {'name': 'aws-ecomm-popular-items-by-purchases',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-ecomm-popular-items-by-purchases',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 10, 0, 0, tzinfo=tzlocal()),
   'lastUpdat

### Personalized Ranking

In [30]:
# Select the personalized-ranking recipe; its ARN is passed to create_solution below.
rerank_recipe_arn = "arn:aws:personalize:::recipe/aws-personalized-ranking"

### Create the solution

In [31]:
# Create a solution in the dataset group that uses the selected recipe,
# and keep its ARN for creating solution versions.
rerank_create_solution_response = personalize.create_solution(
    name = "personalize-ranking",
    datasetGroupArn = dataset_group_arn,
    recipeArn = rerank_recipe_arn
)

rerank_solution_arn = rerank_create_solution_response['solutionArn']
# Echo the raw API response for reference.
print(json.dumps(rerank_create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:325347478438:solution/personalize-ranking",
  "ResponseMetadata": {
    "RequestId": "0ac6d828-c5c0-4222-821f-17ef9a548737",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:55:06 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "89",
      "connection": "keep-alive",
      "x-amzn-requestid": "0ac6d828-c5c0-4222-821f-17ef9a548737"
    },
    "RetryAttempts": 0
  }
}


### Create the solution version

In [32]:
# Request a new version of the solution; the response carries the version ARN
# that the following cells read and poll.
rerank_create_solution_version_response = personalize.create_solution_version(
    solutionArn = rerank_solution_arn
)

In [33]:
# Extract the solution version ARN from the response and echo the response.
rerank_solution_version_arn = rerank_create_solution_version_response['solutionVersionArn']
print(json.dumps(rerank_create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:325347478438:solution/personalize-ranking/ee39952d",
  "ResponseMetadata": {
    "RequestId": "8977c38e-f766-4802-9bbc-0cff0dc346f1",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 19:55:13 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "105",
      "connection": "keep-alive",
      "x-amzn-requestid": "8977c38e-f766-4802-9bbc-0cff0dc346f1"
    },
    "RetryAttempts": 0
  }
}


In [34]:
# Keep track of the solution versions that are still building and poll them
# once a minute until none remain, giving up after three hours.
in_progress_solution_versions = [
    rerank_solution_version_arn
]

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    # Iterate over a snapshot of the list: removing entries from a list while
    # looping over that same list makes the iterator skip the entry that
    # follows each removal.
    for solution_version_arn in list(in_progress_solution_versions):
        version_response = personalize.describe_solution_version(
            solutionVersionArn = solution_version_arn
        )
        status = version_response["solutionVersion"]["status"]

        if status == "ACTIVE":
            print("Build succeeded for {}".format(solution_version_arn))
            in_progress_solution_versions.remove(solution_version_arn)
        elif status == "CREATE FAILED":
            print("Build failed for {}".format(solution_version_arn))
            in_progress_solution_versions.remove(solution_version_arn)

    # Stop as soon as every tracked version reached a terminal status.
    if not in_progress_solution_versions:
        break
    print("At least one solution build is still in progress")

    time.sleep(60)

At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
At least one solution build is still in progress
Build succeeded for arn:aws:personalize:us-east-1:325347478438:solution/personalize-ranking/ee39952d


## Create campaigns

In [35]:
# Personalized Ranking: deploy the trained solution version as a campaign
# with a minimum provisioned throughput of 1 TPS.
rerank_create_campaign_response = personalize.create_campaign(
    name = "personalize-poc-rerank",
    solutionVersionArn = rerank_solution_version_arn,
    minProvisionedTPS = 1
)

# Keep the campaign ARN for the status polling and the ranking request.
rerank_campaign_arn = rerank_create_campaign_response['campaignArn']
print(json.dumps(rerank_create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-east-1:325347478438:campaign/personalize-poc-rerank",
  "ResponseMetadata": {
    "RequestId": "37eb4089-c75f-4fb6-8656-84b52d9ed850",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 20:13:34 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "92",
      "connection": "keep-alive",
      "x-amzn-requestid": "37eb4089-c75f-4fb6-8656-84b52d9ed850"
    },
    "RetryAttempts": 0
  }
}


In [36]:
# NOTE(review): this repeats the create_solution_version call made earlier in
# the notebook, so a second version of the same solution is requested after the
# campaign has already been created from the first one. Confirm whether this
# extra training run is intended; nothing below waits for it to finish.
rerank_create_solution_version_response = personalize.create_solution_version(
    solutionArn = rerank_solution_arn
)

In [37]:
# Rebinds `rerank_solution_version_arn` to the ARN of the newly requested
# version (the campaign above was created with the previous value).
rerank_solution_version_arn = rerank_create_solution_version_response['solutionVersionArn']
print(json.dumps(rerank_create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:325347478438:solution/personalize-ranking/9bcb47e9",
  "ResponseMetadata": {
    "RequestId": "6e5855aa-3437-4800-ac55-bd6f3e86fafe",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "date": "Mon, 06 Feb 2023 20:13:35 GMT",
      "content-type": "application/x-amz-json-1.1",
      "content-length": "105",
      "connection": "keep-alive",
      "x-amzn-requestid": "6e5855aa-3437-4800-ac55-bd6f3e86fafe"
    },
    "RetryAttempts": 0
  }
}


In [38]:
# Keep track of the campaigns that are still being created and poll them once
# a minute until none remain, giving up after three hours.
in_progress_campaigns = [
    rerank_campaign_arn
]

max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    # Iterate over a snapshot of the list: removing entries from a list while
    # looping over that same list makes the iterator skip the entry that
    # follows each removal.
    for campaign_arn in list(in_progress_campaigns):
        version_response = personalize.describe_campaign(
            campaignArn = campaign_arn
        )
        status = version_response["campaign"]["status"]

        if status == "ACTIVE":
            print("Build succeeded for {}".format(campaign_arn))
            in_progress_campaigns.remove(campaign_arn)
        elif status == "CREATE FAILED":
            print("Build failed for {}".format(campaign_arn))
            in_progress_campaigns.remove(campaign_arn)

    # Stop as soon as every tracked campaign reached a terminal status.
    if not in_progress_campaigns:
        break
    print("At least one campaign build is still in progress")

    time.sleep(60)

At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
At least one campaign build is still in progress
Build succeeded for arn:aws:personalize:us-east-1:325347478438:campaign/personalize-poc-rerank


## Interact with campaigns

In [39]:
# Load the artist metadata that serves as the id -> name lookup table.

# artists.dat is tab-separated; its first column (the artist id) becomes the index.
items_df = pd.read_csv(data_dir + '/artists.dat', delimiter='\t', index_col=0)

# Preview the first rows.
items_df.head(5)

Unnamed: 0_level_0,name,url,pictureURL
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...
4,Moi dix Mois,http://www.last.fm/music/Moi+dix+Mois,http://userserve-ak.last.fm/serve/252/54697835...
5,Bella Morte,http://www.last.fm/music/Bella+Morte,http://userserve-ak.last.fm/serve/252/14789013...


In [40]:
def get_artist_by_id(artist_id, artist_df=items_df):
    """
    Look up an artist's name by id.

    Args:
        artist_id: The artist id. Personalize returns ids as strings, so the
            value is converted with ``int()`` before the lookup.
        artist_df: DataFrame indexed by integer artist id that has a ``name``
            column. Defaults to ``items_df``.

    Returns:
        The ``name`` entry for the given id, or the string
        ``"Error obtaining artist"`` when the id cannot be converted to an
        integer or is not present in the dataframe.
    """
    try:
        return artist_df.loc[int(artist_id)]['name']
    except (KeyError, ValueError, TypeError, OverflowError):
        # ValueError / TypeError / OverflowError: the id is not convertible to
        # an int. KeyError: the id (or the ``name`` column) does not exist.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return "Error obtaining artist"

In [41]:
# Exercise the lookup with, in order: a known good id, a value of the wrong
# numeric form, and a value that is not a number at all.
for sample_id in ("987", "987.9393939", "Steve"):
    print(get_artist_by_id(artist_id=sample_id))

Earth, Wind & Fire
Error obtaining artist
Error obtaining artist


In [42]:
# Load the tab-separated user/artist file; its first column (userID) becomes
# the index, so the index values are user ids (repeated once per artist row).
users_df = pd.read_csv(data_dir + '/user_artists.dat', delimiter='\t', index_col=0)
# Preview the first rows.
users_df.head(5)

Unnamed: 0_level_0,artistID,weight
userID,Unnamed: 1_level_1,Unnamed: 2_level_1
2,51,13883
2,52,11690
2,53,11351
2,54,10300
2,55,8983


### Personalized Ranking

In [43]:
# Pick one random user id (from the index of users_df) and 25 random artist
# ids (from the index of items_df). No random seed is set, so every run
# selects different values.
rerank_user = users_df.sample(1).index.tolist()[0]
rerank_items = items_df.sample(25).index.tolist()

In [44]:
# Resolve each sampled artist id to its name and show the names in a
# single-column frame whose column label is the sampled user id.
rerank_list = [get_artist_by_id(artist_id) for artist_id in rerank_items]
rerank_df = pd.DataFrame(rerank_list, columns=[rerank_user])
rerank_df

Unnamed: 0,1060
0,Talib al-Habib
1,Alliance Ethnik
2,L'ame Immotale
3,ELECTROANOVA
4,Gavin DeGraw
5,Xanopticon
6,Audio
7,$lick
8,Xeg
9,colligere


#### Make the personalized ranking API call

In [45]:
# Convert the user id and every candidate item id to strings before the call.
user_id = str(rerank_user)
rerank_item_list = [str(candidate) for candidate in rerank_items]

# Ask the campaign to re-rank the candidate items for this user.
get_recommendations_response_rerank = personalize_runtime.get_personalized_ranking(
    campaignArn=rerank_campaign_arn,
    userId=user_id,
    inputList=rerank_item_list
)

get_recommendations_response_rerank

{'ResponseMetadata': {'RequestId': '05824c4d-0875-4646-b894-f8c88c6ea149',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:25:07 GMT',
   'content-type': 'application/json',
   'content-length': '1932',
   'connection': 'keep-alive',
   'x-amzn-requestid': '05824c4d-0875-4646-b894-f8c88c6ea149'},
  'RetryAttempts': 0},
 'personalizedRanking': [{'itemId': '602', 'score': 0.3141241},
  {'itemId': '3749', 'score': 0.1048485},
  {'itemId': '3466', 'score': 0.102602},
  {'itemId': '1356', 'score': 0.0784145},
  {'itemId': '16624', 'score': 0.0657954},
  {'itemId': '9861', 'score': 0.0636229},
  {'itemId': '2692', 'score': 0.0608698},
  {'itemId': '7883', 'score': 0.0549864},
  {'itemId': '9455', 'score': 0.047699},
  {'itemId': '11797', 'score': 0.0308084},
  {'itemId': '16111', 'score': 0.0263885},
  {'itemId': '10465', 'score': 0.0251501},
  {'itemId': '1680', 'score': 0.0246903},
  {'itemId': '17101'},
  {'itemId': '15530'},
  {'itemId': '7954'},
  {'itemId': '458

In [46]:
# Translate the ranked item ids back into artist names and place them next to
# the original order in a 'Re-Ranked' column.
# Caution: re-running this cell appends another 'Re-Ranked' column.
item_list = get_recommendations_response_rerank['personalizedRanking']
ranked_list = [get_artist_by_id(entry['itemId']) for entry in item_list]
ranked_df = pd.DataFrame(ranked_list, columns=['Re-Ranked'])
rerank_df = pd.concat([rerank_df, ranked_df], axis=1)
rerank_df

Unnamed: 0,1060,Re-Ranked
0,Talib al-Habib,Iron & Wine
1,Alliance Ethnik,Gavin DeGraw
2,L'ame Immotale,Sarah McLachlan
3,ELECTROANOVA,Heathen
4,Gavin DeGraw,ELECTROANOVA
5,Xanopticon,The Shamen
6,Audio,Xanopticon
7,$lick,Ananda Shake
8,Xeg,Sun Ra
9,colligere,The Idan Raichel Project


## Clean up campaigns

In [47]:
# Clean up campaigns: first list the ARN of every campaign, page by page,
# to confirm what exists before deleting.
paginator = personalize.get_paginator('list_campaigns')
for paginate_result in paginator.paginate():
    for campaign in paginate_result["campaigns"]:
        print(campaign["campaignArn"])

arn:aws:personalize:us-east-1:325347478438:campaign/personalize-poc-rerank


In [48]:
# Delete the campaign created by this notebook. The ARN captured at creation
# time is used instead of a hard-coded, account-specific string so the cell
# works in any account and region.
personalize.delete_campaign(
    campaignArn = rerank_campaign_arn
)

{'ResponseMetadata': {'RequestId': 'b02dd27b-8106-4302-a9c8-a76fe5c726c5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:25:13 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'b02dd27b-8106-4302-a9c8-a76fe5c726c5'},
  'RetryAttempts': 0}}

## Clean up solutions

In [56]:
# List the ARN of every solution, page by page, before deleting.
paginator = personalize.get_paginator('list_solutions')
for paginate_result in paginator.paginate():
    for solution in paginate_result["solutions"]:
        print(solution["solutionArn"])

arn:aws:personalize:us-east-1:325347478438:solution/personalize-ranking


In [57]:
# Delete the solution created by this notebook. The ARN captured at creation
# time is used instead of a hard-coded, account-specific string so the cell
# works in any account and region.
personalize.delete_solution(
    solutionArn = rerank_solution_arn
)

{'ResponseMetadata': {'RequestId': '024b0cf4-9318-43c8-847a-083aa9e10094',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:37:34 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'connection': 'keep-alive',
   'x-amzn-requestid': '024b0cf4-9318-43c8-847a-083aa9e10094'},
  'RetryAttempts': 0}}

## Clean up datasets

In [60]:
# Clean up datasets: list the ARN of every dataset, page by page, before deleting.
paginator = personalize.get_paginator('list_datasets')
for paginate_result in paginator.paginate():
    for datasets in paginate_result["datasets"]:
        print(datasets["datasetArn"])

arn:aws:personalize:us-east-1:325347478438:dataset/personalize-ranking-dsg-3/INTERACTIONS


In [62]:
# Delete the interactions dataset created by this notebook. The ARN captured at
# creation time is used instead of a hard-coded, account-specific string so
# the cell works in any account and region.
personalize.delete_dataset(
    datasetArn = interactions_dataset_arn
)

{'ResponseMetadata': {'RequestId': 'c96ac6ed-5b35-4a4b-9ba0-dea973aab077',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:39:34 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'c96ac6ed-5b35-4a4b-9ba0-dea973aab077'},
  'RetryAttempts': 0}}

## Clean up the schemas

In [63]:
# Clean up the schemas: list the ARN of every schema, page by page, before deleting.
# Note: the loop variable reuses the name `schema`, which the schema-creation
# cell also bound to the schema dictionary.
paginator = personalize.get_paginator('list_schemas')
for paginate_result in paginator.paginate():
    for schema in paginate_result["schemas"]:
        print(schema["schemaArn"])

arn:aws:personalize:us-east-1:325347478438:schema/personalize-ranking-interactions-3


In [64]:
# Delete the schema registered by this notebook. The ARN captured at creation
# time is used instead of a hard-coded, account-specific string so the cell
# works in any account and region.
personalize.delete_schema(
    schemaArn = schema_arn
)


{'ResponseMetadata': {'RequestId': '1c2f3630-d023-41e6-990d-e6423ef293a7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:40:03 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'connection': 'keep-alive',
   'x-amzn-requestid': '1c2f3630-d023-41e6-990d-e6423ef293a7'},
  'RetryAttempts': 0}}

## Clean up the dataset groups

In [65]:
# Clean up the dataset groups: list the ARN of every dataset group, page by
# page, before deleting.
paginator = personalize.get_paginator('list_dataset_groups')
for paginate_result in paginator.paginate():
    for dataset_group in paginate_result["datasetGroups"]:
        print(dataset_group["datasetGroupArn"])

arn:aws:personalize:us-east-1:325347478438:dataset-group/personalize-ranking-dsg-3


In [66]:
# Delete the dataset group created by this notebook. The ARN captured at
# creation time is used instead of a hard-coded, account-specific string so
# the cell works in any account and region.
personalize.delete_dataset_group(
    datasetGroupArn = dataset_group_arn
)

{'ResponseMetadata': {'RequestId': '08c95705-00ee-4f24-a87a-673fa618cf8c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 06 Feb 2023 20:40:27 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'connection': 'keep-alive',
   'x-amzn-requestid': '08c95705-00ee-4f24-a87a-673fa618cf8c'},
  'RetryAttempts': 0}}