In [1]:
import tensorflow as tf
import boto3
import pandas as pd
from sagemaker import get_execution_role
import numpy as np
import json

In [2]:
import sagemaker
from sagemaker.tensorflow.model import TensorFlowModel

# Initial Prep

In [3]:
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

Couldn't call 'get_role' to get Role ARN from role name AmazonSageMaker-ExecutionRole-20210908T201427 to get Role path.
Assuming role was created in SageMaker AWS console, as the name contains `AmazonSageMaker-ExecutionRole`. Defaulting to Role ARN with service-role in path. If this Role ARN is incorrect, please add IAM read permissions to your role or supply the Role Arn directly.


In [4]:
bucket_name = 'beer-reviews-dataset-pb'
ret_model_path = 'ret_model/model.tar.gz'
rank_model_path = 'rank_model/model.tar.gz'

db_path = 'final_reviews.csv'

In [5]:
# import data needed for inference
s3client = boto3.client('s3', region_name='us-east-2')
s3_data = s3client.get_object(Bucket=bucket_name, Key=db_path)
df = pd.read_csv(s3_data['Body'])
df = df.drop(columns = ['Unnamed: 0'])

In [6]:
df.head()

Unnamed: 0,score,username,text,style,abv,brewery_name,beer_name
0,3.68,Kemmerlin,A: Amber red color with a thin quickly dimi...,American Amber / Red Ale,5.3,Twisted Pine Brewing Company,Twisted Pine Amber Ale
1,3.35,dkoehler42,This beer is a good alternative to your sta...,American Amber / Red Ale,5.3,Twisted Pine Brewing Company,Twisted Pine Amber Ale
2,4.05,Rodekill,Easily one of the best American Amber ales ...,American Amber / Red Ale,5.3,Twisted Pine Brewing Company,Twisted Pine Amber Ale
3,2.8,Lukebell15,,American Amber / Red Ale,5.3,Twisted Pine Brewing Company,Twisted Pine Amber Ale
4,2.56,jzlyo,Aroma is caramel malts and mild earthiness....,American Amber / Red Ale,5.3,Twisted Pine Brewing Company,Twisted Pine Amber Ale


In [7]:
# Load TensorFlow Model For Retrieval

# using tf version 2.3
tf_framework_version = tf.__version__
ret_model = TensorFlowModel(model_data=f's3://{bucket_name}/{ret_model_path}', role=role, framework_version=tf_framework_version )

In [8]:
# Create Endpoint For Retrieval
ret_predictor = ret_model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


---!

In [15]:
user = 'pblackburn'
user_input = {
  'instances': [user]
}

#returns dictionary of beer names + scores (we only need beer names)
result = ret_predictor.predict(user_input)

# Get list of beers
# See TF graph for why we need to select output_2
beer_list = result['predictions'][0]['output_2']

#filter dataframe by retrieved beers
beer_info_df = df[['style', 'abv', 'brewery_name', 'beer_name']]
beer_info_df = beer_info_df.groupby(by=['beer_name']).first().reset_index()
retrieved_df = beer_info_df[beer_info_df['beer_name'].isin(beer_list)]

In [16]:
retrieved_df.head()

Unnamed: 0,beer_name,style,abv,brewery_name
3,#42 Poblano Stout,English Oatmeal Stout,5.7,"Big Choice Brewing, LLC"
8,'Bout Damn Time,American IPA,7.3,4 Noses Brewing Company
15,(S)Pinner,American IPA,4.9,Oskar Blues Grill & Brew
36,12 Apostles,German Kölsch,6.1,Westminster Brewing Company
39,13,American IPA,7.1,Crystal Springs Brewing Company


In [17]:
# Load TensorFlow Model For Ranking

# using tf version 2.3
tf_framework_version = tf.__version__
rank_model = TensorFlowModel(model_data=f's3://{bucket_name}/{rank_model_path}', role=role, framework_version=tf_framework_version )

In [18]:
# Create Endpoint For Ranking
rank_predictor = rank_model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


---!

In [19]:
# example mulit input:
# can't use numpy arrays like when we could when using the TF .pb model
# Instead we need a list of JSON inputs
rank_input_example = {
  'instances': [
      {'abv': 7,
     'beer_name': "Tree Shaker Imperial Peach IPA", 
     'brewery_name': "Odell Brewing Company",
     'review_text': "",
     'style': "American Imperial IPA",
     'username': "pblackburn"}, 
    {'abv': 7,
     'beer_name': "Tree Shaker Imperial Peach IPA", 
     'brewery_name': "Odell Brewing Company",
     'review_text': "",
     'style': "American Imperial IPA",
     'username': "pblackburn"}
  ]
}


In [20]:
# add colums for user and review text
retrieved_df['username'] = [user] * 500
retrieved_df['review_text'] = [""] * 500
retrieved_df['abv'] = retrieved_df['abv'].astype('int32') #convert to int32 data type because that's what I use in the model

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [21]:
retrieved_df.head()

Unnamed: 0,beer_name,style,abv,brewery_name,username,review_text
3,#42 Poblano Stout,English Oatmeal Stout,5,"Big Choice Brewing, LLC",pblackburn,
8,'Bout Damn Time,American IPA,7,4 Noses Brewing Company,pblackburn,
15,(S)Pinner,American IPA,4,Oskar Blues Grill & Brew,pblackburn,
36,12 Apostles,German Kölsch,6,Westminster Brewing Company,pblackburn,
39,13,American IPA,7,Crystal Springs Brewing Company,pblackburn,


In [22]:
json_records = retrieved_df.to_dict("records")
rank_input = {
    'instances': json_records
}

In [23]:
#returns dictionary of beer names + scores (we only need beer names)
rank_result = rank_predictor.predict(rank_input)


In [24]:
retrieved_df['rankings'] = np.array(rank_result['predictions']).reshape(-1,)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [25]:
ranked_beers_df = retrieved_df.sort_values(by=['rankings'], ascending=False)

In [26]:
#filter out any beers that I have already tried-
ranked_beers_df = ranked_beers_df[~ranked_beers_df['beer_name'].isin(df[df['username']=='pblackburn']['beer_name'])]

In [27]:
ranked_beers_df.head()

Unnamed: 0,beer_name,style,abv,brewery_name,username,review_text,rankings
1709,Casey Family Preserves - Independence Nectarine,American Wild Ale,6,Casey Brewing & Blending,pblackburn,,4.463147
2067,Coconut Medianoche,American Imperial Stout,14,WeldWerks Brewing Co.,pblackburn,,4.450992
8831,Trophy Hunt,American Imperial Stout,14,Cerebral Brewing,pblackburn,,4.447558
4492,Intergalactic Juice Hunter 4.0,American Imperial IPA,8,Odd13 Brewing,pblackburn,,4.430002
4632,Jumpseat,American Wild Ale,6,Black Project Spontaneous & Wild Ales,pblackburn,,4.409402


In [28]:
# Save my list to s3-
my_recommendations_path = 'my_beer_recs.csv'
ranked_beers_df.to_csv(f's3://{bucket_name}/{my_recommendations_path}', index=False)

# Delete Endpoints

In [29]:
ret_predictor.delete_endpoint()
rank_predictor.delete_endpoint()