# Text Embedding for Sentences: How to run inference on the endpoint you have created?

In [None]:
import re
import json
import boto3
import numpy as np
import seaborn as sns
from sklearn.preprocessing import normalize
import pandas as pd
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the cocktail catalogue and build the text that is embedded for each row.
df = pd.read_csv('./cocktails.csv')

# Concatenate the three free-text columns into one string per row.
# Missing values are replaced with '' first: `str + NaN` yields NaN, and a NaN
# `content` value would later fail on `.encode('utf-8')` (floats have no encode).
df['content'] = (
    df['ingredients'].fillna('')
    + df['instructions'].fillna('')
    + df['description'].fillna('')
)

# Placeholder column; it is overwritten once the embeddings have been fetched.
df['embeddings'] = None
# df = df.head(4)  # uncomment to try the notebook on just a few rows

In [None]:
# Accumulator for the embeddings returned by the endpoint, one entry per row.
embedding_list = list()

# Endpoint used when the caller does not name one.
# Candidate alternatives (must already be deployed -- not verified here):
#   'jumpstart-dft-tf-tcembedding-bert-en-uncased-l-2-h-256'
#   'jumpstart-dft-tf-tcembedding-bert-en-uncased-l-12-h-76'
#   'jumpstart-dft-tf-tcembedding-bert-en-uncased-l-2-h-128'
DEFAULT_ENDPOINT_NAME = 'jumpstart-dft-tf-tcembedding-universal-sentence-encode'

# Created on first use and then shared, so that embedding many rows does not
# build a new boto3 client for every single request.
_runtime_client = None


def query_endpoint(encoded_text, endpoint_name=DEFAULT_ENDPOINT_NAME, client=None):
    """Send one piece of encoded text to a SageMaker endpoint.

    Args:
        encoded_text: Request body (the notebook passes UTF-8 encoded bytes).
        endpoint_name: Name of the endpoint to invoke. Defaults to
            ``DEFAULT_ENDPOINT_NAME``.
        client: Optional object exposing ``invoke_endpoint(**kwargs)``. When
            omitted, a shared ``boto3.client('runtime.sagemaker')`` is created
            on first use and reused on later calls.

    Returns:
        Whatever ``invoke_endpoint`` returns, unmodified.
    """
    global _runtime_client

    if client is None:
        if _runtime_client is None:
            _runtime_client = boto3.client('runtime.sagemaker')
        client = _runtime_client

    return client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='application/x-text',
        Body=encoded_text,
        Accept='application/json;verbose',
    )

def parse_response(query_response):
    """Decode an endpoint reply into ``(embedding, model_output)``.

    Args:
        query_response: Mapping whose ``'Body'`` entry has a ``read()`` method
            returning a JSON document.

    Returns:
        A 2-tuple with the values stored under the ``'embedding'`` and
        ``'model_output'`` keys of the decoded JSON.

    Raises:
        KeyError: If either key is missing from the decoded JSON.
    """
    payload = json.loads(query_response['Body'].read())
    return payload['embedding'], payload['model_output']


# Embed the `content` text of every row, one endpoint call per row, and store
# the results in the `embeddings` column.
total_rows = len(df)

for index, row in df.iterrows():
    sentence = row['content']

    try:
        query_response = query_endpoint(sentence.encode('utf-8'))
    except Exception as e:
        # Not every exception has a dict-valued `response` attribute; reading
        # it unconditionally would replace the real error with an
        # AttributeError/TypeError, so look the error code up defensively.
        error_response = getattr(e, 'response', None)
        error_code = (
            error_response.get('Error', {}).get('Code')
            if isinstance(error_response, dict)
            else None
        )
        if error_code == 'ModelError':
            raise Exception(
                "Backend scripts have been updated in Feb '22 to standardize response "
                "format of endpoint response. "
                "Previous endpoints may not support verbose response type used in this notebook. "
                f"To use this notebook, please launch the endpoint again. Error: {e}."
            ) from e
        raise

    try:
        embedding, _ = parse_response(query_response)
    except KeyError as e:
        raise Exception(
            "Backend scripts have been updated in Feb '22 to standardize response "
            "format of endpoint response. "
            "Response from previous endpoints not consistent with this notebook. "
            f"To use this notebook, please launch the endpoint again. Error: {e}."
        ) from e

    embedding_list.append(embedding)
    # Report progress against the actual number of rows instead of a fixed total.
    print('Index is {} / {}'.format(index, total_rows))

# One embedding was appended per row, in row order, so this assignment lines up.
df['embeddings'] = embedding_list
    

Index is 0 / 1296
Index is 1 / 1296
Index is 2 / 1296
Index is 3 / 1296
Index is 4 / 1296
Index is 5 / 1296
Index is 6 / 1296
Index is 7 / 1296
Index is 8 / 1296
Index is 9 / 1296
Index is 10 / 1296
Index is 11 / 1296
Index is 12 / 1296
Index is 13 / 1296
Index is 14 / 1296
Index is 15 / 1296
Index is 16 / 1296
Index is 17 / 1296
Index is 18 / 1296
Index is 19 / 1296
Index is 20 / 1296
Index is 21 / 1296
Index is 22 / 1296
Index is 23 / 1296
Index is 24 / 1296
Index is 25 / 1296
Index is 26 / 1296
Index is 27 / 1296
Index is 28 / 1296
Index is 29 / 1296
Index is 30 / 1296
Index is 31 / 1296
Index is 32 / 1296
Index is 33 / 1296
Index is 34 / 1296
Index is 35 / 1296
Index is 36 / 1296
Index is 37 / 1296
Index is 38 / 1296
Index is 39 / 1296
Index is 40 / 1296
Index is 41 / 1296
Index is 42 / 1296
Index is 43 / 1296
Index is 44 / 1296
Index is 45 / 1296
Index is 46 / 1296
Index is 47 / 1296
Index is 48 / 1296
Index is 49 / 1296
Index is 50 / 1296
Index is 51 / 1296
Index is 52 / 1296
Ind

In [None]:
# Preview the first rows of the frame (`head()` shows five by default).
df.head()

Unnamed: 0.1,Unnamed: 0,name,ingredients,instructions,description,link,content,embeddings
0,0,Alexander,"Sweet cream, Crème de cacao, Gin,",Shake with ice and strain into a chilled cock...,"The granddaddy of chocolate cream drinks, inc...",https://cocktailpartyapp.com/drinks/alexander/,"Sweet cream, Crème de cacao, Gin, Shake with ...","[0.56270808, 2.09430766, -0.0106404098, -0.438..."
1,1,Alexander the Great,"Sweet cream, Coffee liqueur, Crème de cacao, B...",Shake all ingredients with ice and strain int...,Alexander the Great is yet another riff on th...,https://cocktailpartyapp.com/drinks/alexander-...,"Sweet cream, Coffee liqueur, Crème de cacao, B...","[0.016935885, 1.02173078, 0.512784779, -0.3564..."
2,2,Apricot Flip,"Egg, Simple syrup, Apricot liqueur, Cognac,","Dry-shake all ingredients first without ice, ...",The Apricot Flip from The PDT Cocktail Book i...,https://cocktailpartyapp.com/drinks/apricot-flip/,"Egg, Simple syrup, Apricot liqueur, Cognac, D...","[-0.078418225, 1.29880476, 0.550741494, 0.4085..."
3,3,Apricot Lady,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","Dry-shake all ingredients, then add ice and s...","The Apricot Lady is a tiny, lovely cocktail t...",https://cocktailpartyapp.com/drinks/apricot-lady/,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","[0.268677205, 2.11677742, 0.00548471324, 0.082..."
4,4,Arancia Julius,"Sparkling water, Sweet cream, Pineapplejuice, ...",Shake everything (except the sparkling water)...,Galliano (an vanilla-anise-orange liqueur) ha...,https://cocktailpartyapp.com/drinks/arancia-ju...,"Sparkling water, Sweet cream, Pineapplejuice, ...","[0.369119674, 0.574867129, 1.15106976, 0.02284..."


In [None]:
# Normalise cocktail names to upper case, then preview the frame keyed by name.
# `set_index` is not in-place here, so `df` itself keeps its current index.
# Optional clean-up, left disabled: df = df.drop(columns=['Unnamed: 0'])
uppercased_names = df['name'].str.upper()
df['name'] = uppercased_names
df.set_index(keys='name')

Unnamed: 0_level_0,ingredients,instructions,description,link,content,embeddings
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALEXANDER,"Sweet cream, Crème de cacao, Gin,",Shake with ice and strain into a chilled cock...,"The granddaddy of chocolate cream drinks, inc...",https://cocktailpartyapp.com/drinks/alexander/,"Sweet cream, Crème de cacao, Gin, Shake with ...","[0.56270808, 2.09430766, -0.0106404098, -0.438..."
ALEXANDER THE GREAT,"Sweet cream, Coffee liqueur, Crème de cacao, B...",Shake all ingredients with ice and strain int...,Alexander the Great is yet another riff on th...,https://cocktailpartyapp.com/drinks/alexander-...,"Sweet cream, Coffee liqueur, Crème de cacao, B...","[0.016935885, 1.02173078, 0.512784779, -0.3564..."
APRICOT FLIP,"Egg, Simple syrup, Apricot liqueur, Cognac,","Dry-shake all ingredients first without ice, ...",The Apricot Flip from The PDT Cocktail Book i...,https://cocktailpartyapp.com/drinks/apricot-flip/,"Egg, Simple syrup, Apricot liqueur, Cognac, D...","[-0.078418225, 1.29880476, 0.550741494, 0.4085..."
APRICOT LADY,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","Dry-shake all ingredients, then add ice and s...","The Apricot Lady is a tiny, lovely cocktail t...",https://cocktailpartyapp.com/drinks/apricot-lady/,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","[0.268677205, 2.11677742, 0.00548471324, 0.082..."
ARANCIA JULIUS,"Sparkling water, Sweet cream, Pineapplejuice, ...",Shake everything (except the sparkling water)...,Galliano (an vanilla-anise-orange liqueur) ha...,https://cocktailpartyapp.com/drinks/arancia-ju...,"Sparkling water, Sweet cream, Pineapplejuice, ...","[0.369119674, 0.574867129, 1.15106976, 0.02284..."
...,...,...,...,...,...,...
WHITECAP,"Cinnamon, Clove, Cream of coconut, Milk, Light...",Begin by bringing the milk just to boil. Plac...,The Whitecap is tiki historian Jeff “Beachbum...,https://cocktailpartyapp.com/drinks/whitecap/,"Cinnamon, Clove, Cream of coconut, Milk, Light...","[-0.196124062, 0.789519489, 0.151253715, -0.07..."
WINTER TRAVELER’S PUNCH,"Pineapplejuice, Orgeat, Spiced rum, Chile liqu...",Shake all ingredients with ice and strain int...,If you’re looking for a respite from all thos...,https://cocktailpartyapp.com/drinks/winter-tra...,"Pineapplejuice, Orgeat, Spiced rum, Chile liqu...","[-0.198356926, 1.67377615, -0.0479293391, 0.48..."
WITHOUT FACULTY,"Tiki bitters, Ginger liqueur, Lemonjuice, Bana...",Shake everything (except the bitters) with ic...,"Tony Burke, an ambassador for Angel’s Envy wh...",https://cocktailpartyapp.com/drinks/without-fa...,"Tiki bitters, Ginger liqueur, Lemonjuice, Bana...","[0.455201268, 1.01366079, -0.31250006, 0.10392..."
YELLOW BIRD,"Limejuice, Orangejuice, Vanilla anise liqueur,...",Shake with ice and strain into an ice-filled ...,The Yellow Bird is a nice beach cocktail that...,https://cocktailpartyapp.com/drinks/yellow-bird/,"Limejuice, Orangejuice, Vanilla anise liqueur,...","[-0.23458524, 0.729493678, -0.226028189, 0.334..."


In [None]:
# Keep only the first row for each distinct `ingredients` value (in place).
df.drop_duplicates(subset='ingredients', keep='first', inplace=True)

In [None]:
# (rows, columns) of the frame at this point in the notebook.
df.shape

(1019, 7)

In [None]:
# Make `name` the index in place; this replaces the previous index labels.
df.set_index(keys='name', inplace=True)

In [None]:
# Move the current index back into a regular column and renumber rows 0..n-1.
df.reset_index(drop=False, inplace=True)

In [None]:
# Display the frame (pandas truncates long frames in the rendered output).
df

Unnamed: 0,name,ingredients,instructions,description,link,content,embeddings
0,ALEXANDER,"Sweet cream, Crème de cacao, Gin,",Shake with ice and strain into a chilled cock...,"The granddaddy of chocolate cream drinks, inc...",https://cocktailpartyapp.com/drinks/alexander/,"Sweet cream, Crème de cacao, Gin, Shake with ...","[0.56270808, 2.09430766, -0.0106404098, -0.438..."
1,ALEXANDER THE GREAT,"Sweet cream, Coffee liqueur, Crème de cacao, B...",Shake all ingredients with ice and strain int...,Alexander the Great is yet another riff on th...,https://cocktailpartyapp.com/drinks/alexander-...,"Sweet cream, Coffee liqueur, Crème de cacao, B...","[0.016935885, 1.02173078, 0.512784779, -0.3564..."
2,APRICOT FLIP,"Egg, Simple syrup, Apricot liqueur, Cognac,","Dry-shake all ingredients first without ice, ...",The Apricot Flip from The PDT Cocktail Book i...,https://cocktailpartyapp.com/drinks/apricot-flip/,"Egg, Simple syrup, Apricot liqueur, Cognac, D...","[-0.078418225, 1.29880476, 0.550741494, 0.4085..."
3,APRICOT LADY,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","Dry-shake all ingredients, then add ice and s...","The Apricot Lady is a tiny, lovely cocktail t...",https://cocktailpartyapp.com/drinks/apricot-lady/,"Orange curaçao, Limejuice, Eggwhite, Apricot l...","[0.268677205, 2.11677742, 0.00548471324, 0.082..."
4,ARANCIA JULIUS,"Sparkling water, Sweet cream, Pineapplejuice, ...",Shake everything (except the sparkling water)...,Galliano (an vanilla-anise-orange liqueur) ha...,https://cocktailpartyapp.com/drinks/arancia-ju...,"Sparkling water, Sweet cream, Pineapplejuice, ...","[0.369119674, 0.574867129, 1.15106976, 0.02284..."
...,...,...,...,...,...,...,...
1014,WHAT THE DICKENS?,"Aromatic bitters, Pineapplejuice, Limejuice, S...",Shake all ingredients with ice and strain int...,What The Dickens? What a delicious drink! It’...,https://cocktailpartyapp.com/drinks/what-the-d...,"Aromatic bitters, Pineapplejuice, Limejuice, S...","[-0.181431755, 0.787691653, 0.777929187, 0.071..."
1015,WINTER TRAVELER’S PUNCH,"Pineapplejuice, Orgeat, Spiced rum, Chile liqu...",Shake all ingredients with ice and strain int...,If you’re looking for a respite from all thos...,https://cocktailpartyapp.com/drinks/winter-tra...,"Pineapplejuice, Orgeat, Spiced rum, Chile liqu...","[-0.198356926, 1.67377615, -0.0479293391, 0.48..."
1016,WITHOUT FACULTY,"Tiki bitters, Ginger liqueur, Lemonjuice, Bana...",Shake everything (except the bitters) with ic...,"Tony Burke, an ambassador for Angel’s Envy wh...",https://cocktailpartyapp.com/drinks/without-fa...,"Tiki bitters, Ginger liqueur, Lemonjuice, Bana...","[0.455201268, 1.01366079, -0.31250006, 0.10392..."
1017,YELLOW BIRD,"Limejuice, Orangejuice, Vanilla anise liqueur,...",Shake with ice and strain into an ice-filled ...,The Yellow Bird is a nice beach cocktail that...,https://cocktailpartyapp.com/drinks/yellow-bird/,"Limejuice, Orangejuice, Vanilla anise liqueur,...","[-0.23458524, 0.729493678, -0.226028189, 0.334..."


In [None]:
# Persist the frame, including the embeddings column, next to the notebook.
embedded_pickle_path = './df_universal_embedded.pkl'
df.to_pickle(embedded_pickle_path)

In [None]:
#embedding_matrix = np.array(embedding_list)

In [None]:
#similarity_matrix = linear_kernel(embedding_matrix, embedding_matrix)

#similarity_df = pd.DataFrame(similarity_matrix, columns=df.name, index=df.name)


In [None]:
#similarity_df["Apricot Lady"].sort_values(ascending=False)

In [None]:
#similarity_df

In [None]:
#similarity_matrix = linear_kernel(embedding_matrix, embedding_matrix)

In [None]:
#X = np.array(df.iloc[3]["embeddings"]).reshape(1, -1)
#Y = np.array(df.iloc[2]["embeddings"]).reshape(1, -1)

#cosine_similarity(X, Y)

In [None]:
# Pairwise cosine similarity between the embeddings of every pair of rows.
#
# The embeddings are stacked into a single 2-D array and scored with one
# `cosine_similarity` call. This replaces a nested `iterrows()` loop that
# made one scikit-learn call per pair and stored each value through chained
# indexing (`.iloc[i][j] = ...`), which does not reliably write back to the
# frame (it is a no-op under pandas Copy-on-Write).
# Assumes every entry of df['embeddings'] is a numeric sequence of the same
# length -- TODO confirm against the endpoint's output.
embedding_matrix = np.array(df['embeddings'].tolist(), dtype=float)

if len(df):
    # With a single argument, every row is compared against every row.
    similarity_matrix = cosine_similarity(embedding_matrix)
else:
    # No rows: keep the result an empty square frame instead of raising.
    similarity_matrix = np.zeros((0, 0))

# Rows and columns are labelled with df's index values, in df's row order.
similarity_df = pd.DataFrame(
    similarity_matrix,
    columns=df.index.values,
    index=df.index.values,
)
        
        

In [None]:
# Relabel both axes with the cocktail names. Labels are assigned by position,
# so this relies on similarity_df having one row/column per row of df, in the
# same order.
cocktail_names = df['name']
similarity_df.index = cocktail_names
similarity_df.columns = cocktail_names

In [None]:
# Display the similarity matrix. The previous `similarity_df[]` (empty
# subscript) is a SyntaxError and prevented the cell from running.
similarity_df

name,ALEXANDER,ALEXANDER THE GREAT,APRICOT FLIP,APRICOT LADY,ARANCIA JULIUS,"ARIGATOU, GOMEN NASAI",AURORA BORA BOREALIS,B-52,BANANA ALEXANDER,BANOFFEE,...,TWENTY SEVENTY SWIZZLE,UNDEAD GENTLEMAN,VICIOUS VIRGIN,VIRGIN ISLAND KULA,VOYAGER,WHAT THE DICKENS?,WINTER TRAVELER’S PUNCH,WITHOUT FACULTY,YELLOW BIRD,ZOMBIE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALEXANDER,1.000000,0.749534,0.634627,0.639228,0.559685,0.604578,0.554668,0.613291,0.645506,0.688927,...,0.621940,0.558870,0.572787,0.601496,0.546906,0.641534,0.554237,0.573700,0.601637,0.586865
ALEXANDER THE GREAT,0.749534,1.000000,0.663748,0.590453,0.600239,0.545186,0.577018,0.597406,0.663025,0.701179,...,0.604700,0.558392,0.609828,0.628248,0.593985,0.671247,0.592353,0.619255,0.595444,0.652468
APRICOT FLIP,0.634627,0.663748,1.000000,0.753442,0.565411,0.596820,0.560967,0.575795,0.672813,0.705750,...,0.600368,0.559641,0.625906,0.698456,0.557079,0.615124,0.594916,0.578795,0.549047,0.587320
APRICOT LADY,0.639228,0.590453,0.753442,1.000000,0.621742,0.707187,0.590613,0.618490,0.668263,0.662849,...,0.649523,0.612239,0.652977,0.701681,0.652271,0.637030,0.611829,0.570637,0.649149,0.626043
ARANCIA JULIUS,0.559685,0.600239,0.565411,0.621742,1.000000,0.581450,0.589304,0.502865,0.611094,0.595509,...,0.599492,0.541426,0.579946,0.642008,0.604505,0.653535,0.621917,0.605154,0.642098,0.575347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WHAT THE DICKENS?,0.641534,0.671247,0.615124,0.637030,0.653535,0.615819,0.675135,0.590132,0.662437,0.661964,...,0.672676,0.631767,0.668899,0.678866,0.729705,1.000000,0.689960,0.694317,0.687183,0.670954
WINTER TRAVELER’S PUNCH,0.554237,0.592353,0.594916,0.611829,0.621917,0.606332,0.682327,0.545862,0.628254,0.603784,...,0.633163,0.592123,0.642754,0.675284,0.644571,0.689960,1.000000,0.565508,0.630387,0.646660
WITHOUT FACULTY,0.573700,0.619255,0.578795,0.570637,0.605154,0.568563,0.602325,0.517370,0.627071,0.720434,...,0.570248,0.590374,0.649022,0.644311,0.635895,0.694317,0.565508,1.000000,0.592656,0.619320
YELLOW BIRD,0.601637,0.595444,0.549047,0.649149,0.642098,0.586505,0.644532,0.599325,0.608662,0.597938,...,0.620507,0.594858,0.689943,0.657459,0.682849,0.687183,0.630387,0.592656,1.000000,0.627957


In [None]:
# Persist the labelled similarity matrix next to the notebook.
similarity_pickle_path = './similarity_df.pkl'
similarity_df.to_pickle(similarity_pickle_path)