In [1]:
from pipelines.pipeline import run_pipeline

In [2]:
num_samples = 5

results = run_pipeline(num_samples)

Found Parquet file: samples/text reviews/new_yelp_text_reviews.parquet
Sampled 5 rows from samples/text reviews/new_yelp_text_reviews.parquet.


2025-01-04 16:55:56.744553: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Review: Love, love, LOVE this place. The workers are all super chill yet productive, the prices are excellent, and best of all the food is taaaaaastyyyyyy! Even the vegan "meat" tastes and even FEELS so real! When I'm in a pinch or just don't feel like cooking, this is my go-to place. Much better than fast food, yet same kind of prices!
Prediction: Positive
Actual: 5
----------------------------------------
Review: This is a don't miss. Food is always great. Service is good. Wish I had one of these in Abilene, Texas. It is a loud environment so don't go expecting quiet and relaxing. A family favorite.
Prediction: Positive
Actual: 5
----------------------------------------
Review: I ordered two baked potatoes and an apple fritter for $23 (first off, extremely overpriced). The baked potatoes were microwaved right in front of us, and they tasted very much so microwaved. The apple fritter was very DRY and tasted like bread sprinkled with cinnamon. We were very unsatisfied with our meal and

In [3]:
import json
import torch
import boto3
import tarfile
import sagemaker
import transformers
import pandas as pd
from io import StringIO
from sagemaker.huggingface import HuggingFaceModel
from sagemaker import get_execution_role, Session

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
s3 = boto3.client('s3')
bucket_name = "sagemaker-studio-619071335465-8h7owh9eftx"
model_artifact_local_path = "/home/sagemaker-user/model_fixed.tar.gz"

s3_key = "opt/ml/model/model.tar.gz"

s3.upload_file(model_artifact_local_path, bucket_name, s3_key)

In [5]:
s3_client = boto3.client('s3')
bucket_name = "sagemaker-studio-619071335465-8h7owh9eftx"
s3_key = "opt/ml/model/model.tar.gz"
local_path = "model_fixed.tar.gz"

s3_client.download_file(bucket_name, s3_key, local_path)

with tarfile.open(local_path, "r:gz") as tar:
    print("Files in the archive:")
    for member in tar.getmembers():
        print(member.name)

Files in the archive:
config.json
model.safetensors
tokenizer_config.json
special_tokens_map.json
vocab.txt
tokenizer.json


In [6]:
model_artifact = f"s3://{bucket_name}/{s3_key}"

print(model_artifact)
print(transformers.__version__)
print(torch.__version__)

s3://sagemaker-studio-619071335465-8h7owh9eftx/opt/ml/model/model.tar.gz
4.38.2
2.4.1.post100


In [27]:

sagemaker_session = Session()
role = get_execution_role()

model_artifact = f"s3://{bucket_name}/{s3_key}"

huggingface_model = HuggingFaceModel(
    model_data=model_artifact,
    role=role,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    sagemaker_session=sagemaker_session,
    entry_point="inference_text.py",
    source_dir="/home/sagemaker-user/inference",
)


endpoint_name = "hf-text-reviews-01044"
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name=endpoint_name
)

print(f"Endpoint deployed: {predictor.endpoint_name}")


-------!Endpoint deployed: hf-text-reviews-01044


In [28]:
from pipelines.preprocessing_reviews import preprocess_text_reviews

sampled_df = preprocess_text_reviews(bucket_name, 5)
text_list = sampled_df['text'].tolist()
text_list

Found Parquet file: samples/text reviews/new_yelp_text_reviews.parquet
Sampled 5 rows from samples/text reviews/new_yelp_text_reviews.parquet.


['Really poor service by a cranky, "really should quit " waitress. I usually empathize as I waitressed for several years in my working life but this one was just rotten and really unexpected from a usually reliably wonderful dining experience . "Chops and fries" served literally 2 tiny chops FROM a lamb rack for $27! NOT two lamb chop racks. Scandalous! And then over cooked. Worst part was how the waitress reacted when we express our sincere disappointment...she was arrogant! "That\'s  how it\'s served. (Frown, arms crossed) Says so right there in the menu." Said we could have ONE other chop( proudly) for and additional fee! Really set on that appeasing me. After exhausting us of that "solution" reluctantly offered to return and replace the meal with another. We were on tight schedule for a show which we had told her of and she had already not rushed our food as we requested...so at that point- 45 min after having been seated-@5:30- time didn\'t allow for it. Pissed!',
 "Very delicious

In [29]:
payload = json.dumps({"text": text_list})

sagemaker_client = boto3.client('sagemaker-runtime')

response = sagemaker_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType="application/json"
)

# Decode the response
response_body = json.loads(response["Body"].read().decode("utf-8"))
print(response_body)

{'label': ['Negative', 'Positive', 'Positive', 'Positive', 'Positive']}


In [32]:

from pipelines.inference_reviews import load_model_and_tokenizer, run_inference, postprocess_results

s3_client = boto3.client('s3')
s3_key = "opt/ml/model/model.tar.gz"
local_dir = "/home/sagemaker-user/checkpoint-29650"

reviews_list = sampled_df['stars_reviews'].tolist()


# text_list = ["The service was outstanding!", "Not worth the price."]
model, tokenizer = load_model_and_tokenizer(local_dir, s3_key, bucket_name)
predictions = run_inference(model, tokenizer, text_list)

# Print results
results = postprocess_results(text_list, predictions, reviews_list)

for result in results:
    print(f"Review: {result['review']}")
    print(f"Prediction: {result['prediction']}")
    print(f"Actual: {result['actual']}")
    print("-" * 40)


Review: Really poor service by a cranky, "really should quit " waitress. I usually empathize as I waitressed for several years in my working life but this one was just rotten and really unexpected from a usually reliably wonderful dining experience . "Chops and fries" served literally 2 tiny chops FROM a lamb rack for $27! NOT two lamb chop racks. Scandalous! And then over cooked. Worst part was how the waitress reacted when we express our sincere disappointment...she was arrogant! "That's  how it's served. (Frown, arms crossed) Says so right there in the menu." Said we could have ONE other chop( proudly) for and additional fee! Really set on that appeasing me. After exhausting us of that "solution" reluctantly offered to return and replace the meal with another. We were on tight schedule for a show which we had told her of and she had already not rushed our food as we requested...so at that point- 45 min after having been seated-@5:30- time didn't allow for it. Pissed!
Prediction: Neg

In [17]:
# text_list = ["The service was outstanding!", "Not worth the price."]

{
  "text_list": ["The service was outstanding!", "Not worth the price."]
}

endpoint_name = 'hf-text-reviews-0103'
sagemaker_client = boto3.client('sagemaker-runtime')

response = sagemaker_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=json.dumps({"text": text_list}),
    ContentType="application/json"
)

# Decode the response

response_body = json.loads(response["Body"].read().decode("utf-8"))
print(response_body)

for response in response_body:
  print(response)

{'label': 'POSITIVE', 'score': 0.9999480247497559}
label
score
