<a href="https://colab.research.google.com/github/RoshiniBochkar/NLP-projects/blob/main/amazon_food_review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import sagemaker
import boto3
import pandas as pd
# Initialize the SageMaker session shared by the estimator and model cells below
sagemaker_session = sagemaker.Session()
# Get the SageMaker execution role
role = sagemaker.get_execution_role()
# S3 bucket for storing data (the session's default bucket)
bucket = sagemaker_session.default_bucket()
# Key prefix under which this notebook's objects are stored in the bucket.
# NOTE(review): "nip" looks like a typo for "nlp" — confirm before renaming,
# since the upload cell builds its S3 keys from this value.
prefix = "nip-model-demo"

**Prepare and Upload Data**

In [4]:
# Name of the processed dataset, used both locally and as the S3 object name.
# train.py looks for this exact file name inside the training channel.
processed_file = "processed_reviews.csv"

# Load only the two columns we need and drop rows where either is missing.
reviews_raw = pd.read_csv("Reviews.csv", usecols=["Text", "Score"]).dropna()

# Binary label: 1 when the score is above 3, otherwise 0.
# A vectorized comparison replaces the row-by-row `.apply(lambda ...)`.
df = reviews_raw.assign(Sentiment=(reviews_raw["Score"] > 3).astype(int))[
    ["Text", "Sentiment"]
]
df.to_csv(processed_file, index=False)

# Upload the processed CSV and remember its S3 URI for the training cell.
s3 = boto3.client("s3")
s3.upload_file(processed_file, bucket, f"{prefix}/{processed_file}")
s3_train_data = f"s3://{bucket}/{prefix}/{processed_file}"
print("Data uploaded to:", s3_train_data)

**Create the Training Script**

In [19]:
%%writefile train.py
import argparse
import os
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

Writing train.py


In [20]:
def train():
  parser = argparse.ArgumentParser()
  parser.add_argument ("--train_data", type=str, default=os.environ["SM_CHANNEL_TRAIN"])
  args = parser.parse_args()
  train_data_path = os.path.join(args.train_data, "processed_reviews.csv")
  df = pd.read_csv(train_data_path)
X = df["Text"]
y = df["Sentiment"]

In [21]:
pipeline = Pipeline([
    ("tfidf", TfidfVectorizer(stop_words="english")),
     ("clf", LogisticRegression)
])
pipeline.fit(X,y)
model_path = os.path.join("/opt/ml/model", "model.joblib")
joblib.dump(pipeline, model_path)
print ("Model saved at", model_path)
if __name__ == "__main__":
  train()

**Train the Model in SageMaker**

In [22]:
from sagemaker.sklearn.estimator import SKLearn

# Settings for the scikit-learn estimator that runs train.py.
estimator_settings = {
    "entry_point": "train.py",
    "framework_version": "0.23-1",
    "instance_type": "ml.m5.large",
    "role": role,
    "sagemaker_session": sagemaker_session,
}
sklearn_estimator = SKLearn(**estimator_settings)

# Start training; the "train" channel is the CSV uploaded earlier.
training_inputs = {"train": s3_train_data}
sklearn_estimator.fit(training_inputs)

**Create the Inference Script**

In [6]:
%%writefile inference.py
import joblib
import os
import json
import pandas as pd
def model_fn(model_dir):
  model_path = os.path.join(model_dir, "model.joblib")
  return joblib. load (model_path)
def input_fn(request_body, request_content_type):
  if request_content_type == "application/json":
    data = json.loads(request_body)
    return pd.DataFrame(data, columns=["Text"])
  else:
    raise ValueError ("Unsupported content type: 0)".formatrequest_content_type))
def predict_fn(input_data, model):
  return model.predict(input_data["Text"]).tolist()

Writing inference.py


**Deploy the Model in SageMaker**

In [24]:
from sagemaker.sklearn.model import SKLearnModel

# Model artifact reference taken from the fitted estimator.
model_data = sklearn_estimator.model_data

# Pair the artifact with the custom handlers in inference.py.
model_settings = {
    "model_data": model_data,
    "role": role,
    "entry_point": "inference.py",
    "framework_version": "0.23-1",
    "sagemaker_session": sagemaker_session,
}
sklearn_model = SKLearnModel(**model_settings)

# Deploy the model on one ml.m5.large instance.
predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

In [24]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# The scikit-learn predictor sends NumPy (application/x-npy) payloads by
# default, but input_fn in inference.py only accepts "application/json".
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

# Pass the list itself: the serializer does the JSON encoding, so dumping it
# here as well would encode the payload twice.
test_data = ["This product is amazing!", "Worst product ever."]
response = predictor.predict(test_data)
print("Predictions:", response)

**Clean Up Resources**

In [None]:
# Final clean-up step: delete the endpoint that sklearn_model.deploy(...) created.
predictor.delete_endpoint()