<a href="https://colab.research.google.com/github/Aditikumari20/NLP/blob/main/Amazon_Food_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sagemaker



In [4]:
import sagemaker
import boto3
import pandas as pd

# S3 key prefix under which this notebook stores its processed data.
prefix = "nlp-model-demo"

# Session object shared by the estimator and the model defined further down.
sagemaker_session = sagemaker.Session()

# IAM role passed to both the training job and the deployed model.
role = sagemaker.get_execution_role()

# The session's default bucket receives the processed training CSV.
bucket = sagemaker_session.default_bucket()

In [6]:
# Load the raw reviews; "Reviews.csv" must be present in the working directory.
reviews_raw = pd.read_csv("Reviews.csv")

# Keep the text and score columns, drop incomplete rows, and derive a binary
# label: 1 (positive) for scores above 3, 0 (negative) otherwise -- so a score
# of exactly 3 is labelled negative.
df = (
    reviews_raw[["Text", "Score"]]
    .dropna()
    .assign(Sentiment=lambda frame: (frame["Score"] > 3).astype(int))
    .loc[:, ["Text", "Sentiment"]]
)

# Save the processed frame locally, then copy it to the session's S3 bucket.
df.to_csv("processed_reviews.csv", index=False)
s3 = boto3.client("s3")
s3.upload_file("processed_reviews.csv", bucket, f"{prefix}/processed_reviews.csv")

# S3 URI that is later passed to the estimator as the "train" channel.
s3_train_data = f"s3://{bucket}/{prefix}/processed_reviews.csv"
print("Data uploaded to:", s3_train_data)

In [7]:
# NOTE(review): this cell repeats the previous cell's processing; running it
# again only rebuilds and re-uploads the same file. Consider deleting one of
# the two cells.

# Load the raw reviews; "Reviews.csv" must be present in the working directory.
reviews_raw = pd.read_csv("Reviews.csv")

# Keep the text and score columns, drop incomplete rows, and derive a binary
# label: 1 (positive) for scores above 3, 0 (negative) otherwise -- so a score
# of exactly 3 is labelled negative.
df = (
    reviews_raw[["Text", "Score"]]
    .dropna()
    .assign(Sentiment=lambda frame: (frame["Score"] > 3).astype(int))
    .loc[:, ["Text", "Sentiment"]]
)

# Save the processed frame locally, then copy it to the session's S3 bucket.
df.to_csv("processed_reviews.csv", index=False)
s3 = boto3.client("s3")
s3.upload_file("processed_reviews.csv", bucket, f"{prefix}/processed_reviews.csv")

# S3 URI that is later passed to the estimator as the "train" channel.
s3_train_data = f"s3://{bucket}/{prefix}/processed_reviews.csv"
print("Data uploaded to:", s3_train_data)

In [8]:
%%writefile train.py
import argparse
import os
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

def train():
    """Fit a TF-IDF + logistic-regression sentiment pipeline and save it.

    Reads ``processed_reviews.csv`` (columns ``Text`` and ``Sentiment``) from
    the directory given by ``--train_data`` (default: the ``SM_CHANNEL_TRAIN``
    environment variable), fits the pipeline on all usable rows, and writes it
    as ``model.joblib`` into the directory named by ``SM_MODEL_DIR``
    (default: ``/opt/ml/model``).

    Exits with an argparse usage error when no training directory is supplied.
    """
    parser = argparse.ArgumentParser()
    # .get() rather than [...]: outside a SageMaker container the variable is
    # absent, and a KeyError here would prevent even --train_data from working.
    parser.add_argument("--train_data", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    args = parser.parse_args()
    if not args.train_data:
        parser.error("--train_data is required when SM_CHANNEL_TRAIN is not set")

    # Load dataset from the provided path
    train_data_path = os.path.join(args.train_data, "processed_reviews.csv")
    df = pd.read_csv(train_data_path)
    # read_csv parses texts such as "NA" or "null" as NaN, which the
    # vectorizer cannot tokenize; drop those rows before fitting.
    df = df.dropna(subset=["Text", "Sentiment"])

    # Features are the raw review strings; the target is the binary label.
    X = df["Text"]
    y = df["Sentiment"]

    # Create a text-processing pipeline
    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer(stop_words="english")),
        ("clf", LogisticRegression())
    ])
    pipeline.fit(X, y)

    # Save the trained pipeline; the directory is created for local runs.
    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "model.joblib")
    joblib.dump(pipeline, model_path)
    print("Model saved at", model_path)
# Run training when this file is executed as a script rather than imported.
if __name__ == "__main__":
    train()

Writing train.py


In [9]:
from sagemaker.sklearn.estimator import SKLearn

# Scikit-learn estimator that uses train.py as its training entry point.
sklearn_estimator = SKLearn(
    entry_point="train.py",
    framework_version="0.23-1",
    instance_type="ml.m5.large",
    role=role,
    sagemaker_session=sagemaker_session,
)

# Start training; train.py reads the "train" channel via SM_CHANNEL_TRAIN.
sklearn_estimator.fit({"train": s3_train_data})

In [10]:
%%writefile inference.py
import joblib
import os
import json
import pandas as pd
# Load trained model
def model_fn(model_dir):
    """Return the object stored as ``model.joblib`` inside ``model_dir``."""
    return joblib.load(os.path.join(model_dir, "model.joblib"))
# Parse input JSON
def input_fn(request_body, request_content_type):
    """Decode a JSON request body into a DataFrame with a single ``Text`` column.

    Args:
        request_body: JSON text (or bytes) holding either a list of review
            strings or one bare review string.
        request_content_type: Content type of the request. Only
            ``application/json`` is accepted; parameters such as
            ``; charset=utf-8`` are ignored.

    Returns:
        A ``pandas.DataFrame`` whose ``Text`` column holds the reviews.

    Raises:
        ValueError: If the content type is not ``application/json``.
    """
    # Compare the bare media type so "application/json; charset=utf-8" passes.
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        raise ValueError("Unsupported content type: {}".format(request_content_type))

    data = json.loads(request_body)
    if isinstance(data, str):
        # A bare JSON string is one review; wrap it so pandas builds one row
        # instead of rejecting the scalar.
        data = [data]
    return pd.DataFrame(data, columns=["Text"])
# Generate predictions
def predict_fn(input_data, model):
    """Run ``model.predict`` on the ``Text`` column and return the result as a list."""
    reviews = input_data["Text"]
    labels = model.predict(reviews)
    return labels.tolist()

Writing inference.py


Deploy The Model In SageMaker

In [11]:
from sagemaker.sklearn.model import SKLearnModel

# Model artifact location reported by the estimator after training.
model_data = sklearn_estimator.model_data

sklearn_model = SKLearnModel(
    model_data=model_data,
    role=role,
    entry_point="inference.py",  # keyword is entry_point, as on the SKLearn estimator
    framework_version="0.23-1",
    sagemaker_session=sagemaker_session,
)

# Create the endpoint and keep the predictor used to invoke and delete it.
predictor = sklearn_model.deploy(instance_type="ml.m5.large", initial_instance_count=1)


In [12]:
import json

from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import IdentitySerializer

# inference.py's input_fn accepts only "application/json", so send the JSON
# string unchanged under that content type and decode the reply as JSON.
predictor.serializer = IdentitySerializer(content_type="application/json")
predictor.deserializer = JSONDeserializer()

test_data = json.dumps(["This product is amazing!", "Worst product ever."])
# Invoke the endpoint; the reply holds one predicted label per review.
response = predictor.predict(test_data)
print("Predictions:", response)

Clean Up: Delete the Endpoint

In [13]:
# Delete the endpoint that the deploy step created.
predictor.delete_endpoint()