In [None]:
import sagemaker
import boto3
import pandas as pd

# One setup cell only: the session, role and bucket defined here are reused by
# every later cell, so they are created a single time.

# Session object used for the S3 upload, the training job and the deployment.
sagemaker_session = sagemaker.Session()
# Execution role passed to the estimator and the model below.
role = sagemaker.get_execution_role()
# Default bucket of the session; all artifacts are stored under `prefix`.
bucket = sagemaker_session.default_bucket()
prefix = "nlp-model-demo"

In [None]:
# Name of the processed file. train.py opens a file with exactly this name
# inside the "train" channel directory, so the two must stay in sync.
local_train_file = "processed_reviews.csv"

# Keep only the review text and its star rating, dropping incomplete rows.
df = pd.read_csv("Reviews.csv")
df = df[["Text", "Score"]].dropna()

# Binary label: a score above 3 is positive (1), anything else is negative (0).
df["Sentiment"] = (df["Score"] > 3).astype(int)
df = df[["Text", "Sentiment"]]

# Write the processed data locally first; DataFrame.to_csv only writes the file
# and takes a separator as its second argument, it does not upload to S3.
df.to_csv(local_train_file, index=False)

# Upload through the session and use the URI it returns, so the location given
# to the training job is always the location the file was actually written to.
s3_train_data = sagemaker_session.upload_data(
    path=local_train_file,
    bucket=bucket,
    key_prefix=prefix,
)
print("Data uploaded to:", s3_train_data)

In [None]:
%%writefile train.py
import argparse
import os
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
def train():
    """Train a TF-IDF + logistic-regression sentiment pipeline and save it.

    Command-line arguments:
        --train_data: directory containing ``processed_reviews.csv`` with the
            columns ``Text`` and ``Sentiment``. Defaults to the
            ``SM_CHANNEL_TRAIN`` environment variable.
        --model_dir: directory that receives ``model.joblib``. Defaults to the
            ``SM_MODEL_DIR`` environment variable, falling back to
            ``/opt/ml/model``.

    Exits with an argparse usage error when no training directory is available
    from either the command line or the environment.
    """
    parser = argparse.ArgumentParser()
    # Read the environment with .get(): indexing os.environ directly raised a
    # KeyError while building the parser whenever the variable was missing,
    # even if --train_data was supplied explicitly.
    parser.add_argument("--train_data", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument(
        "--model_dir",
        type=str,
        default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model"),
    )
    args = parser.parse_args()
    if not args.train_data:
        parser.error("--train_data is required when SM_CHANNEL_TRAIN is not set")

    # Load dataset from the provided path
    train_data_path = os.path.join(args.train_data, "processed_reviews.csv")
    df = pd.read_csv(train_data_path)
    # Empty text cells are read back from CSV as NaN, which the vectorizer
    # cannot tokenize, so incomplete rows are dropped before fitting.
    df = df.dropna(subset=["Text", "Sentiment"])

    # Features are the raw review strings, the target is the 0/1 label
    X = df["Text"]
    y = df["Sentiment"]

    # Create a text-processing pipeline
    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer(stop_words="english")),
        ("clf", LogisticRegression())
    ])
    # Train model
    pipeline.fit(X, y)

    # Save trained model; the whole pipeline is persisted so that inference
    # can be run directly on raw text.
    os.makedirs(args.model_dir, exist_ok=True)
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(pipeline, model_path)
    print("Model saved at", model_path)


if __name__ == "__main__":
    train()

In [None]:
from sagemaker.sklearn.estimator import SKLearn
# Script-mode scikit-learn estimator: train.py is the entry point that runs
# inside the training job.
sklearn_estimator = SKLearn(
    role=role,
    sagemaker_session=sagemaker_session,
    entry_point="train.py",
    instance_type="ml.m5.large",
    framework_version="0.23-1",
)

# Launch the training job, feeding the uploaded data in as the "train" channel.
sklearn_estimator.fit(inputs={"train": s3_train_data})

In [None]:
%%writefile inference.py
import joblib
import os
import json
import pandas as pd
def model_fn(model_dir):
    """Load and return the object stored in ``model.joblib`` inside ``model_dir``."""
    return joblib.load(os.path.join(model_dir, "model.joblib"))
def input_fn(request_body, request_content_type):
    """Deserialize a JSON request body into a DataFrame with a ``Text`` column.

    Args:
        request_body: JSON payload (``str`` or ``bytes``). Either a single
            string, a list of strings, or any structure ``pandas.DataFrame``
            accepts together with ``columns=["Text"]``.
        request_content_type: content type of the request. Only
            ``application/json`` is supported; parameters such as
            ``; charset=utf-8`` are ignored.

    Returns:
        A ``pandas.DataFrame`` whose ``Text`` column holds the documents.

    Raises:
        ValueError: if the content type is not ``application/json``.
    """
    # Compare only the media type: a strict equality check rejected valid
    # requests that carried a parameter, e.g. "application/json; charset=utf-8".
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        raise ValueError("Unsupported content type: {}".format(request_content_type))

    data = json.loads(request_body)
    # A bare JSON string is one document; the DataFrame constructor does not
    # accept a scalar, so wrap it in a list.
    if isinstance(data, str):
        data = [data]
    return pd.DataFrame(data, columns=["Text"])
def predict_fn(input_data, model):
    """Run ``model.predict`` on the ``Text`` column and return the result as a list."""
    texts = input_data["Text"]
    predictions = model.predict(texts)
    return predictions.tolist()

In [None]:
from sagemaker.sklearn.model import SKLearnModel
# Location of the model artifact produced by the training job above
model_data = sklearn_estimator.model_data

# Pair the trained artifact with inference.py, which provides the
# model_fn / input_fn / predict_fn handlers used when serving requests.
sklearn_model = SKLearnModel(
    entry_point="inference.py",
    model_data=model_data,
    framework_version="0.23-1",
    role=role,
    sagemaker_session=sagemaker_session,
)

# Stand up a real-time endpoint backed by a single instance
predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

In [None]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# inference.py's input_fn only accepts "application/json", so the predictor
# must send JSON explicitly and decode the JSON response.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

# Sample test data. The raw list is passed and the serializer does the JSON
# encoding; pre-encoding with json.dumps also failed because `json` is never
# imported in this notebook.
test_data = ["This product is amazing!", "Worst product ever."]
response = predictor.predict(test_data)
print("Predictions:", response)

In [None]:
# Clean up everything the deployment created so nothing is left behind.
# The model is removed first, while the endpoint it is attached to can still
# be looked up; deleting only the endpoint leaves the model resource in place.
predictor.delete_model()
# Delete the SageMaker endpoint
predictor.delete_endpoint()