In [None]:
# Install required dependencies in the notebook
!pip install boto3 sagemaker scikit-learn==1.5.2 joblib

# Import necessary libraries
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
import pandas as pd
import tarfile
import os

# Notebook-wide settings shared by the cells below
model_tarball = "model.tar.gz"  # local archive name, reused as the S3 object key
bucket_name = "saylee-s3-archive-bucket"  # target bucket for the uploads
region = boto3.Session().region_name  # region configured for the default boto3 session
role = get_execution_role()  # role returned by sagemaker.get_execution_role()

# Stage the metadata CSV in S3
data_file = "metadata.csv"  # name of the local copy, also used as the S3 key
s3 = boto3.client("s3")

# Load the source CSV, write it back out without the index column,
# then push that local copy to the bucket.
data = pd.read_csv("../data/metadata.csv")
data.to_csv(data_file, index=False)
s3.upload_file(
    Filename=data_file,
    Bucket=bucket_name,
    Key=data_file,
)
print(f"Uploaded dataset to s3://{bucket_name}/{data_file}")

# Prepare model tarball
# Store each file under its bare file name so it sits at the archive root.
# Without an explicit arcname, tarfile records the "../model/..." path as
# given, which puts parent-directory components into the member names and
# keeps the artifacts from unpacking at the top of the model directory.
with tarfile.open(model_tarball, "w:gz") as tar:
    for artifact_path in ("../model/inference.py", "../model/model.joblib"):
        tar.add(artifact_path, arcname=os.path.basename(artifact_path))

# The archive's local name doubles as its S3 object key.
s3.upload_file(model_tarball, bucket_name, model_tarball)
print(f"Uploaded model tarball to s3://{bucket_name}/{model_tarball}")

# Create a SageMaker SKLearn model from the uploaded artifact.
# SKLearnModel wraps an existing model archive for hosting; the SKLearn
# estimator class is for training jobs and cannot be deployed before fit().
from sagemaker.sklearn.model import SKLearnModel

# NOTE(review): this selects the scikit-learn version of the serving container.
# It should match the version that produced model.joblib; the install cell
# pins scikit-learn==1.5.2, so confirm which version serialized the model.
framework_version = "0.23-1"
script_path = "../model/inference.py"

sklearn_model = SKLearnModel(
    model_data=f"s3://{bucket_name}/{model_tarball}",
    role=role,
    entry_point=script_path,
    framework_version=framework_version,
)

# Deploy the model to a single-instance real-time endpoint
predictor = sklearn_model.deploy(initial_instance_count=1, instance_type="ml.t2.medium")
print("Model deployed successfully.")

# Testing the endpoint
import json  # kept so any later cell that relies on it keeps working

from sagemaker.serializers import JSONSerializer

# The SKLearn predictor serializes requests as NumPy (application/x-npy) by
# default, so a pre-encoded JSON string would not reach the endpoint as JSON.
# Send the payload as application/json instead.
# NOTE(review): assumes inference.py parses application/json requests — confirm.
predictor.serializer = JSONSerializer()

# Prepare a test payload (replace with appropriate data structure).
# JSONSerializer does the encoding, so pass the dict itself.
test_payload = {"data": [45, 2048]}  # Example: 45 days, 2048 bytes

# Use the predictor for inference
response = predictor.predict(test_payload)
print("Prediction result:", response)

# Clean up (optional: uncomment to delete resources)
# predictor.delete_endpoint()
# print("Endpoint deleted.")
