In [1]:
# Setup and Load Data

import pandas as pd
import boto3



# Pull the wine dataset out of S3 and parse it with pandas
s3 = boto3.client('s3', region_name="us-east-2")
read_file = s3.get_object(
    Bucket="sagemaker-ftb-sagemaker-domain",
    Key="wine_data/wine_data.csv",
)
# The 'Body' entry of the response is handed straight to pandas;
# fields are parsed with ';' as the separator.
df = pd.read_csv(read_file['Body'], sep=';')

# Preview the leading rows
df.head()


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [2]:
from sklearn.model_selection import train_test_split
import time

# Model inputs are four columns of the frame; the model output is `quality`.
# Both are selected before the column labels are rewritten below, so they
# keep the original space-separated names.
feature_columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar']
features = df[feature_columns]
target = df['quality']

# Bookkeeping columns: a sequential row id and one shared timestamp
# (seconds since the epoch, stored as float64).
row_count = len(df)
df['record_id'] = range(row_count)
current_time_sec = round(time.time())
df['event_time'] = pd.Series([current_time_sec] * row_count, dtype="float64")

# Rewrite column labels so spaces become underscores
df.columns = df.columns.str.replace(' ', '_')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit the linear regression on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out split and report the mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.5337124668435269


In [6]:
import pickle

# Serialize the fitted estimator held in `model` to a local pickle file.
# The file name is reused when the artifact is uploaded and referenced by URL.
filename = 'wine_prediction_model.pkl'
with open(filename, mode='wb') as file:
    pickle.dump(model, file)

In [9]:
# Copy the pickled model into the same bucket as the dataset, under wine_data/
s3.upload_file(
    Filename=filename,
    Bucket="sagemaker-ftb-sagemaker-domain",
    Key=f"wine_data/{filename}",
)


None


In [4]:
import time
import os
from sagemaker import get_execution_role, session
import boto3

# Execution role as reported by the SageMaker SDK
role = get_execution_role()

# Region resolved by a freshly created boto3 session; the SageMaker client is bound to it
region = boto3.Session().region_name
sm_client = boto3.client('sagemaker', region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [5]:
import time
# Suffix the group name with the current Unix time so repeated runs get
# different names (to one-second resolution).
model_package_group_name = f"wine-linear-regression-{round(time.time())}"
model_package_group_input_dict = dict(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageGroupDescription="Sample model package group",
)

# Create the group and report the ARN returned by the service
create_model_package_group_response = sm_client.create_model_package_group(
    **model_package_group_input_dict
)
group_arn = create_model_package_group_response['ModelPackageGroupArn']
print(f'ModelPackageGroup Arn : {group_arn}')

ModelPackageGroup Arn : arn:aws:sagemaker:us-east-2:642693618675:model-package-group/wine-linear-regression-1728332385


In [14]:
from sagemaker import image_uris
# Look up the scikit-learn 0.23-1 inference image URI for us-east-2.
# NOTE(review): the region is a literal here, while the SageMaker client is
# built from the session's `region` — confirm the two are meant to match.
image_uri = image_uris.retrieve(
    framework='sklearn',
    region='us-east-2',
    version='0.23-1',
    image_scope='inference',
)

In [15]:
# Specify the model source
# NOTE(review): this URL points at the raw .pkl file uploaded earlier. The
# SageMaker API reference describes ModelDataUrl as a single gzip-compressed
# tar archive (.tar.gz) — confirm the artifact format before deploying.
model_url = "s3://sagemaker-ftb-sagemaker-domain/wine_data/" + filename

# Inference container plus the content types listed for requests and responses
modelpackage_inference_specification = {
    "InferenceSpecification": {
        "Containers": [
            {
                "Image": image_uri,
                "ModelDataUrl": model_url,
            }
        ],
        "SupportedContentTypes": ["text/csv"],
        "SupportedResponseMIMETypes": ["text/csv"],
    }
}

# Alternatively, you can specify the model source like this:
# modelpackage_inference_specification["InferenceSpecification"]["Containers"][0]["ModelDataUrl"]=model_url

# Request for a new version in the model package group; it is registered with
# approval status "PendingManualApproval".
# The description matches the model trained in this notebook: a linear
# regression on four wine features predicting `quality`.
create_model_package_input_dict = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageDescription": "Linear regression model that predicts wine quality from fixed acidity, volatile acidity, citric acid and residual sugar",
    "ModelApprovalStatus": "PendingManualApproval",
}
create_model_package_input_dict.update(modelpackage_inference_specification)

In [16]:
# Submit the model package request and report the ARN of the created version
create_model_package_response = sm_client.create_model_package(
    **create_model_package_input_dict
)
model_package_arn = create_model_package_response["ModelPackageArn"]
print(f'ModelPackage Version ARN : {model_package_arn}')

ModelPackage Version ARN : arn:aws:sagemaker:us-east-2:642693618675:model-package/wine-linear-regression-1728332385/1
