In [42]:
# importing necessary libraries
import pandas as pd
import numpy as np
import boto3
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import Session
import io
import sagemaker.amazon.common as smac
import os
from sagemaker.amazon.amazon_estimator import get_image_uri

In [43]:
#read the csv file
df=pd.read_csv("student_scores.csv")

In [44]:
#head of the dataframe
df.head()

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [45]:
#shape of the data
df.shape

(25, 2)

In [46]:
#separate x and y
x=df[["Hours"]]
y=df[["Scores"]]

In [47]:
#checking data types
x.dtypes
x=x.astype("float32")
y=y.astype("float32")

In [48]:
y.dtypes

Scores    float32
dtype: object

In [49]:
#split the data
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [50]:
#reset index
X_train=X_train.reset_index(drop=True)
y_train=y_train.reset_index(drop=True)
X_test=X_test.reset_index(drop=True)
y_test=y_test.reset_index(drop=True)

In [51]:
#we need to take label column as vector
y_train=y_train.iloc[:,0]

In [52]:
y_test=y_test.iloc[:,0]

In [53]:
#lets create sagemaker session
sagemaker_session=sagemaker.Session()
#define the bucket name
bucket_name="yt-ml-sagemaker"
#define the prefix
prefix="linear-learner"
#get the execution role
role=sagemaker.get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [54]:
#convert X_train into numpy aray
X_train=np.array(X_train)

In [55]:
#create the buffer
buf=io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_train,y_train)
buf.seek(0)

0

In [56]:
#define the name of the file
key="student-data"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train',key)).upload_fileobj(buf)

#path of our data
s3_train_data=f"s3://{bucket_name}/{prefix}/train/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://yt-ml-sagemaker/linear-learner/train/student-data


In [57]:
#convert X_train into numpy aray
X_test=np.array(X_test)
#create the buffer
buf=io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_test,y_test)
buf.seek(0)
#define the name of the file
key="student-data-test"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test',key)).upload_fileobj(buf)

#path of our data
s3_train_data=f"s3://{bucket_name}/{prefix}/test/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://yt-ml-sagemaker/linear-learner/test/student-data-test


In [58]:
#output location
output_location=f"s3://{bucket_name}/{prefix}/output"

In [59]:
#bring the container
container=sagemaker.image_uris.retrieve("linear-learner",boto3.Session().region_name)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [60]:
#define the estimator
linear=sagemaker.estimator.Estimator(container,role,instance_count=1,instance_type="ml.c4.xlarge",output_path=output_location,sagemaker_session=sagemaker_session)

In [61]:
#setting up the hyperparameters
linear.set_hyperparameters(feature_dim=1,predictor_type="regressor",mini_batch_size=4,epochs=6,num_models=32,loss="absolute_loss")

In [62]:
#fit the model
linear.fit({"train":s3_train_data})

INFO:sagemaker:Creating training-job with name: linear-learner-2023-12-04-03-56-01-649


2023-12-04 03:56:01 Starting - Starting the training job...
2023-12-04 03:56:17 Starting - Preparing the instances for training......
2023-12-04 03:57:26 Downloading - Downloading input data...
2023-12-04 03:57:51 Training - Downloading the training image.........
2023-12-04 03:59:27 Uploading - Uploading generated training model[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[12/04/2023 03:59:21 INFO 140240539854656] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss

In [63]:
#deploy the model
linear_regresor=linear.deploy(initial_instance_count=1,instance_type="ml.m4.xlarge")

INFO:sagemaker:Creating model with name: linear-learner-2023-12-04-04-00-51-638
INFO:sagemaker:Creating endpoint-config with name linear-learner-2023-12-04-04-00-51-638
INFO:sagemaker:Creating endpoint with name linear-learner-2023-12-04-04-00-51-638


-------!

In [65]:
linear_regresor.serializer=sagemaker.serializers.CSVSerializer()
linear_regresor.deserializer=sagemaker.deserializers.JSONDeserializer()

In [66]:
#prediction
results=linear_regresor.predict(X_test)

In [67]:
results

{'predictions': [{'score': 77.46267700195312},
  {'score': 55.5824089050293},
  {'score': 67.51710510253906},
  {'score': 71.99261474609375},
  {'score': 40.66404724121094}]}

In [68]:
predictions=np.array([i["score"] for i in results["predictions"]])

In [69]:
predictions

array([77.462677  , 55.58240891, 67.5171051 , 71.99261475, 40.66404724])