In [1]:
import pandas as pd
import numpy as np
import boto3
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import Session
import io
import sagemaker.amazon.common as smac
import os
from sagemaker.amazon.amazon_estimator import get_image_uri


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
df = pd.read_csv("student_scores.csv")
df.head()

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [6]:
df.shape

(25, 2)

In [7]:
# seperate x and y
x = df["Hours"]
y = df["Scores"]

In [8]:
x.dtypes
y.dtypes

dtype('int64')

In [9]:
x = x.astype("float32")
y = y.astype("float32")

In [10]:
# split the data 
X_train,X_test,y_train,y_test =train_test_split(x,y,test_size=0.2)

In [11]:
X_train = X_train.reset_index(drop=True)
X_test =  X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test =  y_test.reset_index(drop=True)


In [12]:

import numpy as np

print("X_train:", type(X_train), getattr(X_train, "shape", None), getattr(X_train, "dtype", None))
print("y_train:", type(y_train), getattr(y_train, "shape", None), getattr(y_train, "dtype", None))


X_train: <class 'pandas.core.series.Series'> (20,) float32
y_train: <class 'pandas.core.series.Series'> (20,) float32


In [13]:

# Ensure correct shapes/dtypes
X_train = np.asarray(X_train, dtype=np.float32).reshape(-1, 1)  # (20,) -> (20, 1)
y_train = np.asarray(y_train, dtype=np.float32).ravel()         # Series -> ndarray (20,)


In [14]:
y_train

array([30., 75., 81., 17., 42., 62., 88., 54., 41., 69., 67., 60., 30.,
       47., 95., 76., 35., 20., 27., 30.], dtype=float32)

In [15]:
# lets create sagemaker session
sagemaker_session = sagemaker.Session()
#define the bucket name
bucket_name = "shubhamaws-bucket"
# define the prefix
prefix = "linear_learner"
# get the execution role
role = sagemaker.get_execution_role()
role

'arn:aws:iam::919751357950:role/service-role/AmazonSageMaker-ExecutionRole-20260112T120635'

In [16]:
# convert X_train into numpy array
X_train = np.array(X_train)
X_train

array([[2.5],
       [8.5],
       [8.3],
       [1.1],
       [3.3],
       [5.9],
       [9.2],
       [4.8],
       [4.5],
       [7.4],
       [6.1],
       [5.5],
       [3.5],
       [5.1],
       [8.9],
       [6.9],
       [3.8],
       [1.5],
       [3.2],
       [2.7]], dtype=float32)

In [17]:
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_train,y_train)
buf.seek(0)

0

In [18]:
# define the name of the file
key = "students-data"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train',key)).upload_fileobj(buf)

# path of our data
s3_train_data = f"s3://{bucket_name}/{prefix}/train/{key}"

print("Data uploaded",s3_train_data)
buf.close()

Data uploaded s3://shubhamaws-bucket/linear_learner/train/students-data


In [19]:

# Ensure correct shapes/dtypes
X_test = np.asarray(X_test, dtype=np.float32).reshape(-1, 1)  # (20,) -> (20, 1)
y_test = np.asarray(y_test, dtype=np.float32).ravel()         # Series -> ndarray (20,)


In [20]:
# convert X_train into numpy array
X_test = np.array(X_test)
X_test

array([[2.7],
       [7.7],
       [7.8],
       [1.9],
       [2.5]], dtype=float32)

In [21]:
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_test,y_test)
buf.seek(0)

0

In [22]:

# define the name of the file
key = "students-data-test"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test',key)).upload_fileobj(buf)

# path of our data
s3_test_data = f"s3://{bucket_name}/{prefix}/test/{key}"

print("Data uploaded",s3_test_data)

Data uploaded s3://shubhamaws-bucket/linear_learner/test/students-data-test


In [23]:
output_location = f"s3://{bucket_name}/{prefix}/output"
output_location

's3://shubhamaws-bucket/linear_learner/output'

In [24]:
#bring the container
container = sagemaker.image_uris.retrieve("linear-learner",boto3.Session().region_name)

In [25]:
#define the estimator
linear=sagemaker.estimator.Estimator(container,role=role,instance_count =1,instance_type="ml.c5.xlarge",output_path=output_location,sagemaker_session=sagemaker_session)

In [26]:
# setting up the hyperparameters
linear.set_hyperparameters(feature_dim=1,predictor_type="regressor",mini_batch_size=4,epochs=6,num_models=32,loss="absolute_loss")

In [27]:
linear.fit({"train":s3_test_data})

INFO:sagemaker:Creating training-job with name: linear-learner-2026-01-29-10-02-09-333


2026-01-29 10:02:12 Starting - Starting the training job...
2026-01-29 10:02:28 Starting - Preparing the instances for training...
2026-01-29 10:03:09 Downloading - Downloading the training image.........
2026-01-29 10:04:40 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[01/29/2026 10:04:44 INFO 140651495421760] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile'

In [34]:
# deploy the model
linear_regressor = linear.deploy(initial_instance_count=1,instance_type="ml.m5.xlarge")

INFO:sagemaker:Creating model with name: linear-learner-2026-01-29-10-21-43-796
INFO:sagemaker:Creating endpoint-config with name linear-learner-2026-01-29-10-21-43-796
INFO:sagemaker:Creating endpoint with name linear-learner-2026-01-29-10-21-43-796


-------!

In [30]:
linear_regressor.serializer = sagemaker.serializers.CSVSerializer()
linear_regressor.deserializer = sagemaker.deserializers.JSONDeserializer()

In [31]:
#predictions 
results = linear_regressor.predict(X_test)
results

{'predictions': [{'score': 37.92646408081055},
  {'score': 74.64373779296875},
  {'score': 75.37808990478516},
  {'score': 32.05169677734375},
  {'score': 36.45777130126953}]}

In [33]:
predictions = np.array([i["score"] for i in results["predictions"]])
predictions

array([37.92646408, 74.64373779, 75.3780899 , 32.05169678, 36.4577713 ])