In [24]:
import pandas as pd
import numpy as np
import boto3
from sklearn.model_selection import train_test_split
import sagemaker 
from sagemaker import Session
import sagemaker.amazon.common as smac
import os
from sagemaker.amazon.amazon_estimator import get_image_uri
import io

In [3]:
df = pd.read_csv('student_scores.csv')

In [6]:
df.shape

(25, 2)

In [8]:
x = df[['Hours']]
y = df[['Scores']]


In [13]:
x = x.astype("float32")
y = y.astype("float32")

In [14]:
x.dtypes

Hours    float32
dtype: object

In [15]:
y.dtypes

Scores    float32
dtype: object

In [16]:
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size = 0.2)

In [17]:
X_train = X_train.reset_index(drop = True)
y_train = y_train.reset_index(drop = True)
X_test = X_test.reset_index(drop = True)
y_test = y_test.reset_index(drop = True)

In [19]:
y_train = y_train.iloc[:,0]

In [32]:
y_train = np.array(y_train)

In [21]:
sagemaker_session = sagemaker.Session()
bucket_name = "c160506a4117400l11163953t1w772864730-sandboxbucket-i4cmtu9q8tvt"
prefix = "linear-learner"
role = sagemaker.get_execution_role()

In [22]:
X_train = np.array(X_train)

In [25]:
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_train,y_train)
buf.seek(0)

0

In [26]:
key = "student-data"

boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train',key)).upload_fileobj(buf)

s3_train_data = f"s3://{bucket_name}/{prefix}/train/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://c160506a4117400l11163953t1w772864730-sandboxbucket-i4cmtu9q8tvt/linear-learner/train/student-data


In [35]:
y_test = y_test.squeeze()

In [36]:
X_test = np.array(X_test)

buf = io.BytesIO()

smac.write_numpy_to_dense_tensor(buf,X_test,y_test)

buf.seek(0)

key = "student-data-test"

boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test',key)).upload_fileobj(buf)

s3_train_data = f"s3://{bucket_name}/{prefix}/test/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://c160506a4117400l11163953t1w772864730-sandboxbucket-i4cmtu9q8tvt/linear-learner/test/student-data-test


In [41]:
region = boto3.Session().region_name
container = sagemaker.image_uris.retrieve("linear-learner",region)
print(container)

382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1


In [43]:
output_location = f"s3://{bucket_name}/{prefix}/"

In [44]:
linear = sagemaker.estimator.Estimator(container,role,instance_count=1,instance_type="ml.c4.xlarge",output_path=output_location,sagemaker_session=sagemaker.Session())

In [49]:
linear.set_hyperparameters(feature_dim=1,predictor_type="regressor",mini_batch_size=4,epochs=6,num_models=32,loss="absolute_loss")

In [50]:
linear.fit({"train":s3_train_data})

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: linear-learner-2025-08-10-10-11-46-228


2025-08-10 10:11:47 Starting - Starting the training job...
2025-08-10 10:12:01 Starting - Preparing the instances for training...
2025-08-10 10:12:50 Downloading - Downloading the training image............
2025-08-10 10:14:46 Training - Training image download completed. Training in progress....
2025-08-10 10:15:14 Uploading - Uploading generated training model
2025-08-10 10:15:14 Completed - Training job completed
..Training seconds: 170
Billable seconds: 170


In [51]:
linear_regressor = linear.deploy(initial_instance_count=1,instance_type="ml.m4.xlarge")

INFO:sagemaker:Creating model with name: linear-learner-2025-08-10-10-17-50-907
INFO:sagemaker:Creating endpoint-config with name linear-learner-2025-08-10-10-17-50-907
INFO:sagemaker:Creating endpoint with name linear-learner-2025-08-10-10-17-50-907


-------!

In [52]:
linear_regressor.serializer=sagemaker.serializers.CSVSerializer()
linear_regressor.deserializer=sagemaker.deserializers.JSONDeserializer()

In [53]:
results = linear_regressor.predict(X_test)

In [55]:
results

{'predictions': [{'score': 27.75180435180664},
  {'score': 30.15160369873047},
  {'score': 72.14811706542969},
  {'score': 67.94847106933594},
  {'score': 48.15011215209961}]}

In [56]:
import nbformat

# Replace 'your_notebook.ipynb' with your file name
notebook_filename = 'mynotebook.ipynb'

try:
    nb = nbformat.read(notebook_filename, as_version=4)
    nbformat.validate(nb)
    print("Notebook is valid!")
except Exception as e:
    print("Notebook error:", e)


Notebook is valid!


In [57]:
nbformat.write(nb, 'clean_notebook.ipynb')
