In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [41]:
df = pd.read_csv('salesforecast_dataset.csv')

In [None]:
df['isholiday'] = df['isholiday'].replace({'False':0,'True':1})

In [24]:
df.head()

Unnamed: 0,store,temperature,fuel_price,markdown1,markdown2,markdown3,markdown4,markdown5,cpi,unemployment,isholiday,dept,weekly_sales,type,size,month_year,year
0,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,211.096358,8.106,False,1,24924.5,1,151315,2010-02,2010
1,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,211.096358,8.106,False,2,50605.27,1,151315,2010-02,2010
2,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,211.096358,8.106,False,3,13740.12,1,151315,2010-02,2010
3,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,211.096358,8.106,False,4,39954.04,1,151315,2010-02,2010
4,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,211.096358,8.106,False,5,32229.38,1,151315,2010-02,2010


In [27]:
df.isholiday.unique()

array([0, 1])

In [22]:
df['isholiday'] = df['isholiday'].replace({'False': 0, 'True': 1})

In [29]:
# Splitting 'month_year' into 'year' and 'month' columns
df[['year', 'month']] = df['month_year'].str.split('-', expand=True)

# Convert 'year' and 'month' columns to integers
df['year'] = df['year'].astype(int)
df['month'] = df['month'].astype(int)

In [35]:
# Drop the 'month_year' column
df.drop(columns=['month_year'], inplace=True)

In [32]:
df['isholiday'] = df['isholiday'].astype(int)

In [36]:
x=df.drop(["weekly_sales","fuel_price","markdown1","markdown2","markdown3","markdown4","markdown5"],axis=1)
y=df["weekly_sales"]

In [38]:
x_new = np.array(x).astype('float32')
y_new = np.array(y).astype('float32')

In [63]:
x_train,x_test,y_train,y_test=train_test_split(x_new,y_new,test_size=0.25, random_state=42)

In [None]:
# Sagemaker

In [43]:
import sagemaker #how to create engine/session
import boto3 # To access AWS service via python

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [44]:
#Creating a session based on sagemaker
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
print(role)

arn:aws:iam::851725313206:role/service-role/AmazonSageMaker-ExecutionRole-20240729T140061


In [45]:
bucket = 'salesdatadeployment'
prefix = 'ML'

In [47]:
x_train.shape

(7500, 10)

In [50]:
y_train.shape

(7500,)

In [51]:
y_train = x_train[:,0] # Converting one dimentional array
y_train.shape

(7500,)

In [54]:
import io
import sagemaker.amazon.common as smac #Sagemaker common library

In [57]:
buffer = io.BytesIO()
smac.write_numpy_to_dense_tensor(buffer, x_train, y_train)
buffer.seek(0)  # By seek(0) To avoiding external affact

0

In [58]:
import os
key = 'train_data'
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, key)).upload_fileobj(buffer)

In [60]:
s3_train_data = 's3://{}/{}/{}'.format(bucket, prefix,key)
print(s3_train_data)

s3://salesdatadeployment/ML/train_data


In [66]:
y_test

array([  1295.94,  10466.42,  40552.37, ...,  18555.99, 120037.4 ,
        46039.49], dtype=float32)

In [67]:
y_test.shape

(2500,)

In [73]:
buffer_test = io.BytesIO()
smac.write_numpy_to_dense_tensor(buffer_test, x_train, y_train)
buffer_test.seek(0)  # By seek(0) To avoiding external affact

0

In [74]:
key_test = 'test_data'
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, key_test)).upload_fileobj(buffer_test)

In [70]:
output = 's3://{}/{}/output'.format(bucket, prefix)
print(output)

s3://salesdatadeployment/ML/output


In [79]:
from sagemaker.amazon.amazon_estimator import get_image_uri
container = get_image_uri(boto3.Session().region_name,"linear-learner")

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [84]:
model = sagemaker.estimator.Estimator(container,
                                      role,
                                      train_instance_count=1,
                                      train_instance_type='ml.c4.xlarge',
                                      output_path=output,
                                      sagemaker_session =sagemaker_session, train_use_spot_instances = True,
                                      train_max_run = 300,
                                      train_max_wait = 600)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [88]:
model.set_hyperparameters(feature_dim = 10,
                          predictor_type='regressor',
                          mini_batch_size = 100,
                          epochs = 5,
                          num_models = 32,
                          loss = 'absolute_loss')

In [89]:
model.fit({'train': s3_train_data})

INFO:sagemaker:Creating training-job with name: linear-learner-2024-07-29-14-45-18-966


2024-07-29 14:45:19 Starting - Starting the training job......
2024-07-29 14:46:14 Downloading - Downloading input data...
2024-07-29 14:46:39 Downloading - Downloading the training image.........
2024-07-29 14:48:10 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[07/29/2024 14:48:18 INFO 140594514720576] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5',

In [90]:
model_deploy = model.deploy(initial_instance_count=1,
                            instance_type = 'ml.m4.xlarge')

INFO:sagemaker:Creating model with name: linear-learner-2024-07-29-14-50-56-255
INFO:sagemaker:Creating endpoint-config with name linear-learner-2024-07-29-14-50-56-255
INFO:sagemaker:Creating endpoint with name linear-learner-2024-07-29-14-50-56-255


-------!

In [95]:
from sagemaker.serializers import CSVSerializer # i/p
from sagemaker.deserializers import JSONDeserializer #o/p
model_deploy.content_type='test/csv'
model_deploy.serializer = CSVSerializer()
model_deploy.deserializer = JSONDeserializer()

In [97]:
x_test

array([[1.0000000e+00, 6.9309998e+01, 2.1704536e+02, ..., 1.5131500e+05,
        2.0110000e+03, 1.0000000e+01],
       [1.0000000e+00, 6.4610001e+01, 2.1579601e+02, ..., 1.5131500e+05,
        2.0110000e+03, 5.0000000e+00],
       [1.0000000e+00, 8.3360001e+01, 2.1123514e+02, ..., 1.5131500e+05,
        2.0100000e+03, 7.0000000e+00],
       ...,
       [1.0000000e+00, 4.5320000e+01, 2.2042577e+02, ..., 1.5131500e+05,
        2.0120000e+03, 2.0000000e+00],
       [1.0000000e+00, 4.4549999e+01, 2.1953600e+02, ..., 1.5131500e+05,
        2.0110000e+03, 1.2000000e+01],
       [1.0000000e+00, 3.8509998e+01, 2.1124217e+02, ..., 1.5131500e+05,
        2.0100000e+03, 2.0000000e+00]], dtype=float32)

In [None]:
result = model_deploy.predict(x_test)

In [None]:
result  #Json O/P

In [None]:
prediction = np.array([i['score'] for i in result ['predictions']])

In [None]:
plt.scatter(x_test, y_test)
plt.plot(x_test, prediction)
plt.show()

In [98]:
model_deploy.delete_endpoint() #To delete the instance used for model.

INFO:sagemaker:Deleting endpoint configuration with name: linear-learner-2024-07-29-14-50-56-255
INFO:sagemaker:Deleting endpoint with name: linear-learner-2024-07-29-14-50-56-255
