## Training your First Model in Python

- **Kernel**: `conda_python3`
- **SageMaker Python SDK Version**: `2.X`

In [2]:
# Restore `df_all_data` from IPython's %store persistence. It must have been
# saved earlier with `%store df_all_data` (presumably by a previous notebook
# in this series -- confirm), otherwise this cell restores nothing.
%store -r df_all_data

In [3]:
from sklearn.model_selection import train_test_split

# Single feature (X) and target (y), extracted from the restored DataFrame
# as plain NumPy arrays.
X = df_all_data['management_experience_months'].values
y = df_all_data['monthly_salary'].values

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [4]:
# Display the feature values that ended up in the training split.
X_train

array([15, 49, 94, 38, 93, 46,  7, 56, 19, 67, 71, 65, 73, 29])

In [5]:
# Display the feature values held out for testing.
X_test

array([32, 61, 37, 59, 14, 22])

In [8]:
import pandas as pd

# Assemble the training split into a two-column frame, target first.
# NOTE(review): the column order matters because this frame is later written
# as a header-less CSV; SageMaker's built-in Linear Learner is documented to
# read the label from the first CSV column -- confirm against the AWS docs.
df_training_data = pd.DataFrame(
    {
        'monthly_salary': y_train,
        'management_experience_months': X_train,
    }
)
df_training_data

Unnamed: 0,monthly_salary,management_experience_months
0,1020,15
1,1390,49
2,1590,94
3,1290,38
4,1750,93
5,1240,46
6,960,7
7,1290,56
8,960,19
9,1340,67


In [9]:
# Create the local scratch directory for the CSV export; `-p` makes this a
# no-op when the directory already exists, so the cell is safe to re-run.
!mkdir -p tmp

In [10]:
# Write the training frame as bare rows of values: no header line and no
# index column.
df_training_data.to_csv(
    'tmp/training_data.csv',
    index=False,
    header=False,
)

In [11]:
# Destination bucket and key prefix for everything this notebook uploads.
# NOTE(review): the bucket name is hard-coded; presumably each reader must
# replace it with a bucket they own and can write to -- confirm.
s3_bucket = 'sagemaker-cookbook-bucket'
prefix = 'chapter01'

In [12]:
# Upload the local CSV to s3://<s3_bucket>/<prefix>/input/ using the AWS CLI;
# IPython substitutes {s3_bucket} and {prefix} from the Python namespace.
!aws s3 cp tmp/training_data.csv s3://{s3_bucket}/{prefix}/input/training_data.csv

Completed 109 Bytes/109 Bytes (1.4 KiB/s) with 1 file(s) remainingupload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter01/input/training_data.csv


In [13]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# Execution role the SDK resolves for this environment; handed to the
# estimator further down.
role = get_execution_role()

# SageMaker SDK session object, also passed to the estimator.
session = sagemaker.Session()

# Region taken from the default boto3 session; used to look up the
# algorithm's container image.
region_name = boto3.Session().region_name

In [14]:
# S3 URI of the uploaded training CSV (same key the `aws s3 cp` cell wrote to)
# and the S3 prefix under which the training job's output is stored.
training_s3_input_location = f"s3://{s3_bucket}/{prefix}/input/training_data.csv" 
training_s3_output_location = f"s3://{s3_bucket}/{prefix}/output/"

In [15]:
from sagemaker.inputs import TrainingInput

# Describe the training channel: where the data lives in S3 and that it is CSV.
train = TrainingInput(
    s3_data=training_s3_input_location,
    content_type="text/csv",
)

In [16]:
# Inspect the channel configuration the SDK generated for this input.
vars(train)

{'config': {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
    'S3Uri': 's3://sagemaker-cookbook-bucket/chapter01/input/training_data.csv',
    'S3DataDistributionType': 'FullyReplicated'}},
  'ContentType': 'text/csv'}}

In [17]:
from sagemaker.image_uris import retrieve

# Look up the region-specific ECR image URI for the built-in Linear Learner
# algorithm, version "1".
container = retrieve(
    framework="linear-learner",
    region=region_name,
    version="1",
)
container

'382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1'

In [18]:
# Configure (but do not yet start) a training job: the algorithm image, the
# role it runs as, a single ml.m5.xlarge instance, and the S3 output prefix.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=training_s3_output_location,
    sagemaker_session=session,
)

In [19]:
# Train Linear Learner as a regressor.
# NOTE(review): mini_batch_size=4 is presumably kept small because the
# training split shown above has only 14 rows -- confirm.
estimator.set_hyperparameters(predictor_type='regressor', mini_batch_size=4)

In [20]:
# Launch the training job, supplying the CSV input on the channel named
# 'train'. As the recorded output shows, the call streams the job's log until
# the job completes.
estimator.fit({'train': train})

2021-03-13 02:23:19 Starting - Starting the training job...
2021-03-13 02:23:43 Starting - Launching requested ML instancesProfilerReport-1615602198: InProgress
.........
2021-03-13 02:25:04 Starting - Preparing the instances for training......
2021-03-13 02:26:04 Downloading - Downloading input data...
2021-03-13 02:26:45 Training - Training image download completed. Training in progress.
2021-03-13 02:26:45 Uploading - Uploading generated training model[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[03/13/2021 02:26:42 INFO 140345420867392] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'feature_dim': u'auto', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_fo


2021-03-13 02:27:05 Completed - Training job completed
Training seconds: 49
Billable seconds: 49


In [21]:
# S3 URI of the model artifact (model.tar.gz) produced by the training job.
model_data = estimator.model_data
model_data

's3://sagemaker-cookbook-bucket/chapter01/output/linear-learner-2021-03-13-02-23-18-930/output/model.tar.gz'

In [22]:
# Persist `model_data` with IPython's %store so another kernel or notebook
# can restore it via `%store -r model_data`.
%store model_data

Stored 'model_data' (str)


In [25]:
# Container image URI the estimator was configured with (the recorded output
# matches the `container` value retrieved earlier).
# NOTE(review): despite its name, `model_uri` holds the algorithm *image* URI,
# not the model artifact location (that is `model_data`). The name is left
# unchanged because it is stored via %store and presumably restored under
# this name elsewhere -- confirm before renaming.
model_uri = estimator.image_uri
model_uri

'382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1'

In [26]:
# Persist the container image URI with IPython's %store for later restoration
# via `%store -r model_uri`.
%store model_uri

Stored 'model_uri' (str)


In [32]:
# Persist the held-out test split with IPython's %store (presumably so a
# later notebook can evaluate the trained model on it -- confirm).
%store X_test
%store y_test

Stored 'X_test' (ndarray)
Stored 'y_test' (ndarray)
