## Visualizing and Understanding your Data in R

- **Kernel**: `R`

In [3]:
# Load the complete data set from the RDS file tmp/df.all_data.
# NOTE(review): the file is presumably produced by an earlier notebook; that
# step is not shown here.
df.all_data <- readRDS("tmp/df.all_data")

In [5]:
# Reproducible 70/30 train/test split of df.all_data.
set.seed(42)

row.count <- nrow(df.all_data)
# sample() truncates a fractional size; floor() makes that explicit.
training.size <- floor(0.7 * row.count)

# seq_len() is safe for a zero-row frame, where 1:nrow() would give c(1, 0).
random.row.numbers <- sample(seq_len(row.count), training.size)

df.training_data <- df.all_data[random.row.numbers, ]
# Take the complement with setdiff() rather than negative indexing: when no
# rows were sampled, df[-integer(0), ] yields zero rows instead of all rows.
df.test_data <- df.all_data[setdiff(seq_len(row.count), random.row.numbers), ]

In [6]:
# Keep only the target (monthly_salary) followed by the single feature
# (management_experience_months). Selecting by name instead of by position
# c(3, 2) keeps this cell re-runnable and independent of the column order.
df.training_data <- df.training_data[
    c("monthly_salary", "management_experience_months")
]
# Drop the shuffled row names left over from sampling.
row.names(df.training_data) <- NULL

In [7]:
# Display the training frame after column selection and row-name reset.
df.training_data

monthly_salary,management_experience_months
<int>,<int>
960,19
1590,94
1630,65
960,7
1480,71
1330,61
1300,37
1040,22
1290,56
1020,15


In [8]:
# Display the held-out rows; they keep every original column and their
# original row names.
df.test_data

Unnamed: 0_level_0,last_name,management_experience_months,monthly_salary
Unnamed: 0_level_1,<chr>,<int>,<int>
3,Brown,38,1290
11,Chen,14,1090
12,Kim,67,1340
13,Davis,29,1170
14,James,49,1390
15,Perez,46,1240


In [37]:
# Persist the held-out rows as an RDS file for later use.
# saveRDS() does not create missing directories, so tmp/ must already exist.
saveRDS(df.test_data, "tmp/df.test_data")

In [5]:
# Run a shell command, echo everything it printed (one line per element of the
# captured output), and return the final output line.
#
# bash_command: the command line handed to system().
# Returns: the last captured line, or "" when the command printed nothing.
cmd <- function(bash_command) {
    captured <- system(bash_command, intern = TRUE)

    # writeLines() prints each captured line followed by a newline and prints
    # nothing for empty output.
    writeLines(captured)

    if (length(captured) == 0) {
        return("")
    }

    captured[[length(captured)]]
}

In [6]:
# Make sure the local scratch directory exists before files are written to it.
# dir.create() needs no shell; showWarnings = FALSE keeps it quiet when tmp/
# is already there, mirroring `mkdir -p`.
dir.create("tmp", showWarnings = FALSE, recursive = TRUE)

In [7]:
# Dump the training frame as a bare CSV: comma separated, with neither a
# header row nor row names.
write.table(
    x = df.training_data,
    file = "tmp/training_data.csv",
    sep = ",",
    row.names = FALSE,
    col.names = FALSE
)

In [8]:
# Where the training CSV lives locally and where it goes in S3.
s3.bucket <- "sagemaker-cookbook-bucket"
s3.prefix <- "chapter01"
local.source <- "tmp/training_data.csv"
s3.destination <- paste0(
    "s3://", s3.bucket, "/", s3.prefix, "/input/", "training_data.csv"
)

# AWS CLI copy command. shQuote() keeps each path a single shell argument even
# if a bucket, prefix or file name ever contains spaces or shell
# metacharacters.
upload.s3.command <- paste(
    "aws s3 cp", shQuote(local.source), shQuote(s3.destination)
)

In [9]:
# Run the upload; cmd() echoes the command's output and returns its last line.
cmd(upload.s3.command)

Completed 109 Bytes/109 Bytes (1.4 KiB/s) with 1 file(s) remainingupload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter01/input/training_data.csv


In [18]:
# Reach the Python SDKs from R through reticulate.
library(reticulate)

boto3 <- import("boto3")
sagemaker <- import("sagemaker")

# SageMaker session and execution role, plus the region reported by the
# default boto3 session.
session <- sagemaker$Session()
role <- sagemaker$get_execution_role()
region_name <- boto3$Session()$region_name

In [13]:
# Training input is the uploaded CSV; output goes under <prefix>/output/ in
# the same bucket.
training.s3_input_location <- s3.destination
training.s3_output_location <- sprintf(
    "s3://%s/%s/output/", s3.bucket, s3.prefix
)

In [21]:
# Describe the training channel: the CSV object uploaded to S3.
TrainingInput <- sagemaker$inputs$TrainingInput

sagemaker.train_input <- TrainingInput(
    s3_data = training.s3_input_location,
    content_type = "text/csv"
)

In [22]:
# Look up the linear-learner container image URI for the current region.
retrieve <- sagemaker$image_uris$retrieve

container <- retrieve(
    framework = "linear-learner",
    region = region_name,
    version = "1"
)
container

In [23]:
# Configure the training job: one ml.m5.xlarge instance running the retrieved
# container, with results written to the output location in S3.
Estimator <- sagemaker$estimator$Estimator

estimator <- Estimator(
    image_uri = container,
    role = role,
    instance_count = 1L,
    instance_type = "ml.m5.xlarge",
    output_path = training.s3_output_location,
    sagemaker_session = session
)

In [24]:
# Regression task; the mini-batch size is passed as an R integer (4L).
estimator$set_hyperparameters(
    predictor_type = "regressor",
    mini_batch_size = 4L
)

In [25]:
# Start training with the CSV registered under the "train" channel.
training.channels <- list(train = sagemaker.train_input)
estimator$fit(training.channels)

In [30]:
# Inspect the estimator's model_data attribute.
# NOTE(review): presumably the S3 location of the trained model artifact;
# confirm against the SageMaker SDK docs.
estimator$model_data

In [32]:
# Persist model_data for later use. The "$" in the file name is a literal
# character, not R's extraction operator; readers must use this exact path.
saveRDS(estimator$model_data, file="tmp/estimator$model_data")

In [34]:
# Inspect the container image URI recorded on the estimator.
estimator$image_uri

In [35]:
# Persist image_uri for later use. The "$" in the file name is a literal
# character, not R's extraction operator; readers must use this exact path.
saveRDS(estimator$image_uri, file="tmp/estimator$image_uri")