### Import Libraries

In [20]:
import numpy as np
import pandas as pd

import boto3
import re

import sagemaker
from sagemaker import get_execution_role

### Read data from S3

In [21]:
#Get the execution role for the notebook instance. This is the IAM role that you created for your notebook instance.
role = get_execution_role() 
#Specify bucket name containing data
bucket='use-case1'    
#Specify the file name (AWS Data Key)
data_key = 'parts_train.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)

pd.read_csv(data_location)

Unnamed: 0,0,108.7,81.24126506024099,14
0,0,103.368544,93.100000,16.0
1,0,107.800000,81.241265,12.9
2,0,103.368544,53.800000,15.4
3,0,95.000000,81.241265,15.8
4,0,107.400000,40.800000,15.5
...,...,...,...,...
1269,1,120.300000,95.001701,14.7
1270,1,133.200000,52.800000,17.1
1271,0,103.368544,110.500000,15.5
1272,0,103.368544,107.700000,14.0


In [25]:
print(role)

arn:aws:iam::479796044641:role/service-role/AmazonSageMaker-ExecutionRole-20210224T191810


### Create a session with SageMaker

In [24]:
sess=sagemaker.Session()

# Implementation of ML models

## (I) Inbuilt algorithms(from AWS Sagemaker)

### (1) Retrieve image of  XGBoost algorithm from container

In [26]:
container = sagemaker.image_uris.retrieve("xgboost",sess.boto_region_name,version="1.2-1")

print (f'Using XGBoost Container {container}')

Using XGBoost Container 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:1.2-1


### Create output file for model.

In [22]:
s3_model_output_location = r's3://use-case1/model'.format(bucket)

In [23]:
print(s3_model_output_location)

s3://use-case1/model


### Build XGBoost model and configure the training job.

In [27]:
hp={'max_depth': 3, 'objective': 'binary:logitraw', 'num_round': 15}
estimator = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name = 'parts-classification',hyperparameters=hp)

In [29]:
estimator.hyperparameters()

{'max_depth': 3, 'objective': 'binary:logitraw', 'num_round': 15}

### Specify the training data location.

In [30]:
training_input_config = sagemaker.session.TrainingInput(s3_data=data_location,content_type="csv")

In [31]:
print(training_input_config.config)

{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://use-case1/parts_train.csv', 'S3DataDistributionType': 'FullyReplicated'}}, 'ContentType': 'csv'}


In [32]:
data_channels = {'train': training_input_config}

### Train the  XGBoost model.

In [33]:
estimator.fit(data_channels)

2021-05-27 10:43:19 Starting - Starting the training job...
2021-05-27 10:43:20 Starting - Launching requested ML instancesProfilerReport-1622112198: InProgress
......
2021-05-27 10:44:41 Starting - Preparing the instances for training.........
2021-05-27 10:46:13 Downloading - Downloading input data...
2021-05-27 10:46:33 Training - Downloading the training image...
2021-05-27 10:47:14 Uploading - Uploading generated training model[34m[2021-05-27 10:47:10.764 ip-10-0-127-28.us-east-2.compute.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logitraw to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determ

### (2) Retrieve image of  KNN algorithm from container

In [51]:
container_knn = sagemaker.image_uris.retrieve("knn",sess.boto_region_name)
#container = sagemaker.image_uris.retrieve("xgboost",sess.boto_region_name,version="1.2-1")
print (f'Using KNN Container {container_knn}')

Using KNN Container 404615174143.dkr.ecr.us-east-2.amazonaws.com/knn:1


### Create output file for model.

In [52]:
s3_model_output_location1 = r's3://use-case1/modelknn'.format(bucket)

### Build KNN model and configure the training job.

In [53]:
# Setting up the estimator and hyperparameters
hp_knn={
    'feature_dim': 3,
    'k': 3,
    'sample_size': 1275,
    'predictor_type': 'classifier' 
}
knn_est=sagemaker.estimator.Estimator(container_knn,
                                      role,
                                      instance_count=1,
                                      instance_type='ml.m5.2xlarge',
                                      output_path=s3_model_output_location1,
                                      sagemaker_session=sess,
                                      base_job_name = 'parts-classification-knn',hyperparameters=hp_knn)

In [54]:
knn_est.hyperparameters()

{'feature_dim': 3, 'k': 3, 'sample_size': 1275, 'predictor_type': 'classifier'}

### Specify training location

In [55]:
training_input_config = sagemaker.session.TrainingInput(s3_data=data_location,content_type="text/csv")

In [56]:
data_knn = {'train': training_input_config}

In [57]:
print(training_input_config.config)

{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://use-case1/parts_train.csv', 'S3DataDistributionType': 'FullyReplicated'}}, 'ContentType': 'text/csv'}


### Train the KNN model.

In [58]:
knn_est.fit(data_knn)

2021-05-28 10:58:43 Starting - Starting the training job...
2021-05-28 10:58:45 Starting - Launching requested ML instancesProfilerReport-1622199523: InProgress
......
2021-05-28 11:00:08 Starting - Preparing the instances for training...
2021-05-28 11:00:39 Downloading - Downloading input data...
2021-05-28 11:01:09 Training - Downloading the training image........[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[05/28/2021 11:02:19 INFO 139940787996480] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-conf.json: {'_kvstore': 'dist_async', '_log_level': 'info', '_num_gpus': 'auto', '_num_kv_servers': '1', '_tuning_objective_metric': '', '_faiss_index_nprobe': '5', 'epochs': '1', 'feature_dim': 'auto', 'faiss_index_ivf_nlists': 'auto', 'index_metric': 'L2', 'index_type': 'faiss.Flat', 'mini_batch_size': '5000', '_enable_profiler': 'false'}[0m
[34m[05/28/2021 1

### (3) Retrieve image of Linear Learner algorithm from container

In [43]:
import os

container_logr = sagemaker.image_uris.retrieve("linear-learner",sess.boto_region_name)
#container = sagemaker.image_uris.retrieve("xgboost",sess.boto_region_name,version="1.2-1")
print (f'Using linear-learner Container {container_logr}')


Using linear-learner Container 404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:1


### Create output file for model.

In [44]:
s3_model_output_location2 = r's3://use-case1/modellogr'.format(bucket)

### Build Linear Learner model and configure the training job.

In [45]:
ll_hp = {'feature_dim':3, 'mini_batch_size':4, 'predictor_type':'binary_classifier'}

In [46]:
linear = sagemaker.estimator.Estimator(container_logr,
                                       role=role, 
                                       instance_count=1, 
                                       instance_type='ml.p2.xlarge',
                                       output_path=s3_model_output_location2,
                                       sagemaker_session=sess,
                                       hyperparameters=ll_hp
                                      )

In [47]:
linear.hyperparameters()

{'feature_dim': 3, 'mini_batch_size': 4, 'predictor_type': 'binary_classifier'}

### Specify training location

In [48]:
training_input_config = sagemaker.session.TrainingInput(s3_data=data_location,content_type="text/csv")
data_logr = {'train': training_input_config}
print(training_input_config.config)


{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://use-case1/parts_train.csv', 'S3DataDistributionType': 'FullyReplicated'}}, 'ContentType': 'text/csv'}


### Train the Linear Learner model.

In [49]:
linear.fit(data_logr)

2021-05-28 10:22:21 Starting - Starting the training job...
2021-05-28 10:22:23 Starting - Launching requested ML instancesProfilerReport-1622197341: InProgress
......
2021-05-28 10:23:47 Starting - Preparing the instances for training.........
2021-05-28 10:25:16 Downloading - Downloading input data
2021-05-28 10:25:16 Training - Downloading the training image.....[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[05/28/2021 10:26:06 INFO 140611176843072] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bia

## (II) Bring your own algorithm (from sklearn) 

### Read data from S3 

In [61]:
role_svm=get_execution_role()
bucket='use-case1'
data_key = 'parts_train.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)

pd.read_csv(data_location)

Unnamed: 0,0,108.7,81.24126506024099,14
0,0,103.368544,93.100000,16.0
1,0,107.800000,81.241265,12.9
2,0,103.368544,53.800000,15.4
3,0,95.000000,81.241265,15.8
4,0,107.400000,40.800000,15.5
...,...,...,...,...
1269,1,120.300000,95.001701,14.7
1270,1,133.200000,52.800000,17.1
1271,0,103.368544,110.500000,15.5
1272,0,103.368544,107.700000,14.0


In [62]:
s3_model_output_location3 = r's3://use-case1/modelsvc'.format(bucket)

### Creating session using account and region input


In [63]:
## getting account and region info with image for model.
account = sess.boto_session.client("sts").get_caller_identity()["Account"]
region = sess.boto_session.region_name
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-svm-model:latest".format(account, region)

### (4) Retrieve image of SVM algorithm from Docker container image

In [64]:
svc=sagemaker.estimator.Estimator(image,
                                 role_svm,
                                 1,
                                 "ml.m5.2xlarge",
                                 output_path=s3_model_output_location3,
                                 sagemaker_session=sess)
svc.fit(data_location)

2021-05-29 05:46:29 Starting - Starting the training job...
2021-05-29 05:46:31 Starting - Launching requested ML instancesProfilerReport-1622267189: InProgress
......
2021-05-29 05:47:46 Starting - Preparing the instances for training...
2021-05-29 05:48:28 Downloading - Downloading input data
2021-05-29 05:48:28 Training - Downloading the training image...
2021-05-29 05:48:54 Uploading - Uploading generated training model[34mStarting the training.[0m
[34mTraining complete.[0m

2021-05-29 05:49:28 Completed - Training job completed
Training seconds: 40
Billable seconds: 40


## Hosting the models

###  Deploying the models

### Read the test data from S3

In [82]:
bucket='use-case1'
data_key = 'test.csv'
data_location_test_svm = 's3://{}/{}'.format(bucket, data_key)

df_test_svm=pd.read_csv(data_location_test_svm)

In [83]:
data_location_test_svm

's3://use-case1/test.csv'

In [84]:
df_test_svm.head(15)

Unnamed: 0,139.74734981326856,16.0,93.1,Hinterachse
0,107.8,12.9,77.498078,Hinterachse
1,101.067871,15.4,53.8,Hinterachse
2,95.0,15.8,47.696782,Hinterachse
3,107.4,15.5,40.8,Hinterachse
4,61.7,15.0,20.501597,Hinterachse
5,108.4,14.5,41.5,Hinterachse
6,61.7,15.3,18.691978,Hinterachse
7,105.0,11.0,86.266557,Hinterachse
8,76.8,18.5,13.909332,Vorderachse
9,87.0,14.6,46.8,Hinterachse


In [85]:
column_names=['139.74734981326856','93.1','16.0','Hinterachse']
df_test_svm=df_test_svm.reindex(columns=column_names)

In [87]:
print(df_test_svm.head())
print(df_test_svm.shape)

   139.74734981326856       93.1  16.0  Hinterachse
0          107.800000  77.498078  12.9  Hinterachse
1          101.067871  53.800000  15.4  Hinterachse
2           95.000000  47.696782  15.8  Hinterachse
3          107.400000  40.800000  15.5  Hinterachse
4           61.700000  20.501597  15.0  Hinterachse
(1271, 4)


### Create the test array

In [88]:
test_array = df_test_svm.drop(['Hinterachse'], axis=1)

In [76]:
test_array

Unnamed: 0,139.74734981326856,93.1,16.0
0,107.800000,77.498078,12.9
1,101.067871,53.800000,15.4
2,95.000000,47.696782,15.8
3,107.400000,40.800000,15.5
4,61.700000,20.501597,15.0
...,...,...,...
1266,120.300000,78.660196,14.7
1267,133.200000,52.800000,17.1
1268,151.977219,110.500000,15.5
1269,139.904069,107.700000,14.0


### (1) SVM

In [132]:
from sagemaker.predictor import csv_serializer
svc_predictor=svc.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge',serializer=csv_serializer, endpoint_name = "svm-endpoint")

-----------!

### Get Predictions using SVM

In [120]:
from sagemaker.predictor import CSVSerializer
svc_predictor.serializer = CSVSerializer()
svc_pred = svc_predictor.predict(test_array.values).decode("utf-8") # output is in bytes hence we use decode method 
#print(svc_predictor.predict(test_array.values).decode("utf-8"))
print(svc_pred)

1
1
1
0
1
0
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
0
1
1
0
1
1
1
0
1
1
1
0
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
0
1
1
1
0
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
0
0
1
0
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
0
1
1
0
1
1
1
1
1
1
1
0
1
0
1
1
0
0
1
1
1
0
0
1
0
1
1
1
0
0
1
1
0
1
0
1
1
1
1
0
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
0
1
0
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
1
1
1


In [121]:
svc_pred=svc_pred.replace('\n','')

In [122]:
type(svc_pred)

str

In [123]:
len(svc_pred)

1271

In [126]:
svc_pred

'111010111011111111111011011101110110111111111111101111111111111101111111111110111101110111111110111111101111111111111111111111111110111111111111111111111111111100111111111111111111110111111111111111111011110111111111111110111111110111111111111111111111111111010010111111101111111111111111100111011011111110101100111001011100110101111011110111111111111111111111111111111111111111111111111111111110111111101111111111111111111111111110111110101111111111111011010110111111111111111110111111111110011111111111111111011011110111111111111101111111111011101111111111111011101111101111111111111010111111011111111001110110111111110111110111111101101110111111111111111110111100111111111111111111111101110111111110111111110110111111111111111111011111111111101111111110110111110010111111111101110110111111011101111101111110101111111001011111111111111110110111111111111111101111111110101111111011111111111111100111101111111110111111101111101011111111111111111111111111111011111011111101111111111111111111111111111

In [129]:
svc_predictions=[]
for item in svc_pred:
    if item=='1':
        svc_predictions.append('Vorderachse')
    elif item=='0':
        svc_predictions.append('Hinterachse')

In [130]:
svc_predictions

['Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorder

### Evaluation Metrics

In [131]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print('Accuracy: ',accuracy_score(df_test_svm['Hinterachse'],svc_predictions))
print(confusion_matrix(df_test_svm['Hinterachse'],svc_predictions))
print(classification_report(df_test_svm['Hinterachse'],svc_predictions))

Accuracy:  0.7081038552321007
[[113 334]
 [ 37 787]]
              precision    recall  f1-score   support

 Hinterachse       0.75      0.25      0.38       447
 Vorderachse       0.70      0.96      0.81       824

    accuracy                           0.71      1271
   macro avg       0.73      0.60      0.59      1271
weighted avg       0.72      0.71      0.66      1271



### (2) XGBoost

In [164]:
xgb_predictor=estimator.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge',endpoint_name='xgboost-endpoint')

---------------!

In [134]:
from sagemaker.predictor import csv_serializer,csv_deserializer
#xgb_predictor.content_type ='text/csv'
xgb_predictor.serializer = csv_serializer
xgb_predictor.deserializer=csv_deserializer

In [136]:
predictions = xgb_predictor.predict(test_array.values)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The csv_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [137]:
prediction_array=np.array(predictions)

In [138]:
prediction_array

array([['0.46838638186454773', '0.24122707545757294',
        '0.3437258303165436', ..., '0.9835771918296814',
        '0.9857390522956848', '0.9685804843902588']], dtype='<U20')

In [140]:
predictions

[['0.46838638186454773',
  '0.24122707545757294',
  '0.3437258303165436',
  '0.2078370600938797',
  '0.33378297090530396',
  '0.2379278987646103',
  '0.29628241062164307',
  '0.07628385722637177',
  '0.4196672737598419',
  '0.328174889087677',
  '0.7129984498023987',
  '0.933951735496521',
  '0.24122707545757294',
  '0.5822511911392212',
  '0.29628241062164307',
  '0.6439865231513977',
  '0.834439754486084',
  '0.43859755992889404',
  '0.8737695813179016',
  '0.29628241062164307',
  '0.4183740019798279',
  '0.569651186466217',
  '0.8737695813179016',
  '0.421617329120636',
  '0.24122707545757294',
  '0.6439865231513977',
  '0.9742276072502136',
  '0.9695465564727783',
  '0.8449814915657043',
  '0.6439865231513977',
  '0.3232770562171936',
  '0.9861323833465576',
  '0.06028721481561661',
  '0.33378297090530396',
  '0.059160228818655014',
  '0.24122707545757294',
  '0.328174889087677',
  '0.9685804843902588',
  '0.31314635276794434',
  '0.059160228818655014',
  '0.24122707545757294',
  '

In [141]:
list1=[]
for i in predictions[0]:
    if float(i) > 0.5:
        list1.append('Vorderachse')
    else:
        list1.append('Hinterachse')

In [142]:
list1

['Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Vorderachse',
 'Hinterachse',
 'Vorderachse',
 'Hinterachse',
 'Hinterachse',
 'Vorderachse',
 'Vorderachse',
 'Vorder

In [125]:
df_test_svm[3]

0       Hinterachse
1       Hinterachse
2       Hinterachse
3       Hinterachse
4       Hinterachse
           ...     
1268    Vorderachse
1269    Vorderachse
1270    Hinterachse
1271    Hinterachse
1272    Vorderachse
Name: 3, Length: 1273, dtype: object

### Evaluation Metrics

In [144]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print(confusion_matrix(df_test_svm["Hinterachse"],list1))
print('Accuracy: ',accuracy_score(df_test_svm["Hinterachse"],list1))
print(classification_report(df_test_svm["Hinterachse"],list1))

[[261 186]
 [167 657]]
Accuracy:  0.7222659323367427
              precision    recall  f1-score   support

 Hinterachse       0.61      0.58      0.60       447
 Vorderachse       0.78      0.80      0.79       824

    accuracy                           0.72      1271
   macro avg       0.69      0.69      0.69      1271
weighted avg       0.72      0.72      0.72      1271



### (3) Linear Learner

In [145]:
linear_learner_predictor = linear.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge',endpoint_name='lr-endpoint')

-------------!

In [146]:
from sagemaker.predictor import csv_serializer,csv_deserializer
linear_learner_predictor.serializer = csv_serializer
linear_learner_predictor.deserializer=csv_deserializer

In [148]:
lr_predictions = linear_learner_predictor.predict(test_array.values)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The csv_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [149]:
lr_predictions

[['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['1'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['1'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['0'],
 ['0'],
 ['1'],
 ['0'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['0'],
 ['0'],
 ['1'],
 ['0'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['0'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['0'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['1'],
 ['0'],
 ['1'],
 ['0'],
 ['1'],
 ['1'],
 ['1'],


In [150]:
lr_pred = []
for item in lr_predictions:
    if item[0] == '1':
        lr_pred.append('Vorderachse')
    else:
        lr_pred.append('Hinterachse')
lr_pred_df = pd.DataFrame(lr_pred)
lr_pred_df

Unnamed: 0,0
0,Hinterachse
1,Hinterachse
2,Hinterachse
3,Hinterachse
4,Hinterachse
...,...
1266,Hinterachse
1267,Vorderachse
1268,Vorderachse
1269,Vorderachse


### Evaluation Metrics

In [152]:
print(confusion_matrix(df_test_svm["Hinterachse"],lr_pred_df))
print('Accuracy: ',accuracy_score(df_test_svm["Hinterachse"],lr_pred_df))
print(classification_report(df_test_svm["Hinterachse"],lr_pred_df))

[[311 136]
 [219 605]]
Accuracy:  0.7206923682140047
              precision    recall  f1-score   support

 Hinterachse       0.59      0.70      0.64       447
 Vorderachse       0.82      0.73      0.77       824

    accuracy                           0.72      1271
   macro avg       0.70      0.71      0.70      1271
weighted avg       0.74      0.72      0.73      1271



### (4) KNN 

In [153]:
knn_predictor = knn_est.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge',endpoint_name='knn-endpoint')
#knn_predictor.content_type = 'text/csv'


-----------------!

In [155]:
from sagemaker.predictor import json_deserializer
knn_predictor.serializer = csv_serializer
knn_predictor.deserializer = json_deserializer

In [156]:
knn_predictions = knn_predictor.predict(test_array.values)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The json_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [157]:
knn_predictions

{'predictions': [{'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'predicted_label': 0.0},
  {'predicted_label': 0.0},
  {'predicted_label': 1.0},
  {'p

In [158]:
knn_pred_label = []
for pair in knn_predictions["predictions"]:
    if(int(pair['predicted_label']) == 1):
        knn_pred_label.append("Vorderachse")
    else:
        knn_pred_label.append("Hinterachse")
#knn_pred_label.append(int(pair['predicted_label']))
    
#knn_pred_label
knn_pred_label_df = pd.DataFrame(knn_pred_label)
knn_pred_label_df

Unnamed: 0,0
0,Hinterachse
1,Hinterachse
2,Vorderachse
3,Hinterachse
4,Hinterachse
...,...
1266,Hinterachse
1267,Vorderachse
1268,Vorderachse
1269,Vorderachse


In [210]:
df_test_svm[3]

0       Hinterachse
1       Hinterachse
2       Hinterachse
3       Hinterachse
4       Hinterachse
           ...     
1268    Vorderachse
1269    Vorderachse
1270    Hinterachse
1271    Hinterachse
1272    Vorderachse
Name: 3, Length: 1273, dtype: object

### Evaluation Metrics

In [159]:
print(confusion_matrix(df_test_svm["Hinterachse"],knn_pred_label_df))
print('Accuracy: ',accuracy_score(df_test_svm["Hinterachse"],knn_pred_label_df))
print(classification_report(df_test_svm["Hinterachse"],knn_pred_label_df))

[[276 171]
 [175 649]]
Accuracy:  0.7277734067663257
              precision    recall  f1-score   support

 Hinterachse       0.61      0.62      0.61       447
 Vorderachse       0.79      0.79      0.79       824

    accuracy                           0.73      1271
   macro avg       0.70      0.70      0.70      1271
weighted avg       0.73      0.73      0.73      1271

