In [1]:
# Load the churn dataset from the local CSV export into a DataFrame.
import pandas as pd

data = pd.read_csv(filepath_or_buffer='churndata.csv')

# Preview the first five rows. The preview shows an unnamed leading column
# ('Unnamed: 0') ahead of 'respondent_id' and the 'churned' column.
data.head(n=5)

Unnamed: 0,respondent_id,churned,age,gender,loyalty_level,how_long_a_customer,how_far_away,feel_valued_as_customer,get_often_inspiration,good_product_overview,pick_up_flexibility,fair_prices,looking_forward_to_the_next_12_months,see_improvements_recently,informed_about_new_products,find_productst_i_want,i_enjoy_buying
0,1,0,3,1.0,,2,5.0,1,2,2,1,1,4,1,,1,2
1,10,0,3,2.0,1.0,5,1.0,1,1,2,1,2,1,1,,2,1
2,100,1,3,1.0,1.0,3,2.0,5,1,1,1,4,3,1,,2,1
3,1000,0,3,1.0,3.0,6,1.0,1,1,4,1,5,2,2,,1,1
4,10000,0,0,,1.0,0,1.0,1,2,2,1,2,3,1,1.0,1,3


In [2]:
# Splitting the dataset:

import numpy as np

# Shuffle every row once with a fixed seed so the split is reproducible.
shuffled = data.sample(frac=1, random_state=123)

# The first 95% of the shuffled rows form the training set; the remainder is the
# held-out test set. Positional slicing selects the same rows as the earlier
# np.split(...) call, but avoids passing a DataFrame through NumPy (which relies
# on DataFrame.swapaxes, deprecated since pandas 2.1) and no longer produces a
# throwaway empty third chunk.
split_at = int(0.95 * len(data))
train_data = shuffled.iloc[:split_at]
test_data = shuffled.iloc[split_at:]

# Every row must land in exactly one of the two sets.
assert len(train_data) + len(test_data) == len(data)

# Save to CSV files
train_data.to_csv('automl-train.csv', index=False, header=True, sep=',') # Need to keep column names
test_data.to_csv('automl-test.csv', index=False, header=True, sep=',')

In [3]:
%%sh

# List the CSV files in the working directory with their sizes and timestamps,
# to confirm that the train/test files were written next to the source data.
ls -l *.csv

-rw-r--r-- 1 root root  52360 Feb 21 11:45 automl-test.csv
-rw-r--r-- 1 root root 990572 Feb 21 11:45 automl-train.csv
-rw-r--r-- 1 root root 920488 Feb 21 11:44 churndata.csv


In [4]:
# Upload the training CSV to S3 through the SageMaker session.

import sagemaker

sess   = sagemaker.Session()
prefix = 'sagemaker/DEMO-automl-dm/input'   # key prefix under which the file is stored

# upload_data returns the S3 URI of the uploaded object; print it for reference.
uri = sess.upload_data(key_prefix=prefix, path="automl-train.csv")
print(uri)

s3://sagemaker-us-east-1-013275376451/sagemaker/DEMO-automl-dm/input/automl-train.csv


In [None]:
# Name of the deployed SageMaker endpoint that will score the test data set.
# TODO: replace the placeholder below with the real endpoint name before running
# the following cells; the placeholder itself is not a usable endpoint name.
ep_name = '<your-endpoint-name>'

In [None]:
import boto3
import sys

# Build a boto3 session, then create the SageMaker runtime client that is used
# to send inference requests to the endpoint.
boto_session = boto3.Session()
sm_rt = boto_session.client('runtime.sagemaker')

In [None]:
# Score every row of the held-out test set against the endpoint and tally a
# confusion matrix (tp / tn / fp / fn) where the positive class is "churned".
import csv
import io

LABEL_COLUMN = 'churned'                        # target column, located by name in the CSV header
POSITIVE_LABELS = {'1', '1.0', 'yes', 'true'}   # spellings treated as the positive class


def _is_positive(value):
    """Return True when a label or prediction string denotes the positive class."""
    return value.strip().lower() in POSITIVE_LABELS


def _to_csv_line(fields):
    """Serialise `fields` as a single CSV record (quoted where needed), without a trailing newline."""
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator='').writerow(fields)
    return buffer.getvalue()


tp = tn = fp = fn = count = 0

# Parse with the csv module so quoting and the trailing newline are handled
# correctly; newline='' is what the csv module expects for files it reads.
with open('automl-test.csv', newline='') as f:
    reader = csv.reader(f)
    header = next(reader, None)
    if header is None:
        raise ValueError("automl-test.csv is empty: expected a header row")
    # Raises ValueError if the target column is missing, rather than silently
    # scoring against the wrong column.
    label_idx = header.index(LABEL_COLUMN)
    rows = [row for row in reader if row]   # ignore blank lines

for row in rows:
    label = row[label_idx]
    # Send every column except the target, in the original column order.
    features = row[:label_idx] + row[label_idx + 1:]
    payload = _to_csv_line(features)

    response = sm_rt.invoke_endpoint(EndpointName=ep_name, ContentType='text/csv', Accept='text/csv', Body=payload)
    response = response['Body'].read().decode("utf-8")
    #print ("label %s response %s" %(label,response))

    # NOTE(review): assumes the endpoint returns the predicted label as the first
    # CSV field of the response -- confirm against the model's inference output.
    predicted_positive = _is_positive(response.split(',')[0])

    if _is_positive(label):
        # Sample is positive
        if predicted_positive:
            # True positive
            tp = tp + 1
        else:
            # False negative
            fn = fn + 1
    else:
        # Sample is negative
        if predicted_positive:
            # False positive
            fp = fp + 1
        else:
            # True negative
            tn = tn + 1

    count = count + 1
    if count % 100 == 0:
        # Lightweight progress indicator: one number per 100 scored rows.
        print(count, end=' ')

print ("Done")

In [None]:
def _safe_div(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Confusion matrix, one row per actual class:
#   actual negative: tn fp
#   actual positive: fn tp
print ("%d %d" % (tn, fp))
print ("%d %d" % (fn, tp))

# Share of all scored samples that were classified correctly.
accuracy  = _safe_div(tp + tn, tp + tn + fp + fn)
# Share of predicted positives that are actually positive.
precision = _safe_div(tp, tp + fp)
# Share of actual positives that were found: tp / (tp + fn).
# (The numerator was previously tn, which is not recall.)
recall    = _safe_div(tp, tp + fn)
# Harmonic mean of precision and recall; 0.0 when both are zero.
f1        = _safe_div(2 * precision * recall, precision + recall)

print ("%.4f %.4f %.4f %.4f" % (accuracy, precision, recall, f1))