## Part 2: Supervised Learning Model

Now that you've found which parts of the population are more likely to be customers of the mail-order company, it's time to build a prediction model. Each of the rows in the "MAILOUT" data files represents an individual that was targeted for a mailout campaign. Ideally, we should be able to use the demographic information from each individual to decide whether or not it will be worth it to include that person in the campaign.

The "MAILOUT" data has been split into two approximately equal parts, each with almost 43 000 data rows. In this part, you can verify your model with the "TRAIN" partition, which includes a column, "RESPONSE", that states whether or not a person became a customer of the company following the campaign. In the next part, you'll need to create predictions on the "TEST" partition, where the "RESPONSE" column has been withheld.

In [20]:
import numpy as np
import pandas as pd

from joblib import dump, load

from time import time
from collections import OrderedDict, Counter

from tqdm.auto import tqdm, trange
tqdm.pandas()

from functions import *
from sklearn.model_selection import train_test_split


import sagemaker

from sagemaker import get_execution_role

from sagemaker.amazon.amazon_estimator import get_image_uri

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

#from imblearn.over_sampling import SMOTE, ADASYN, BorderlineSMOTE
#from imblearn.under_sampling import TomekLinks, ClusterCentroids
#from imblearn.combine import SMOTETomek

#import xgboost as xgb
#from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score, make_scorer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

# Hyperparameters tuning
#from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

# magic word for producing visualizations in notebook
%matplotlib inline

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.
There is a more up to date SageMaker XGBoost image. To use the newer image, please set 'repo_version'='1.0-1'. For example:
	get_image_uri(region, 'xgboost', '1.0-1').


In [9]:
# load in the data
bucket='avratodataset3'
data_key = 'Udacity_MAILOUT_052018_TRAIN.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)
train=pd.read_csv(data_location,sep=';')

  interactivity=interactivity, compiler=compiler, result=result)


In [11]:
train.head()

Unnamed: 0,LNR,AGER_TYP,AKT_DAT_KL,ALTER_HH,ALTER_KIND1,ALTER_KIND2,ALTER_KIND3,ALTER_KIND4,ALTERSKATEGORIE_FEIN,ANZ_HAUSHALTE_AKTIV,...,VK_DHT4A,VK_DISTANZ,VK_ZG11,W_KEIT_KIND_HH,WOHNDAUER_2008,WOHNLAGE,ZABEOTYP,RESPONSE,ANREDE_KZ,ALTERSKATEGORIE_GROB
0,1763,2,1.0,8.0,,,,,8.0,15.0,...,5.0,2.0,1.0,6.0,9.0,3.0,3,0,2,4
1,1771,1,4.0,13.0,,,,,13.0,1.0,...,1.0,2.0,1.0,4.0,9.0,7.0,1,0,2,3
2,1776,1,1.0,9.0,,,,,7.0,0.0,...,6.0,4.0,2.0,,9.0,2.0,3,0,1,4
3,1460,2,1.0,6.0,,,,,6.0,4.0,...,8.0,11.0,11.0,6.0,9.0,1.0,3,0,2,4
4,1783,2,1.0,9.0,,,,,9.0,53.0,...,2.0,2.0,1.0,6.0,9.0,3.0,3,0,1,3


In [14]:
from functions import *

def preprocess(azdias):
    
    feat_info = pd.read_csv('AZDIAS_Feature_Summary.csv', sep=';')
    drop_features = ['D19_LETZTER_KAUF_BRANCHE', 'EINGEFUEGT_AM']

    azdias = azdias.drop(drop_features, axis=1)
    azdias = replace_missing_or_unknown(azdias, feat_info)
    column_nans = nans_count(azdias)
    column_nan_threshold = 75

    nans_to_drop = column_nans[column_nans['nan_count'] > column_nan_threshold].index.tolist()
    azdias.drop(nans_to_drop, axis=1, inplace=True)
    drop_features = drop_features + nans_to_drop
    row_nans = nans_count(azdias, axis=1)
    
    initial_data = feat_info.groupby('type').agg('count')

    feat_info_afer_drop = feat_info[feat_info['attribute'].isin(azdias.columns.values)]
    after_column_drop = feat_info_afer_drop.groupby('type').agg('count')
    categorical_features = feat_info_afer_drop[feat_info_afer_drop['type'] == 'categorical']['attribute'].tolist()
    azdias['OST_WEST_KZ'] = azdias['OST_WEST_KZ'].map({'W': 1, 'O': 2})
    azdias[categorical_features] = azdias[categorical_features].fillna(azdias[categorical_features].mode().iloc[0])
    
    cat_numeric = list(set(categorical_features)-set(['CAMEO_DEU_2015']))
    azdias[cat_numeric] = azdias[cat_numeric].astype('int8')
    azdias[categorical_features] = azdias[categorical_features].astype('category')
    
    feat_info = feat_info[feat_info['attribute'].isin(azdias.columns.values)]
    mixed_type_features = feat_info[feat_info['type'] == 'mixed']['attribute']
    
    azdias[mixed_type_features] = azdias[mixed_type_features].fillna(azdias[mixed_type_features].mode().iloc[0])
    
    
    decade = {'1.0': 1, '2.0': 1, '3.0': 2, '4.0': 2, '5.0': 3, '6.0': 3, '7.0': 3, '8.0': 4, '9.0': 4, '10.0': 5, 
          '11.0': 5, '12.0': 5, '13.0': 5, '14.0': 6, '15.0': 6}

    movement = {'1.0': 1, '2.0': 2, '3.0': 1, '4.0': 2, '5.0': 1, '6.0': 2, '7.0': 2, '8.0': 1, '9.0': 2, '10.0': 1, 
            '11.0': 2, '12.0': 1, '13.0': 2, '14.0': 1, '15.0': 2}

    azdias['PRAEGENDE_JUGENDJAHRE'] = azdias['PRAEGENDE_JUGENDJAHRE'].astype('str')
    azdias['PRAEGENDE_JUGENDJAHRE_DECADE'] = azdias['PRAEGENDE_JUGENDJAHRE'].replace(decade)
    azdias['PRAEGENDE_JUGENDJAHRE_MOVEMENT'] = azdias['PRAEGENDE_JUGENDJAHRE'].replace(movement)
    azdias = azdias.drop('PRAEGENDE_JUGENDJAHRE', axis=1)
    decade_movement = ['PRAEGENDE_JUGENDJAHRE_DECADE', 'PRAEGENDE_JUGENDJAHRE_MOVEMENT']
    azdias[decade_movement] = azdias[decade_movement].astype('int8').astype('category')
    """
    rep={'B':1, 'A':2, 'C':3, 'D':4, 'E':5, 'F':6}
    azdias['CAMEO_DEU_2015'] = azdias['CAMEO_DEU_2015'].astype('str')
    azdias['CAMEO_DEU_2015'] = azdias['CAMEO_DEU_2015'].replace(rep)
    azdias['CAMEO_DEU_2015'] = azdias['CAMEO_DEU_2015'].astype('int8').astype('category')
    """
    rep={'B':1, 'A':2, 'C':3, 'D':4, 'E':5, 'F':6}
    azdias['CAMEO_DEU_2015_1'] = azdias['CAMEO_DEU_2015'].apply(lambda x: list(str(x))[0])
    azdias['CAMEO_DEU_2015_2'] = azdias['CAMEO_DEU_2015'].apply(lambda x: list(str(x))[1])
    azdias = azdias.drop('CAMEO_DEU_2015', axis=1)
    azdias['CAMEO_DEU_2015_2'] = azdias['CAMEO_DEU_2015_2'].replace(rep)
    azdias[['CAMEO_DEU_2015_1','CAMEO_DEU_2015_2']] = azdias[['CAMEO_DEU_2015_1','CAMEO_DEU_2015_2']].astype('int8').astype('category')
    
    
    
    azdias['CAMEO_INTL_2015_WEALTH'] = azdias['CAMEO_INTL_2015'].apply(lambda x: list(str(x))[0])
    azdias['CAMEO_INTL_2015_LIFE_STAGE'] = azdias['CAMEO_INTL_2015'].apply(lambda x: list(str(x))[1])
    azdias = azdias.drop('CAMEO_INTL_2015', axis=1)
    
    wealth_life = ['CAMEO_INTL_2015_WEALTH', 'CAMEO_INTL_2015_LIFE_STAGE']
    azdias[wealth_life] = azdias[wealth_life].astype('int8').astype('category')
    
    feat_info = feat_info[feat_info['attribute'].isin(azdias.columns.values)]
    remaining_mix_type = feat_info[feat_info['type'] == 'mixed']['attribute']
    azdias[remaining_mix_type] = azdias[remaining_mix_type].astype('int8').astype('category')
    
    ordinal_features = feat_info_afer_drop[feat_info_afer_drop['type'] == 'ordinal']['attribute'].tolist()
    azdias[ordinal_features] = azdias[ordinal_features].fillna(azdias[ordinal_features].mode().iloc[0])
    azdias[ordinal_features] = azdias[ordinal_features].astype('int8').astype('category')
    
    interval_features = feat_info[feat_info['type'] == 'interval']['attribute'].tolist()
    azdias[interval_features] = azdias[interval_features].fillna(azdias[interval_features].mode().iloc[0])
    azdias[interval_features] = azdias[interval_features].astype('int8').astype('category')
    
    numeric_features = feat_info[feat_info['type'] == 'numeric']['attribute'].tolist()
    azdias[numeric_features] = azdias[numeric_features].fillna(azdias[numeric_features].median())
    azdias[numeric_features] = azdias[numeric_features].astype('int32')
    
    return azdias

In [16]:
trainY=train['RESPONSE']
trainX = preprocess(train.drop(['RESPONSE'],axis=1))
del train

Processing AGER_TYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ALTERSKATEGORIE_GROB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ALTER_HH


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ANREDE_KZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing BALLRAUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing CAMEO_DEUG_2015


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing CAMEO_DEU_2015


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing CAMEO_INTL_2015


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing CJT_GESAMTTYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_DIREKT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_GROSS


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_LOKAL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BANKEN_REST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BEKLEIDUNG_GEH


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BEKLEIDUNG_REST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BILDUNG


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BIO_OEKO


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_BUCH_CD


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_DIGIT_SERV


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_DROGERIEARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_ENERGIE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_FREIZEIT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GARTEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GESAMT_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GESAMT_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GESAMT_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GESAMT_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_GESAMT_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_HANDWERK


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_HAUS_DEKO


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_KINDERARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KK_KUNDENTYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_KOSMETIK


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_LEBENSMITTEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_LOTTO


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_NAHRUNGSERGAENZUNG


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_RATGEBER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_REISEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_SAMMELARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_SCHUHE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TECHNIK


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_MOBILE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_REST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TIERARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSAND_REST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSICHERUNGEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSI_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSI_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VOLLSORTIMENT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_WEIN_FEINKOST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing EWDICHTE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZTYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_ANLEGER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_HAUSBAUER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_MINIMALIST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_SPARER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_UNAUFFAELLIGER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing FINANZ_VORSORGER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing GEBAEUDETYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing GEBURTSJAHR


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing HEALTH_TYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing HH_EINKOMMEN_SCORE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing INNENSTADT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ALTER1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ALTER2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ALTER3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ALTER4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ANHANG


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_AUTOQUOT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_BAUMAX


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_CCM1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_CCM2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_CCM3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_CCM4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_DIESEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_FRAU


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_GBZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERST1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERST2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERST3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERST4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERST5


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_HERSTTEMP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSAQUOT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSHERST1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSHERST2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSHERST3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSKLEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSOBER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSVAN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KRSZUL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KW1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KW2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_KW3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MAXAH


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MAXBJ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MAXHERST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MAXSEG


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MAXVORB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOD1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOD2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOD3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOD4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOD8


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MODTEMP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOTOR


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_MOTRAD


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG10


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG5


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG6


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG7


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG8


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_SEG9


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_VORB0


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_VORB1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_VORB2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ZUL1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ZUL2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ZUL3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA05_ZUL4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ALTERHALTER_30


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ALTERHALTER_45


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ALTERHALTER_60


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ALTERHALTER_61


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_AUDI


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_AUTOQUOTE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_1999


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_2000


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_2004


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_2006


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_2008


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BJ_2009


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BMW


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1000


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1200


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1400


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_0_1400


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1500


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1600


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1800


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_2000


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_2500


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_2501


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_3000


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_3001


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_FAB_ASIEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_FAB_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_FIAT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_FORD


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_20


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_25


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_30


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_35


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_40


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_45


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_50


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_55


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_60


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_65


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HALTER_66


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_ASIEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_AUDI_VW


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_BMW_BENZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_EUROPA


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_FORD_OPEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HERST_SONST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_110


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_140


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_180


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_0_140


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_140_210


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_211


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_250


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_251


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSAQUOT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSHERST_AUDI_VW


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSHERST_BMW_BENZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSHERST_FORD_OPEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSSEG_KLEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSSEG_OBER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSSEG_VAN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KRSZUL_NEU


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_30


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_40


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_50


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_60


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_0_60


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_70


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_61_120


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_80


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_90


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_110


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_120


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KW_121


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_MAZDA


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_MERCEDES


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_MOTOR


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_NISSAN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_OPEL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_PEUGEOT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_RENAULT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_GELAENDEWAGEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_GROSSRAUMVANS


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_KLEINST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_KLEINWAGEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_KOMPAKTKLASSE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_MINIVANS


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_MINIWAGEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_MITTELKLASSE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_OBEREMITTELKLASSE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_OBERKLASSE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_SPORTWAGEN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_UTILITIES


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_VAN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SEG_WOHNMOBILE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SITZE_4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SITZE_5


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_SITZE_6


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_TOYOTA


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VORB_0


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VORB_1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VORB_1_2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VORB_2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VORB_3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_VW


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KKK


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_FAMILIE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_FAMILIE_GROB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_LEBENSPHASE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_LEBENSPHASE_GROB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_STATUS_FEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing LP_STATUS_GROB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing NATIONALITAET_KZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ORTSGR_KLS9


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing OST_WEST_KZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_GBZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PLZ8_HHZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing PRAEGENDE_JUGENDJAHRE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing REGIOTYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing RELAT_AB


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing RETOURTYP_BK_S


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_DOM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_ERL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_FAM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_KAEM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_KRIT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_KULT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_LUST


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_MAT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_PFLICHT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_RAT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_REL


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_SOZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_TRADV


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SEMIO_VERT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing SHOPPER_TYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing TITEL_KZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing VERS_TYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing WOHNDAUER_2008


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing WOHNLAGE


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing W_KEIT_KIND_HH


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ZABEOTYP


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ARBEIT


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing ALTERSKATEGORIE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_SOZIALES


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_TELKO_ONLINE_QUOTE_12


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSI_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSI_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing D19_VERSI_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_CCM_1401_2500


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_GBZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_HHZ


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_KMH_210


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KOMBIALTER


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))


Processing KBA13_BAUMAX


HBox(children=(FloatProgress(value=0.0, max=42962.0), HTML(value='')))




In [22]:
trainX, valX, trainY, valY = train_test_split(trainX, trainY, test_size=0.2, random_state=1)

In [24]:
import os
data_dir = '../data/supervised model'
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

In [25]:
pd.concat([valY, valX], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)
pd.concat([trainY, trainX], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

In [27]:
del trainX, valX, trainY, valY

In [28]:
session = sagemaker.Session()
prefix = 'supervised model'
role = get_execution_role()
container = get_image_uri(session.boto_region_name, 'xgboost')
val_location = session.upload_data(os.path.join(data_dir, 'validation.csv'), key_prefix=prefix)
train_location = session.upload_data(os.path.join(data_dir, 'train.csv'), key_prefix=prefix)

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.
There is a more up to date SageMaker XGBoost image. To use the newer image, please set 'repo_version'='1.0-1'. For example:
	get_image_uri(region, 'xgboost', '1.0-1').


In [29]:
# First we create a SageMaker estimator object for our model.
xgb = sagemaker.estimator.Estimator(container, # The location of the container we wish to use
                                    role,                                    # What is our current IAM Role
                                    train_instance_count=1,                  # How many compute instances
                                    train_instance_type='ml.m4.xlarge',      # What kind of compute instances
                                    output_path='s3://{}/{}/output'.format(bucket, prefix),
                                    sagemaker_session=session)


Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


In [31]:
# And then set the algorithm specific parameters.
xgb.set_hyperparameters(max_depth=10,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        silent=0,
                        objective='binary:logistic',
                        early_stopping_rounds=10,
                        num_round=500)

In [32]:
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=val_location, content_type='csv')

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [33]:
xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})

2020-09-02 11:08:33 Starting - Starting the training job...
2020-09-02 11:08:36 Starting - Launching requested ML instances.........
2020-09-02 11:10:11 Starting - Preparing the instances for training......
2020-09-02 11:11:21 Downloading - Downloading input data...
2020-09-02 11:12:03 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[2020-09-02:11:12:04:INFO] Running standalone xgboost training.[0m
[34m[2020-09-02:11:12:04:INFO] File size need to be processed in the node: 27.56mb. Available memory size in the node: 8496.65mb[0m
[34m[2020-09-02:11:12:04:INFO] Determined delimiter of CSV input is ','[0m
[34m[11:12:04] S3DistributionType set as FullyReplicated[0m
[34m[11:12:05] 34369x325 matrix with 11169925 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-09-02:11:12:05:INFO] Determined delimiter of CSV input is ','[0m
[34m[11:12:05] S3DistributionType set as FullyReplicated[0m


In [34]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner

xgb_hyperparameter_tuner = HyperparameterTuner(estimator = xgb, # The estimator object to use as the basis for the training jobs.
                                               objective_metric_name = 'validation:rmse', # The metric used to compare trained models.
                                               objective_type = 'Minimize', # Whether we wish to minimize or maximize the metric.
                                               max_jobs = 20, # The total number of models to train
                                               max_parallel_jobs = 3, # The number of models to train in parallel
                                               hyperparameter_ranges = {
                                                    'max_depth': IntegerParameter(5, 15),
                                                    'eta'      : ContinuousParameter(0.05, 0.5),
                                                    'min_child_weight': IntegerParameter(2, 8),
                                                    'subsample': ContinuousParameter(0.5, 0.9),
                                                    'gamma': ContinuousParameter(0, 10),
                                               })

In [35]:
# This is a wrapper around the location of our train and validation data, to make sure that SageMaker
# knows our data is in csv format.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=val_location, content_type='csv')

xgb_hyperparameter_tuner.fit({'train': s3_input_train, 'validation': s3_input_validation})

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [37]:
xgb_hyperparameter_tuner.wait()

..................................................................................................................................................................................................................................................................................................................................................!


In [38]:
xgb_hyperparameter_tuner.best_training_job()

'xgboost-200902-1114-006-b7695509'

In [39]:
xgb_best = sagemaker.estimator.Estimator.attach(xgb_hyperparameter_tuner.best_training_job())

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


2020-09-02 11:22:14 Starting - Preparing the instances for training
2020-09-02 11:22:14 Downloading - Downloading input data
2020-09-02 11:22:14 Training - Training image download completed. Training in progress.
2020-09-02 11:22:14 Uploading - Uploading generated training model
2020-09-02 11:22:14 Completed - Training job completed[34mArguments: train[0m
[34m[2020-09-02:11:21:57:INFO] Running standalone xgboost training.[0m
[34m[2020-09-02:11:21:57:INFO] Setting up HPO optimized metric to be : rmse[0m
[34m[2020-09-02:11:21:57:INFO] File size need to be processed in the node: 27.56mb. Available memory size in the node: 8483.18mb[0m
[34m[2020-09-02:11:21:57:INFO] Determined delimiter of CSV input is ','[0m
[34m[11:21:57] S3DistributionType set as FullyReplicated[0m
[34m[11:21:57] 34369x325 matrix with 11169925 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-09-02:11:21:57:INFO] Determined delimiter of CSV input is ','[0m
[

In [40]:
xgb_transformer = xgb_best.transformer(instance_count = 1, instance_type = 'ml.m4.xlarge')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


## Part 3: Kaggle Competition

Now that you've created a model to predict which individuals are most likely to respond to a mailout campaign, it's time to test that model in competition through Kaggle. If you click on the link [here](http://www.kaggle.com/t/21e6d45d4c574c7fa2d868f0e8c83140), you'll be taken to the competition page where, if you have a Kaggle account, you can enter. If you're one of the top performers, you may have the chance to be contacted by a hiring manager from Arvato or Bertelsmann for an interview!

Your entry to the competition should be a CSV file with two columns. The first column should be a copy of "LNR", which acts as an ID number for each individual in the "TEST" partition. The second column, "RESPONSE", should be some measure of how likely each individual became a customer – this might not be a straightforward probability. As you should have found in Part 2, there is a large output class imbalance, where most individuals did not respond to the mailout. Thus, predicting individual classes and using accuracy does not seem to be an appropriate performance evaluation method. Instead, the competition will be using AUC to evaluate performance. The exact values of the "RESPONSE" column do not matter as much: only that the higher values try to capture as many of the actual customers as possible, early in the ROC curve sweep.

In [75]:
data_key = 'Udacity_MAILOUT_052018_TEST.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)
test=pd.read_csv(data_location,sep=';')

  interactivity=interactivity, compiler=compiler, result=result)


In [76]:
test = preprocess(test)

Processing AGER_TYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ALTERSKATEGORIE_GROB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ALTER_HH


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ANREDE_KZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing BALLRAUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing CAMEO_DEUG_2015


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing CAMEO_DEU_2015


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing CAMEO_INTL_2015


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing CJT_GESAMTTYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_DIREKT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_GROSS


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_LOKAL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BANKEN_REST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BEKLEIDUNG_GEH


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BEKLEIDUNG_REST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BILDUNG


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BIO_OEKO


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_BUCH_CD


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_DIGIT_SERV


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_DROGERIEARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_ENERGIE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_FREIZEIT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GARTEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GESAMT_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GESAMT_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GESAMT_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GESAMT_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_GESAMT_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_HANDWERK


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_HAUS_DEKO


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_KINDERARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KK_KUNDENTYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_KOSMETIK


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_LEBENSMITTEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_LOTTO


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_NAHRUNGSERGAENZUNG


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_RATGEBER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_REISEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_SAMMELARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_SCHUHE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TECHNIK


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_MOBILE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_REST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TIERARTIKEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSAND_REST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSICHERUNGEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSI_ANZ_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSI_ANZ_24


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VOLLSORTIMENT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_WEIN_FEINKOST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing EWDICHTE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZTYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_ANLEGER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_HAUSBAUER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_MINIMALIST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_SPARER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_UNAUFFAELLIGER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing FINANZ_VORSORGER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing GEBAEUDETYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing GEBURTSJAHR


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing HEALTH_TYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing HH_EINKOMMEN_SCORE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing INNENSTADT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ALTER1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ALTER2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ALTER3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ALTER4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ANHANG


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_AUTOQUOT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_BAUMAX


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_CCM1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_CCM2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_CCM3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_CCM4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_DIESEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_FRAU


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_GBZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERST1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERST2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERST3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERST4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERST5


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_HERSTTEMP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSAQUOT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSHERST1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSHERST2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSHERST3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSKLEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSOBER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSVAN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KRSZUL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KW1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KW2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_KW3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MAXAH


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MAXBJ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MAXHERST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MAXSEG


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MAXVORB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOD1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOD2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOD3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOD4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOD8


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MODTEMP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOTOR


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_MOTRAD


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG10


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG5


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG6


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG7


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG8


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_SEG9


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_VORB0


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_VORB1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_VORB2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ZUL1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ZUL2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ZUL3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA05_ZUL4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ALTERHALTER_30


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ALTERHALTER_45


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ALTERHALTER_60


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ALTERHALTER_61


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_AUDI


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_AUTOQUOTE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_1999


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_2000


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_2004


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_2006


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_2008


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BJ_2009


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BMW


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1000


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1200


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1400


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_0_1400


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1500


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1600


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1800


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_2000


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_2500


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_2501


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_3000


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_3001


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_FAB_ASIEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_FAB_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_FIAT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_FORD


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_20


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_25


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_30


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_35


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_40


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_45


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_50


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_55


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_60


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_65


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HALTER_66


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_ASIEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_AUDI_VW


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_BMW_BENZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_EUROPA


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_FORD_OPEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HERST_SONST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_110


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_140


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_180


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_0_140


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_140_210


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_211


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_250


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_251


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSAQUOT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSHERST_AUDI_VW


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSHERST_BMW_BENZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSHERST_FORD_OPEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSSEG_KLEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSSEG_OBER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSSEG_VAN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KRSZUL_NEU


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_30


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_40


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_50


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_60


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_0_60


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_70


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_61_120


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_80


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_90


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_110


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_120


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KW_121


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_MAZDA


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_MERCEDES


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_MOTOR


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_NISSAN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_OPEL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_PEUGEOT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_RENAULT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_GELAENDEWAGEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_GROSSRAUMVANS


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_KLEINST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_KLEINWAGEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_KOMPAKTKLASSE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_MINIVANS


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_MINIWAGEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_MITTELKLASSE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_OBEREMITTELKLASSE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_OBERKLASSE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_SONSTIGE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_SPORTWAGEN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_UTILITIES


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_VAN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SEG_WOHNMOBILE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SITZE_4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SITZE_5


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_SITZE_6


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_TOYOTA


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VORB_0


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VORB_1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VORB_1_2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VORB_2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VORB_3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_VW


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KKK


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_FAMILIE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_FAMILIE_GROB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_LEBENSPHASE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_LEBENSPHASE_GROB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_STATUS_FEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing LP_STATUS_GROB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing NATIONALITAET_KZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ORTSGR_KLS9


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing OST_WEST_KZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_GBZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PLZ8_HHZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing PRAEGENDE_JUGENDJAHRE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing REGIOTYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing RELAT_AB


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing RETOURTYP_BK_S


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_DOM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_ERL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_FAM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_KAEM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_KRIT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_KULT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_LUST


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_MAT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_PFLICHT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_RAT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_REL


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_SOZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_TRADV


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SEMIO_VERT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing SHOPPER_TYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing TITEL_KZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing VERS_TYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing WOHNDAUER_2008


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing WOHNLAGE


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing W_KEIT_KIND_HH


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ZABEOTYP


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ARBEIT


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing ALTERSKATEGORIE_FEIN


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_SOZIALES


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_TELKO_ONLINE_QUOTE_12


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSI_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSI_OFFLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing D19_VERSI_ONLINE_DATUM


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ANTG1


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ANTG2


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ANTG3


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_ANTG4


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_CCM_1401_2500


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_GBZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_HHZ


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_KMH_210


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KOMBIALTER


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))


Processing KBA13_BAUMAX


HBox(children=(FloatProgress(value=0.0, max=42833.0), HTML(value='')))




In [77]:
pd.DataFrame(test).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)

In [78]:
test_location = session.upload_data(os.path.join(data_dir, 'test.csv'), key_prefix=prefix)

In [79]:
xgb_transformer.transform(test_location, content_type='text/csv', split_type='Line')

In [80]:
xgb_transformer.wait()

.............................[32m2020-09-02T12:23:16.018:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34mArguments: serve[0m
[34m[2020-09-02 12:23:15 +0000] [1] [INFO] Starting gunicorn 19.7.1[0m
[34m[2020-09-02 12:23:15 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[34m[2020-09-02 12:23:15 +0000] [1] [INFO] Using worker: gevent[0m
[34m[2020-09-02 12:23:15 +0000] [36] [INFO] Booting worker with pid: 36[0m
[35mArguments: serve[0m
[35m[2020-09-02 12:23:15 +0000] [1] [INFO] Starting gunicorn 19.7.1[0m
[35m[2020-09-02 12:23:15 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[35m[2020-09-02 12:23:15 +0000] [1] [INFO] Using worker: gevent[0m
[35m[2020-09-02 12:23:15 +0000] [36] [INFO] Booting worker with pid: 36[0m
[34m[2020-09-02 12:23:15 +0000] [37] [INFO] Booting worker with pid: 37[0m
[34m[2020-09-02 12:23:15 +0000] [38] [INFO] Booting worker with pid: 38[0m
[34m[2020-09-02 12:23:15 +0000]

In [81]:
!aws s3 cp --recursive $xgb_transformer.output_path $data_dir


Unknown options: model


In [82]:
Y_pred = pd.read_csv('s3://sagemaker-us-east-1-104297441907/xgboost-200902-1114-006-b7695509-2020-09-02-11-43-37-705/test.csv.out',header=None)

In [83]:
Y_pred.to_csv('test_output.csv')

In [84]:
Y_pred

Unnamed: 0,0
0,0.026718
1,0.026718
2,0.010001
3,0.008961
4,0.015184
...,...
42828,0.012146
42829,0.008876
42830,0.021401
42831,0.008961


In [85]:
test

Unnamed: 0,LNR,AGER_TYP,AKT_DAT_KL,ALTER_HH,ALTERSKATEGORIE_FEIN,ANZ_HAUSHALTE_AKTIV,ANZ_HH_TITEL,ANZ_KINDER,ANZ_PERSONEN,ANZ_STATISTISCHE_HAUSHALTE,...,WOHNLAGE,ZABEOTYP,ANREDE_KZ,ALTERSKATEGORIE_GROB,PRAEGENDE_JUGENDJAHRE_DECADE,PRAEGENDE_JUGENDJAHRE_MOVEMENT,CAMEO_DEU_2015_1,CAMEO_DEU_2015_2,CAMEO_INTL_2015_WEALTH,CAMEO_INTL_2015_LIFE_STAGE
0,1754,2,1,7,6,2,0,0,2,2,...,3,3,1,4,1,2,2,1,1,3
1,1770,2,1,9,9,20,0,0,1,21,...,5,3,1,4,3,1,5,2,3,1
2,1465,2,9,16,11,2,0,0,4,2,...,4,3,2,4,3,2,7,2,4,1
3,1470,2,7,9,9,1,0,0,0,1,...,2,3,2,4,3,1,2,1,1,3
4,1478,1,1,21,13,1,0,0,4,1,...,7,4,2,4,4,1,5,2,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42828,67615,2,1,9,9,2,0,0,2,2,...,3,3,2,1,2,1,6,1,2,4
42829,67938,2,1,10,10,2,0,0,1,2,...,3,3,1,4,2,1,6,2,3,1
42830,67942,2,1,16,15,1,0,2,3,1,...,3,2,1,3,5,2,4,3,2,4
42831,67949,2,1,9,9,1,0,0,1,1,...,3,3,2,3,4,2,1,1,1,4


In [86]:
test['RESPONSE'] = Y_pred

In [88]:
test

Unnamed: 0,LNR,AGER_TYP,AKT_DAT_KL,ALTER_HH,ALTERSKATEGORIE_FEIN,ANZ_HAUSHALTE_AKTIV,ANZ_HH_TITEL,ANZ_KINDER,ANZ_PERSONEN,ANZ_STATISTISCHE_HAUSHALTE,...,ZABEOTYP,ANREDE_KZ,ALTERSKATEGORIE_GROB,PRAEGENDE_JUGENDJAHRE_DECADE,PRAEGENDE_JUGENDJAHRE_MOVEMENT,CAMEO_DEU_2015_1,CAMEO_DEU_2015_2,CAMEO_INTL_2015_WEALTH,CAMEO_INTL_2015_LIFE_STAGE,RESPONSE
0,1754,2,1,7,6,2,0,0,2,2,...,3,1,4,1,2,2,1,1,3,0.026718
1,1770,2,1,9,9,20,0,0,1,21,...,3,1,4,3,1,5,2,3,1,0.026718
2,1465,2,9,16,11,2,0,0,4,2,...,3,2,4,3,2,7,2,4,1,0.010001
3,1470,2,7,9,9,1,0,0,0,1,...,3,2,4,3,1,2,1,1,3,0.008961
4,1478,1,1,21,13,1,0,0,4,1,...,4,2,4,4,1,5,2,3,1,0.015184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42828,67615,2,1,9,9,2,0,0,2,2,...,3,2,1,2,1,6,1,2,4,0.012146
42829,67938,2,1,10,10,2,0,0,1,2,...,3,1,4,2,1,6,2,3,1,0.008876
42830,67942,2,1,16,15,1,0,2,3,1,...,2,1,3,5,2,4,3,2,4,0.021401
42831,67949,2,1,9,9,1,0,0,1,1,...,3,2,3,4,2,1,1,1,4,0.008961


In [89]:
submit = test[['LNR','RESPONSE']]

In [96]:
submit.to_csv('submit.csv')

In [None]:
xgb_predictor = xgb.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

In [91]:
from sagemaker.predictor import csv_serializer
xgb_predictor.content_type = 'text/csv'
xgb_predictor.serializer = csv_serializer

In [None]:
Y_pred = xgb_predictor.predict(test.drop(['RESPONSE'],axis=1).values)

In [95]:
xgb_predictor.delete_endpoint()