# Hands-On Tutorial: Automated Predictive (APL) in SAP HANA Cloud
## Data upload
Source blog post: https://blogs.sap.com/2020/07/27/hands-on-tutorial-automated-predictive-apl-in-sap-hana-cloud/

In [1]:
# Show which hana_ml release is installed in this kernel.
import hana_ml
print(hana_ml.__version__)

2.18.23092701


### Connect to SAP HANA Cloud

In [6]:
from hana_ml import dataframe

# Open the connection using the stored user key 'MYHANACLOUD'
# (presumably an entry in the local secure user store - confirm),
# with encryption requested.
conn = dataframe.ConnectionContext(
    userkey='MYHANACLOUD',
    encrypt='true',
)

# Displays True when the underlying connection is open.
conn.connection.isconnected()

True

### Prepare and upload data to train a Machine Learning model

In [8]:
import pandas as pd

# The source file is semicolon-delimited, not comma-delimited.
df_data = pd.read_csv('bank-full.csv', sep=';')

# Preview the first five rows.
df_data.head(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [9]:
# Upper-case every column name.
df_data.columns = df_data.columns.str.upper()

In [10]:
# Remove the DURATION column from the training data.
df_data = df_data.drop(columns=['DURATION'])

In [11]:
# Rename the target column Y to PURCHASE.
# Only the column mapping is passed: the former `index=str` argument also
# converted every row label to a string, which was an unintended side effect.
df_data = df_data.rename(columns={'Y': 'PURCHASE'})

In [12]:
# Add a running row number (0 .. n-1) as the first column, CUSTOMER_ID.
df_data.insert(0, 'CUSTOMER_ID', range(len(df_data)))

In [13]:
# Preview the prepared data (head() returns the first five rows by default).
df_data.head()

Unnamed: 0,CUSTOMER_ID,AGE,JOB,MARITAL,EDUCATION,DEFAULT,BALANCE,HOUSING,LOAN,CONTACT,DAY,MONTH,CAMPAIGN,PDAYS,PREVIOUS,POUTCOME,PURCHASE
0,0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,1,-1,0,unknown,no
1,1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,1,-1,0,unknown,no
2,2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,1,-1,0,unknown,no
3,3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,1,-1,0,unknown,no
4,4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,1,-1,0,unknown,no


In [15]:
# Upload the prepared training data into the table BANKMARKETING.
# NOTE(review): force=True presumably drops an existing table with the same
# name before loading - confirm against the hana_ml documentation.
df_remote = dataframe.create_dataframe_from_pandas(
    connection_context=conn,
    pandas_df=df_data,
    table_name='BANKMARKETING',
    force=True,
    replace=False,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it]


### Prepare and upload data to create predictions on

In [30]:
# Build an empty frame with the same columns as the training data, minus the
# PURCHASE target, and give each column the dtype it has in df_data.
predictor_columns = df_data.columns.drop('PURCHASE')
df_topredict = pd.DataFrame(data=None, columns=predictor_columns).astype(
    {name: df_data[name].dtype.name for name in predictor_columns}
)

In [31]:
# Two hand-written customers to score. Keys follow the column order of
# df_topredict (all training columns except PURCHASE).
new_customers = [
    {
        'CUSTOMER_ID': 1,
        'AGE': 40,
        'JOB': 'entrepreneur',
        'MARITAL': 'married',
        'EDUCATION': 'secondary',
        'DEFAULT': 'no',
        'BALANCE': 3000,
        'HOUSING': 'yes',
        'LOAN': 'no',
        'CONTACT': 'unknown',
        'DAY': 10,
        'MONTH': 'may',
        'CAMPAIGN': 1,
        'PDAYS': -1,
        'PREVIOUS': 0,
        'POUTCOME': 'unknown',
    },
    {
        'CUSTOMER_ID': 2,
        'AGE': 65,
        'JOB': 'management',
        'MARITAL': 'single',
        'EDUCATION': 'tertiary',
        'DEFAULT': 'no',
        'BALANCE': 3000,
        'HOUSING': 'no',
        'LOAN': 'no',
        'CONTACT': 'telephone',
        'DAY': 10,
        'MONTH': 'mar',
        'CAMPAIGN': 1,
        'PDAYS': -1,
        'PREVIOUS': 12,
        'POUTCOME': 'success',
    },
]

# Append both rows in one concat. ignore_index=True renumbers the rows
# 0..n-1; previously each row was added with index=[0], which left the
# frame with duplicate row labels.
df_topredict = pd.concat(
    [df_topredict, pd.DataFrame(new_customers)],
    ignore_index=True,
)

In [18]:
# Upload the rows to score into the table BANKMARKETING_TOPREDICT.
# Note that this rebinds df_remote, which earlier pointed at BANKMARKETING.
# NOTE(review): force=True presumably drops an existing table with the same
# name before loading - confirm against the hana_ml documentation.
df_remote = dataframe.create_dataframe_from_pandas(
    connection_context=conn,
    pandas_df=df_topredict,
    table_name='BANKMARKETING_TOPREDICT',
    force=True,
    replace=False,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 12.94it/s]
