# Setup and configuration
Run the following cells to load the data stored in your S3 bucket into a Redshift table. If you would like to learn about real-time ML on streaming data, see this blog post: https://aws.amazon.com/blogs/big-data/real-time-analytics-with-amazon-redshift-streaming-ingestion/

### Create Schema and Table

In [None]:
-- Create the schema that holds the table for this walkthrough.
-- IF NOT EXISTS keeps the cell safe to re-run after the schema has been created.
CREATE SCHEMA IF NOT EXISTS <<YOUR_SCHEMA_NAME>>;

In [None]:
-- Table that receives the rows of bank-additional-full.csv from the COPY cell.
--
-- Redshift treats a bare DECIMAL as DECIMAL(18,0) and rounds loaded values to
-- the column scale, so every numeric column states its precision and scale
-- explicitly instead of relying on that default.
--   * Columns declared DECIMAL(18,0) keep the type they effectively had before.
--   * The rate/index columns get a non-zero scale so fractional values survive
--     the load.
-- NOTE(review): which columns are fractional, and the scale of 4, are inferred
-- from the column names -- confirm against your CSV and widen if needed.
CREATE TABLE <<YOUR_SCHEMA_NAME>>.<<YOUR_TABLE_NAME>> (
	age DECIMAL(18,0) NOT NULL,
	job VARCHAR NOT NULL,
	marital VARCHAR NOT NULL,
	education VARCHAR NOT NULL,
	credit_default VARCHAR NOT NULL,
	housing VARCHAR NOT NULL,
	loan VARCHAR NOT NULL,
	contact VARCHAR NOT NULL,
	month VARCHAR NOT NULL,
	day_of_week VARCHAR NOT NULL,
	duration DECIMAL(18,0) NOT NULL,
	campaign DECIMAL(18,0) NOT NULL,
	pdays DECIMAL(18,0) NOT NULL,
	previous DECIMAL(18,0) NOT NULL,
	poutcome VARCHAR NOT NULL,
	-- Rates and indices: a scale of 0 would round these to whole numbers.
	emp_var_rate DECIMAL(18,4) NOT NULL,
	cons_price_idx DECIMAL(18,4) NOT NULL,
	cons_conf_idx DECIMAL(18,4) NOT NULL,
	euribor3m DECIMAL(18,4) NOT NULL,
	nr_employed DECIMAL(18,4) NOT NULL,
	y BOOLEAN NOT NULL
);

### Load data into the Redshift table

In [None]:
-- Bulk-load the CSV file from S3 into the target table.
-- Replace every <<...>> placeholder before running.
COPY <<YOUR_SCHEMA_NAME>>.<<YOUR_TABLE_NAME>>
FROM 's3://<<YOUR_S3_BUCKET>>/bank-additional-full.csv'
IAM_ROLE '<<YOUR_IAM_ROLE_ARN>>'
FORMAT AS CSV
DELIMITER ','
QUOTE '"'
IGNOREHEADER 1  -- skip the header row of the file
REGION AS '<<YOUR_AWS_REGION>>';

### Create Model SQL Command

In [None]:
-- Syntax synopsis for CREATE MODEL. This is a template, not a runnable
-- statement: substitute real names and keep only the clauses you need.
-- Notation: [ ] optional clause, { } choose one, | separates alternatives.
CREATE MODEL model_name
    FROM { table_name | ( select_statement ) | 'job_name' }
    [ TARGET column_name ]
    FUNCTION function_name ( data_type [, ...] )
    IAM_ROLE { default | 'arn:aws:iam::<account-id>:role/<role-name>' }
    [ AUTO ON / OFF ]
      -- default is AUTO ON
    [ MODEL_TYPE { XGBOOST | MLP | LINEAR_LEARNER | KMEANS } ]
      -- optional for AUTO ON, where the default is the list of all supported types
      -- required for AUTO OFF
    [ PROBLEM_TYPE ( REGRESSION | BINARY_CLASSIFICATION | MULTICLASS_CLASSIFICATION ) ]
      -- not supported when AUTO OFF
    [ OBJECTIVE ( 'MSE' | 'Accuracy' | 'F1' | 'F1_Macro' | 'AUC' |
                  'reg:squarederror' | 'reg:squaredlogerror' | 'reg:logistic' |
                  'reg:pseudohubererror' | 'reg:tweedie' | 'binary:logistic' | 'binary:hinge' |
                  'multi:softmax' ) ]
      -- for AUTO ON: the first 5 are valid ('MSE' through 'AUC')
      -- for AUTO OFF: 6-13 are valid ('reg:squarederror' through 'multi:softmax')
    [ PREPROCESSORS 'string' ]
      -- required for AUTO OFF, where it has to be 'none'
      -- optional for AUTO ON
    [ HYPERPARAMETERS { DEFAULT | DEFAULT EXCEPT ( Key 'value' (,...) ) } ]
      -- supports XGBoost hyperparameters, except OBJECTIVE
      -- required and only allowed for AUTO OFF
      -- default NUM_ROUND is 100
      -- NUM_CLASS is required if objective is multi:softmax (only possible for AUTO OFF)
    [ SETTINGS (
      S3_BUCKET 'bucket', |
        -- required
      KMS_KEY_ID 'kms_string', |
        -- optional
      S3_GARBAGE_COLLECT on / off, |
        -- optional, default is on
      MAX_CELLS integer, |
        -- optional, default is 1,000,000
      MAX_RUNTIME integer (, ...) |
        -- optional, default is 5400 (1.5 hours)
      HORIZON integer, |
        -- required if creating a forecast model
      FREQUENCY integer, |
        -- required if creating a forecast model
      PERCENTILES string
        -- optional if creating a forecast model
    ) ]