In [None]:
# Import necessary libraries

import boto3, re, sys, math, json, os, sagemaker, urllib.request
import sagemaker
from sagemaker.debugger import Rule, ProfilerRule, rule_configs
from sagemaker.session import TrainingInput
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime

# Report the AWS Region, the IAM role SageMaker runs under, and the XGBoost image URI

region = sagemaker.Session().boto_region_name
print(f"AWS Region: {region}")

role = sagemaker.get_execution_role()
print(f"RoleArn: {role}")

# Look up the container image for the "xgboost" framework, version "1.2-1", in this region
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
)
print(f"SageMaker Image:{container}")

In [None]:
# Load environment variables

!pip install python-dotenv
%load_ext dotenv
%dotenv

In [None]:
# Create a bucket on the ECS endpoint through the S3 API

# Settings to adjust before running this cell
bucket_name = 'bucket_name'           # replace with a unique name for your bucket
endpoint_url = 'https://1.2.3.4:5678'  # replace with the IP address and port number

# Credentials come from the environment; a missing variable raises KeyError here
ecs_access_key_id = os.environ['ECS_ACCESS_KEY_ID']
ecs_secret_access_key = os.environ['ECS_SECRET_ACCESS_KEY']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=ecs_access_key_id,
    aws_secret_access_key=ecs_secret_access_key,
    endpoint_url=endpoint_url,
)

# Best effort: any failure is printed and the notebook keeps running
try:
    s3.create_bucket(Bucket=bucket_name)
    print('S3 bucket created successfully')
except Exception as err:
    print('S3 error: ', err)

In [None]:
# Fetch the training CSV and load it into a DataFrame

# Step 1: download the file next to the notebook; a failure is printed, not raised
source_url = "https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv"
try:
    urllib.request.urlretrieve(source_url, "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as download_err:
    print('Data load error: ', download_err)

# Step 2: read the file, using its first column as the index; a failure is
# printed, not raised, in which case model_data is not (re)assigned
try:
    model_data = pd.read_csv('./bank_clean.csv', index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as read_err:
    print('Data load error: ', read_err)

In [None]:
# Split step: shuffle all rows reproducibly (random_state=1729), then use the
# first 70% of the shuffled rows for training and the remaining rows for testing.
#
# Positional .iloc slicing yields the same two frames as
# np.split(shuffled, [train_size]) but avoids routing a DataFrame through
# numpy's split machinery, which relies on the deprecated DataFrame.swapaxes
# and emits a FutureWarning on recent pandas releases.
shuffled_data = model_data.sample(frac=1, random_state=1729)
train_size = int(0.7 * len(model_data))
train_data = shuffled_data.iloc[:train_size]
test_data = shuffled_data.iloc[train_size:]
print(train_data.shape, test_data.shape)

In [None]:
# Write the training set to train.csv with the y_yes column first, followed by
# every column except y_no / y_yes; no header row and no index column.
# NOTE(review): y_yes is presumably the prediction target — confirm.
first_column = train_data['y_yes']
remaining_columns = train_data.drop(['y_no', 'y_yes'], axis=1)
train_frame = pd.concat([first_column, remaining_columns], axis=1)
train_frame.to_csv('train.csv', index=False, header=False)

In [None]:
# Upload the local train.csv to the ECS bucket as train/upload_train.csv
# (the object name carries the "upload_" prefix)
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=ecs_access_key_id,
    aws_secret_access_key=ecs_secret_access_key,
    endpoint_url=endpoint_url,
)
upload_target = s3.Bucket(bucket_name).Object('train/upload_train.csv')
# s3_data keeps whatever upload_file returns, exactly as before
s3_data = upload_target.upload_file('train.csv')

In [None]:
# Download train/upload_train.csv from the ECS bucket into download_train.csv
# (uses the s3 resource created in an earlier cell)
download_source = s3.Bucket(bucket_name).Object('train/upload_train.csv')
# s3_ddata keeps whatever download_file returns, exactly as before
s3_ddata = download_source.download_file('download_train.csv')

# To verify, list the working directory in a terminal:
# both download_train.csv and train.csv should be present