In [1]:
# import libraries
import boto3
import sagemaker
from sagemaker import get_execution_role

# Notebook-wide settings: the AWS region reported by the default boto3 session,
# the S3 key prefix used by this demo, and the execution role of this notebook.
my_region = boto3.session.Session().region_name
prefix = 'sagemaker/DEMO-xgboost-dm'
role = get_execution_role()

# Look up the URI of the "latest" XGBoost image for the current region.
xgboost_container = sagemaker.image_uris.retrieve("xgboost", my_region, "latest")

# Report the region and image that the rest of the notebook will use.
print("".join([
    "Success - the MySageMakerInstance is in the ",
    my_region,
    " region. You will use the ",
    xgboost_container,
    " container for your SageMaker endpoint.",
]))

Success - the MySageMakerInstance is in the us-east-1 region. You will use the 811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest container for your SageMaker endpoint.


In [2]:
bucket_name = 'awsgis' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
s3 = boto3.resource('s3')

# A bucket in us-east-1 is requested without a location constraint; for any
# other region the region name is passed as the LocationConstraint.
create_bucket_kwargs = {'Bucket': bucket_name}
if my_region != 'us-east-1':
    create_bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': my_region}

# Best effort: a failure is reported but does not stop the notebook.
try:
    s3.create_bucket(**create_bucket_kwargs)
    print('S3 bucket created successfully')
except Exception as e:
    print('S3 error: ',e)

S3 error:  An error occurred (IllegalLocationConstraintException) when calling the CreateBucket operation: The unspecified location constraint is incompatible for the region specific endpoint this request was sent to.


In [3]:
import os
import warnings
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt

!pip install folium
import folium

warnings.filterwarnings('ignore')

In [42]:
def load_data(_input_dir, _data_size=-1):
    """
    Load the monthly Uber trip files and count the trips per zip, weekday and hour.

    Reads ``uber-processed-data-{month}14.csv`` for apr through sep from
    ``_input_dir``, stacks them, optionally down-samples the rows, and counts
    the non-null ``Date/Time`` entries of every (zip, weekday, hour) group.

    :param _input_dir: input directory name
                      Directory (local path or S3 URL such as ``s3://bucket``)
                      where the input files are stored
    :param _data_size: size of data
                      Number of rows to sample at random from the combined
                      data; any value <= 0 (default -1) keeps all the rows
    :return:
            DataFrame with the columns ``Zip``, ``Weekday``, ``Hour`` and
            ``Number of Trips``
    """
    import pandas as pd

    months = ["apr", "may", "jun", "jul", "aug", "sep"]
    file_format = "uber-processed-data-{}14.csv"

    # Read every month first and concatenate once: DataFrame.append was removed
    # in pandas 2.0, and growing a frame inside the loop copies it every time.
    monthly_frames = [
        pd.read_csv(_input_dir + "/" + file_format.format(month))
        for month in months
    ]
    _data = pd.concat(monthly_frames)

    # sample the data
    if _data_size > 0:
        _data = _data.sample(n=_data_size)

    # count() skips nulls, so each value is the number of rows in the group
    # that carry a Date/Time entry.
    _demand_wh = _data.groupby(['zip', 'weekday', 'hour'])['Date/Time'].count().reset_index()
    _demand_wh.columns = ['Zip', 'Weekday', 'Hour', 'Number of Trips']

    return _demand_wh

In [58]:
demand_wh = load_data(
    _input_dir="s3://cloud-project-x", _data_size=-1,
)

import io
import s3fs
import json
import boto3

# Download the GeoJSON file from S3 and parse it.
s3 = boto3.resource('s3')
bucket = 'cloud-project-x'
key = 'nyc.geojson'
obj = s3.Object(bucket, key)
data = obj.get()['Body'].read().decode('utf-8')
tmp = json.loads(data)

# Keep only the features whose postal code occurs in the demand table, and
# store that postal code as an int (presumably so that it can be matched
# against the values of the Zip column when the map is drawn).
# The set of known zips is built once up front; rebuilding a list and scanning
# it for every feature made the loop needlessly quadratic.
demand_zips = set(demand_wh['Zip'].unique().tolist())
geozips = []
for feature in tmp['features']:
    postal_code = int(feature['properties']['postalCode'])
    if postal_code in demand_zips:
        feature['properties']['postalCode'] = postal_code
        geozips.append(feature)

new_json = {'type': 'FeatureCollection', 'features': geozips}

# Upload the filtered collection to the same bucket as 'updated-file.json'.
s3object = s3.Object(bucket, 'updated-file.json')
s3object.put(
    Body=json.dumps(new_json, sort_keys=True, indent=4, separators=(',', ': ')).encode('UTF-8')
)

def create_map(table, zips, mapped_feature, bucket='cloud-project-x', geo_key='updated-file.json'):
    """
    Draw a choropleth of ``mapped_feature`` per zip code and upload it to S3.

    The area boundaries are read as GeoJSON from ``s3://{bucket}/{geo_key}``.
    The map is saved locally as ``{mapped_feature}_map.html`` and that file is
    then uploaded to ``bucket`` under the same name.

    :param table: data table holding the values to plot (e.g. a pandas DataFrame)
    :param zips: name of the column in ``table`` that is matched against each
                 feature's ``properties.postalCode``
    :param mapped_feature: name of the column in ``table`` that drives the fill
                 colour; also used to build the legend and the output file name
    :param bucket: S3 bucket that holds the GeoJSON and receives the HTML map
    :param geo_key: key of the GeoJSON object inside ``bucket``
    :return: None
    """
    geo_object = boto3.resource('s3').Object(bucket, geo_key)
    ny_geo = json.loads(geo_object.get()['Body'].read().decode('utf-8'))

    # Fixed starting view (presumably centred on New York City - confirm).
    m = folium.Map(location = [40.7128, -74.0060], zoom_start = 11)

    # Map.choropleth() was deprecated and is gone from current folium releases;
    # the Choropleth class takes the same arguments and is added to the map.
    folium.Choropleth(
        geo_data = ny_geo,
        fill_opacity = 1,
        line_opacity = 0.2,
        data = table,
        key_on = 'feature.properties.postalCode',
        columns = [zips, mapped_feature],
        fill_color = 'YlGnBu',
        # Underscores become spaces and every word is capitalised.
        legend_name = (' ').join(mapped_feature.split('_')).title() + ' Across NY'
    ).add_to(m)
    folium.LayerControl().add_to(m)

    # Write the map to the working directory, then upload that file.
    map_file = mapped_feature + '_map.html'
    m.save(outfile = map_file)
    with open(map_file, 'rb') as f:
        s3_client = boto3.client('s3')
        s3_client.put_object(Bucket=bucket, Key=map_file, Body=f)

# demand_wh holds one row per (Zip, Weekday, Hour). Add those rows up so that
# every zip code appears exactly once with its total number of trips; with
# repeated zips the choropleth's zip -> value lookup would keep only one of the
# rows per zip instead of the total.
demand_by_zip = demand_wh.groupby('Zip', as_index=False)['Number of Trips'].sum()
create_map(demand_by_zip, 'Zip', 'Number of Trips')