In [1]:
import os
import warnings
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt

!pip install folium
import folium

warnings.filterwarnings('ignore')

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/amazonei_tensorflow_p36/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
def load_data(_input_dir, _data_size=-1):
    """
    Load the six monthly Uber trip files (apr..sep 2014) and count the trips
    per zip code, weekday and hour.

    :param _input_dir: input directory name
                      Directory (for example an AWS S3 ``s3://bucket`` URL) that holds
                      the ``uber-processed-data-<month>14.csv`` files. Each file must
                      provide the columns ``zip``, ``weekday``, ``hour`` and ``Date/Time``.
    :param _data_size: size of data
                      Number of rows to draw at random from the combined data before
                      counting. The default of -1 (or any value <= 0) keeps all rows.
                      pandas raises ``ValueError`` if it exceeds the number of rows.
    :return:
            the demand data: a DataFrame with the columns ``Zip``, ``Weekday``, ``Hour``
            and ``Number of Trips`` (rows with a non-null ``Date/Time``), one row per
            zip/weekday/hour combination present in the data
    """
    import pandas as pd

    # load all the data
    months = ["apr", "may", "jun", "jul", "aug", "sep"]
    file_format = "uber-processed-data-{}14.csv"
    monthly_frames = [
        pd.read_csv(_input_dir + "/" + file_format.format(month))
        for month in months
    ]
    # One concat instead of DataFrame.append in a loop: append was removed in
    # pandas 2.0 and copied the whole accumulated frame on every iteration.
    _data = pd.concat(monthly_frames, ignore_index=True)

    # sample the data
    if _data_size > 0:
        _data = _data.sample(n=_data_size)

    # Select the counted column before aggregating so only it is counted.
    _demand_wh = (
        _data.groupby(['zip', 'weekday', 'hour'])['Date/Time'].count().reset_index()
    )
    _demand_wh.columns = ['Zip', 'Weekday', 'Hour', 'Number of Trips']

    return _demand_wh

In [3]:
import io
import s3fs
import json
import boto3 

def visualize_demand(_input_bucket, _data_size, _weekdays, _hours):
    """
    Render a choropleth of trip counts per zip code for the selected weekdays
    and hours, and upload the resulting HTML map to S3.

    Steps performed:
      1. load the zip/weekday/hour trip counts via ``load_data`` from ``s3://<_input_bucket>``
      2. keep only the requested weekdays and hours and total the trips per zip
      3. read ``nyc.geojson`` from ``_input_bucket`` and keep the features whose
         ``postalCode`` occurs in the filtered demand data
      4. store that reduced geojson as ``nyc-updated.json`` in ``<_input_bucket>-processed-data``
      5. save the folium map to a local HTML file in the working directory and
         upload it to ``<_input_bucket>-map-output``

    :param _input_bucket: name of the S3 bucket holding the monthly CSV files and ``nyc.geojson``
    :param _data_size: forwarded to ``load_data`` as the sample size
    :param _weekdays: iterable of ``Weekday`` values to keep
    :param _hours: iterable of ``Hour`` values to keep
    :return: None; the outputs are the two S3 objects and the local HTML file
    """
    # Materialize once so generators can be used both for the key and the filter.
    _weekdays = list(_weekdays)
    _hours = list(_hours)

    # generate demand data
    demand_wh = load_data(
        _input_dir=f"s3://{_input_bucket}", _data_size=_data_size,
    )

    # These keys become part of the output file name.
    _weekday_key = "".join([str(_w) for _w in _weekdays])
    _hour_key = "".join([str(_h) for _h in _hours])

    # Keep only the requested weekdays and hours. isin() expresses the selection
    # directly; the former symmetric_difference loop also dropped a requested
    # value whenever it lay outside range(7) / range(24).
    demand_wh = demand_wh[
        demand_wh["Weekday"].isin(_weekdays) & demand_wh["Hour"].isin(_hours)
    ]

    # A choropleth needs exactly one value per zip. Without this aggregation the
    # table holds one row per (zip, weekday, hour) and folium keeps only a single
    # one of them per zip instead of the total demand.
    demand_by_zip = demand_wh.groupby("Zip", as_index=False)["Number of Trips"].sum()

    # load the initial geojson data for NEWYORK
    s3 = boto3.resource('s3')
    s3_object = s3.Object(_input_bucket, 'nyc.geojson')
    geo_data = json.loads(s3_object.get()['Body'].read().decode('utf-8'))

    # filter the geojson data down to the zips of our dataset; the zip set is
    # built once rather than on every feature
    demand_zips = set(demand_by_zip['Zip'].unique().tolist())
    geozips = []
    for feature in geo_data['features']:
        postal_code = int(feature['properties']['postalCode'])
        if postal_code in demand_zips:
            # store the code as int so it matches the numeric Zip column via key_on
            feature['properties']['postalCode'] = postal_code
            geozips.append(feature)

    updated_json = {'type': 'FeatureCollection', 'features': geozips}

    # store the updated json to s3 bucket
    # NOTE(review): create_bucket is issued on every call and without a
    # CreateBucketConfiguration — confirm this matches the account's region setup.
    _updated_bucket = _input_bucket + "-processed-data"
    s3.create_bucket(Bucket=_updated_bucket)
    s3.Object(_updated_bucket, 'nyc-updated.json').put(
        Body=json.dumps(updated_json, sort_keys=True, indent=4, separators=(',', ': ')).encode('UTF-8')
    )

    # function to create map
    def create_map(table, zips, mapped_feature):
        """Draw ``table[mapped_feature]`` per ``table[zips]`` on a map, save it and upload it."""
        # The in-memory geojson is used directly; it is the same document that
        # was just uploaded, so reading it back from S3 is unnecessary.
        m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)
        # folium.Choropleth replaces the deprecated Map.choropleth method, which
        # is no longer available in current folium releases.
        folium.Choropleth(
            geo_data=updated_json,
            fill_opacity=1,
            line_opacity=0.2,
            data=table,
            key_on='feature.properties.postalCode',
            columns=[zips, mapped_feature],
            fill_color='YlGnBu',
            legend_name=(' ').join(mapped_feature.split('_')).title() + ' Across NY'
        ).add_to(m)
        folium.LayerControl().add_to(m)
        final_map_file = f"{mapped_feature}_w{_weekday_key}_h{_hour_key}_map.html"
        m.save(outfile=final_map_file)

        # writing the html file to s3 bucket
        _map_bucket = _input_bucket + "-map-output"
        s3.create_bucket(Bucket=_map_bucket)
        with open(final_map_file, 'rb') as f:
            s3.Object(_map_bucket, final_map_file).put(Body=f)

    create_map(demand_by_zip, 'Zip', 'Number of Trips')

# Bucket passed to visualize_demand as the input bucket for every map.
s3_bucket_name = "cloud-project-x"

# Weekday selections to map, in run order; each map covers all 24 hours
# and uses the complete data set (_data_size=-1).
weekday_groups = [
    list(range(7)),  # full week
    [0, 2, 4],       # monday wednesday friday
    [1, 3],          # tuesday thursday
    [5, 6],          # saturday sunday
]

for weekday_group in weekday_groups:
    visualize_demand(
        _input_bucket=s3_bucket_name,
        _data_size=-1,
        _weekdays=weekday_group,
        _hours=list(range(24)),
    )