In [109]:
import json, glob, boto3, os
import pdb
import pandas as pd

In [110]:
# from https://alexwlchan.net/2019/07/listing-s3-keys/
def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    kwargs = {'Bucket': bucket}

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    if isinstance(prefix, str):
        prefixes = (prefix, )
    else:
        prefixes = prefix

    for key_prefix in prefixes:
        kwargs["Prefix"] = key_prefix

        for page in paginator.paginate(**kwargs):
            try:
                contents = page["Contents"]
            except KeyError:
                return

            for obj in contents:
                key = obj["Key"]
                if key.endswith(suffix):
                    yield obj


def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj["Key"]

In [111]:
session = boto3.Session()
BUCKET_NAME = 'snowbot-pv'

# S3 Connect
s3 = session.resource('s3')

bucket = s3.Bucket(BUCKET_NAME)

In [112]:
DATA_DIR = "./data/"
MERGED_FILENAME = "merged_file.json"
merged_file = DATA_DIR + MERGED_FILENAME

result = []

for f in get_matching_s3_keys(BUCKET_NAME, suffix=".json"):
    
    # Write the file from S3 into a local temp file
    with open('temp', 'wb') as tfw:
        bucket.download_fileobj(f, tfw)

    # Append the local temp file into the result list
    with open('temp', 'rb') as tfr:          
        result.append(json.load(tfr))
        
os.remove("temp")

# Fill the output file with the merged content
with open(merged_file, "w") as outfile:
     json.dump(result, outfile)

In [None]:
# add:
# save other data for whistler
# daily: for each chair calculate most open status of the day: O > H > X
# Days since each chair was last seen open with timestamp of most recent open time.
# snowfall since last open

In [116]:
#load the merged json as a dataframe
with open(merged_path, "r") as f:
    d = json.load(f)
    df = pd.DataFrame.from_dict(json_normalize(d, record_path='lifts', meta='timestamp'))

In [117]:
# set datatypes
df = df.astype({
    "liftID": 'category',
    "resortID": 'category',
    "liftName": 'category',
    "status": 'category',
    "timeToRide": "int"
})
df["timestamp"] = pd.to_datetime(df["timestamp"])

In [118]:
df

Unnamed: 0,liftID,resortID,liftName,status,timeToRide,timestamp
0,69,13,Blackcomb Gondola Lower,X,7,2020-01-01 21:48:15.788181
1,70,13,Blackcomb Gondola Upper,X,7,2020-01-01 21:48:15.788181
2,5,13,Excalibur Gondola Lower,X,3,2020-01-01 21:48:15.788181
3,71,13,Excalibur Gondola Upper,X,5,2020-01-01 21:48:15.788181
4,8,13,Excelerator Express,X,6,2020-01-01 21:48:15.788181
...,...,...,...,...,...,...
1329,44,13,Franz's Chair,X,8,2020-01-01 21:38:17.789306
1330,43,13,Peak Express,X,3,2020-01-01 21:38:17.789306
1331,37,13,Harmony 6 Express,X,6,2020-01-01 21:38:17.789306
1332,42,13,Symphony Express,X,7,2020-01-01 21:38:17.789306


In [119]:
def get_status_changes(df):
    '''Returns a dataframe that only includes the times when there was a change to a new status'''
    df = df.groupby('liftName', group_keys=False)\
           .apply(lambda x: x[x.status.ne(x.status.shift())])\
           .reset_index(drop=True)
    return df

In [121]:
get_status_changes(df)

Unnamed: 0,liftID,resortID,liftName,status,timeToRide,timestamp
0,3,13,7th Heaven Express,O,6,2020-01-02 14:27:25.929318-08:00
1,3,13,7th Heaven Express,X,6,2020-01-02 14:49:08.207807-08:00
2,3,13,7th Heaven Express,O,6,2020-01-02 17:49:08.482345
3,3,13,7th Heaven Express,X,6,2020-01-01 21:46:01.659990
4,36,13,Big Red Express,O,8,2020-01-02 14:27:25.929318-08:00
...,...,...,...,...,...,...
79,33,13,Whistler Village Gondola Lower,X,5,2020-01-01 21:46:01.659990
80,72,13,Whistler Village Gondola Upper,O,11,2020-01-02 14:27:25.929318-08:00
81,72,13,Whistler Village Gondola Upper,X,11,2020-01-02 15:19:09.683749-08:00
82,72,13,Whistler Village Gondola Upper,O,11,2020-01-02 17:49:08.482345
