In [1]:
import json, glob, boto3
import pdb

In [None]:
def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    kwargs = {'Bucket': bucket}

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    if isinstance(prefix, str):
        prefixes = (prefix, )
    else:
        prefixes = prefix

    for key_prefix in prefixes:
        kwargs["Prefix"] = key_prefix

        for page in paginator.paginate(**kwargs):
            try:
                contents = page["Contents"]
            except KeyError:
                return

            for obj in contents:
                key = obj["Key"]
                if key.endswith(suffix):
                    yield obj


def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj["Key"]

In [None]:
session = boto3.Session()
BUCKET_NAME = 'snowbot-pv'

# S3 Connect
s3 = session.resource('s3')

bucket = s3.Bucket(BUCKET_NAME)

In [None]:
result = []

for f in get_matching_s3_keys(BUCKET_NAME, suffix=".json"):
    
    # Write the file from S3 into a local temp file
    with open('temp', 'wb') as tfw:
        bucket.download_fileobj(f, tfw)

    # Append the local temp file into the result list
    with open('temp', 'rb') as tfr:          
        result.append(json.load(tfr))

In [209]:
with open("merged_file.json", "w") as outfile:    #filling the resultant file with Jason content
     json.dump(result, outfile)                 #json.dump will fill output file with merged data

In [195]:
with open('temp_new', 'wb') as tfn:
    bucket.download_fileobj("2020_01_01_21_48_wb_lifts.json", tfn)
    
    #print(json.load(tfn))
    #print(tfn.read())

In [197]:
with open('temp_new', 'rb') as test:
    print(json.load(test))

{'timestamp': '2020-01-01 21:48:49.440097', 'lifts': [{'liftID': 69, 'resortID': 13, 'liftName': 'Blackcomb Gondola Lower', 'status': 'X', 'timeToRide': '7'}, {'liftID': 70, 'resortID': 13, 'liftName': 'Blackcomb Gondola Upper', 'status': 'X', 'timeToRide': '7'}, {'liftID': 5, 'resortID': 13, 'liftName': 'Excalibur Gondola Lower', 'status': 'X', 'timeToRide': '3'}, {'liftID': 71, 'resortID': 13, 'liftName': 'Excalibur Gondola Upper', 'status': 'X', 'timeToRide': '5'}, {'liftID': 8, 'resortID': 13, 'liftName': 'Excelerator Express', 'status': 'X', 'timeToRide': '6'}, {'liftID': 6, 'resortID': 13, 'liftName': 'Magic Chair', 'status': 'X', 'timeToRide': '6'}, {'liftID': 4, 'resortID': 13, 'liftName': 'Jersey Cream Express', 'status': 'X', 'timeToRide': '5'}, {'liftID': 9, 'resortID': 13, 'liftName': 'Catskinner Express', 'status': 'X', 'timeToRide': '4'}, {'liftID': 22, 'resortID': 13, 'liftName': 'Peak 2 Peak Gondola', 'status': 'X', 'timeToRide': '12'}, {'liftID': 10, 'resortID': 13, 'l

In [172]:
    
with open('temp_new', 'r') as tfn:
    print(type(tfn))
    print(tfn.read().strip("'<>() ").replace('\'', '\"'))
    tfn_json = str(tfn.read()) #.strip("'<>() ").replace('\'', '\"')
    print(tfn_json)
    print(json.loads(tfn_json))

<class '_io.TextIOWrapper'>
{"timestamp": "2020-01-01 21:48:49.440097", "lifts": [{"liftID": 69, "resortID": 13, "liftName": "Blackcomb Gondola Lower", "status": "X", "timeToRide": "7"}, {"liftID": 70, "resortID": 13, "liftName": "Blackcomb Gondola Upper", "status": "X", "timeToRide": "7"}, {"liftID": 5, "resortID": 13, "liftName": "Excalibur Gondola Lower", "status": "X", "timeToRide": "3"}, {"liftID": 71, "resortID": 13, "liftName": "Excalibur Gondola Upper", "status": "X", "timeToRide": "5"}, {"liftID": 8, "resortID": 13, "liftName": "Excelerator Express", "status": "X", "timeToRide": "6"}, {"liftID": 6, "resortID": 13, "liftName": "Magic Chair", "status": "X", "timeToRide": "6"}, {"liftID": 4, "resortID": 13, "liftName": "Jersey Cream Express", "status": "X", "timeToRide": "5"}, {"liftID": 9, "resortID": 13, "liftName": "Catskinner Express", "status": "X", "timeToRide": "4"}, {"liftID": 22, "resortID": 13, "liftName": "Peak 2 Peak Gondola", "status": "X", "timeToRide": "12"}, {"lif

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [185]:
test = str(test.read())
test = test.strip("'<>() ").replace('\'', '\"')
test.strip('\'')

'b"{"timestamp": "2020-01-01 21:48:49.440097", "lifts": [{"liftID": 69, "resortID": 13, "liftName": "Blackcomb Gondola Lower", "status": "X", "timeToRide": "7"}, {"liftID": 70, "resortID": 13, "liftName": "Blackcomb Gondola Upper", "status": "X", "timeToRide": "7"}, {"liftID": 5, "resortID": 13, "liftName": "Excalibur Gondola Lower", "status": "X", "timeToRide": "3"}, {"liftID": 71, "resortID": 13, "liftName": "Excalibur Gondola Upper", "status": "X", "timeToRide": "5"}, {"liftID": 8, "resortID": 13, "liftName": "Excelerator Express", "status": "X", "timeToRide": "6"}, {"liftID": 6, "resortID": 13, "liftName": "Magic Chair", "status": "X", "timeToRide": "6"}, {"liftID": 4, "resortID": 13, "liftName": "Jersey Cream Express", "status": "X", "timeToRide": "5"}, {"liftID": 9, "resortID": 13, "liftName": "Catskinner Express", "status": "X", "timeToRide": "4"}, {"liftID": 22, "resortID": 13, "liftName": "Peak 2 Peak Gondola", "status": "X", "timeToRide": "12"}, {"liftID": 10, "resortID": 13,

In [168]:
print(tfn.read())

ValueError: I/O operation on closed file.

In [146]:
f = "/Users/paul/Downloads/2020_01_01_21_48_wb_lifts.json"

with open(f, "rb+") as infile:
    print(type(f))
    print(f)
    print(json.load(infile))

<class 'str'>
/Users/paul/Downloads/2020_01_01_21_48_wb_lifts.json
{'timestamp': '2020-01-01 21:48:49.440097', 'lifts': [{'liftID': 69, 'resortID': 13, 'liftName': 'Blackcomb Gondola Lower', 'status': 'X', 'timeToRide': '7'}, {'liftID': 70, 'resortID': 13, 'liftName': 'Blackcomb Gondola Upper', 'status': 'X', 'timeToRide': '7'}, {'liftID': 5, 'resortID': 13, 'liftName': 'Excalibur Gondola Lower', 'status': 'X', 'timeToRide': '3'}, {'liftID': 71, 'resortID': 13, 'liftName': 'Excalibur Gondola Upper', 'status': 'X', 'timeToRide': '5'}, {'liftID': 8, 'resortID': 13, 'liftName': 'Excelerator Express', 'status': 'X', 'timeToRide': '6'}, {'liftID': 6, 'resortID': 13, 'liftName': 'Magic Chair', 'status': 'X', 'timeToRide': '6'}, {'liftID': 4, 'resortID': 13, 'liftName': 'Jersey Cream Express', 'status': 'X', 'timeToRide': '5'}, {'liftID': 9, 'resortID': 13, 'liftName': 'Catskinner Express', 'status': 'X', 'timeToRide': '4'}, {'liftID': 22, 'resortID': 13, 'liftName': 'Peak 2 Peak Gondola', 's

In [68]:
result

[]

In [None]:
test.download_fileobj

In [9]:
client = boto3.client('s3')
client.list_objects(Bucket=BUCKET_NAME, Prefix="Jan")

{'ResponseMetadata': {'RequestId': '184604047F22FFA5',
  'HostId': 'pDbahna6N4UZWGH3f6aMFrrw00vx3Lc0Cz9ZVDNdXGSTe+axAvvkUiCLs3PwKQaI5vWxymJ4wDs=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'pDbahna6N4UZWGH3f6aMFrrw00vx3Lc0Cz9ZVDNdXGSTe+axAvvkUiCLs3PwKQaI5vWxymJ4wDs=',
   'x-amz-request-id': '184604047F22FFA5',
   'date': 'Thu, 02 Jan 2020 18:13:58 GMT',
   'x-amz-bucket-region': 'us-west-2',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'IsTruncated': False,
 'Marker': '',
 'Contents': [{'Key': 'January 01, 2020 21:46wb_lifts.json',
   'LastModified': datetime.datetime(2020, 1, 2, 5, 46, 2, tzinfo=tzutc()),
   'ETag': '"05eecf73cad912b8fee16d510465979d"',
   'Size': 2948,
   'StorageClass': 'STANDARD',
   'Owner': {'DisplayName': 'vial.paul',
    'ID': 'dc55c0d2e12dc3724ee87621686dc16731b91d561d60ec19a5ae982ff90d3e10'}},
  {'Key': 'January 01, 2020ADD_LABEL_HERE.json',
   'LastModified': datetim