In [13]:
import json
import statistics
import boto3
from decimal import Decimal

In [14]:
# Handle to the DynamoDB table that receives the shop records written by a
# later cell of this notebook. Credentials/region come from the ambient boto3
# configuration; nothing is hard-coded here.
TABLE_NAME = 'CoffeeShops'
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(TABLE_NAME)
# Printing the handle is a quick sanity check of which table is targeted.
print(table)

dynamodb.Table(name='CoffeeShops')


In [15]:
def analyze(review):
    """Count keyword mentions per category in a single review.

    Matching is case-insensitive and substring-based (``str.count``), so a
    keyword is also counted when it appears inside a longer word.

    Args:
        review: The review text as a string.

    Returns:
        A dict with the keys 'coldbrew', 'cappuccino', 'espresso', 'service',
        'ambiance' and 'value', each mapped to the total number of
        (non-overlapping) occurrences of that category's keywords.
    """
    text = review.lower()

    # Category -> keywords. A single mapping keeps each name next to its
    # keywords instead of relying on two parallel lists staying in sync.
    keywords_by_category = {
        'coldbrew': ('cold-brew', 'cold brew'),
        'cappuccino': ('cappuccino',),
        'espresso': ('espresso',),
        'service': ('nice', 'friendly', 'service', 'patient'),
        # 'quiet' (was misspelled 'quite'): the typo counted every
        # "quite good" as an ambiance mention and missed "quiet" entirely.
        'ambiance': ('study', 'ambiance', 'quiet', 'vibe', 'chill'),
        'value': ('worth', 'cheap', 'value', 'price'),
    }

    return {
        category: sum(text.count(keyword) for keyword in keywords)
        for category, keywords in keywords_by_category.items()
    }

In [16]:
import webbrowser
import datetime
import time
import boto3

def _utc_epoch_string(published_at):
    """Return whole-second UTC epoch time, as a string, for a '...Z' timestamp."""
    parsed = datetime.datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%S.%fZ")
    # The trailing 'Z' marks the value as UTC. time.mktime() would interpret
    # the fields as *local* time and shift every timestamp by the machine's UTC
    # offset, so attach the UTC zone explicitly. Microseconds are dropped to
    # keep the same whole-second "1234567890.0" string shape as before.
    parsed = parsed.replace(tzinfo=datetime.timezone.utc, microsecond=0)
    return str(parsed.timestamp())


# Build one record per coffee shop from the scraped review export. The records
# are collected in shopDataList for the download / upload / persist cells.
shopDataList = []

# Load everything first so the file handle is released before processing.
with open('reviews_nyc.json') as f:
    data = json.load(f)

# A set gives O(1) duplicate checks (the list version was O(n) per shop).
# NOTE(review): shops are de-duplicated by title, so two different locations
# sharing a title collapse into one record — confirm this is intended.
seen_ids = set()
for shop in data:
    shopId = shop['title']
    if shopId in seen_ids:
        continue
    seen_ids.add(shopId)

    shopCid = shop['cid']
    address = shop['address']
    locationInfo = {
        "latitude": shop['location']['lat'],
        "longitude": shop['location']['lng'],
        "tags": [],
    }

    # Only reviews that carry text are kept; the rating count and average
    # below are therefore computed over text reviews only.
    reviews = [review for review in shop['reviews'] if review['text'] is not None]
    numRatings = len(reviews)
    # statistics.mean raises StatisticsError on empty input; a shop without
    # any text review gets a rating of 0 alongside numrating == 0.
    avgRating = statistics.mean([review['stars'] for review in reviews]) if reviews else 0

    reviewData = [
        {
            'name': review['name'],
            'rating': review['stars'],
            'userId': review['reviewerId'],
            'review': review['text'],
            'timestamp': _utc_epoch_string(review['publishedAtDate']),
        }
        for review in reviews
    ]

    # Sum the per-review keyword counts into per-shop totals.
    analysis = {"cappuccino": 0, "espresso": 0, "ambiance": 0, "coldbrew": 0, "value": 0, "service": 0}
    for review in reviewData:
        count = analyze(review['review'])
        for category in analysis:
            analysis[category] += count.get(category, 0)

    # Only the first listed image is used for the shop.
    imageUrl = shop['imageUrls'][0]

    shopData = {
        'shopid': shopId,
        'shopname': shopId,
        'shopCid': shopCid,
        'address': address,
        'analysis': analysis,
        'locationinfo': locationInfo,
        'numrating': numRatings,
        'rating': avgRating,
        'reviews': reviewData,
        'imageOrigUrl': imageUrl
    }
    shopDataList.append(shopData)

    # JSON round trip turns floats into Decimal; the record is written to the
    # table in a later cell, once the hosted image URL is known.
    parsedShopData = json.loads(json.dumps(shopData), parse_float=Decimal)



In [17]:
import requests
import shutil
# Download each shop's source image to images/<shopCid> so a later cell can
# upload it. The images/ directory is expected to exist already.
for shopData in shopDataList:
    imgUrl = shopData['imageOrigUrl']
    targetFileName = 'images/' + shopData['shopCid']
    print(targetFileName)
    # timeout: without one a stalled server blocks the notebook indefinitely.
    # with: a streamed response holds its connection until explicitly closed.
    with requests.get(imgUrl, stream=True, timeout=30) as res:
        if res.status_code == 200:
            # res.raw is the undecoded byte stream; have urllib3 undo any
            # gzip/deflate content encoding so the saved file is the actual
            # image rather than a compressed payload.
            res.raw.decode_content = True
            with open(targetFileName, 'wb') as f:
                shutil.copyfileobj(res.raw, f)
            print('Image successfully downloaded: ', targetFileName)
        else:
            print('Image couldn\'t be retrieved')

images/6027150019995756238
Image successfully downloaded:  images/6027150019995756238
images/3923933014609822664
Image successfully downloaded:  images/3923933014609822664
images/17109205974731136392
Image successfully downloaded:  images/17109205974731136392
images/16194238829966024966
Image successfully downloaded:  images/16194238829966024966
images/2419970586883276967
Image successfully downloaded:  images/2419970586883276967
images/17058280269791006516
Image successfully downloaded:  images/17058280269791006516
images/11759132225502559662
Image successfully downloaded:  images/11759132225502559662
images/5147489099688753640
Image successfully downloaded:  images/5147489099688753640
images/7405663049912743431
Image successfully downloaded:  images/7405663049912743431
images/17650561671694508433
Image successfully downloaded:  images/17650561671694508433
images/4415530143815304736
Image successfully downloaded:  images/4415530143815304736
images/4148875047957229961
Image successfull

In [18]:
# Upload every downloaded image to S3 under its shopCid key and record the
# resulting object URL on the shop record as 'imageUrl'.
# The client is loop-invariant, so it is built once instead of once per shop.
s3_client = boto3.client('s3')
# Single source of truth for the bucket: it is used for both the upload target
# and the URL stored on the record.
bucket_name = 'coffee-photos-bucket'
for shopData in shopDataList:
    shopCid = shopData['shopCid']
    targetFileName = 'images/' + shopCid
    # The return value is not captured; failures surface as exceptions.
    s3_client.upload_file(targetFileName, bucket_name, shopCid)
    shopData['imageUrl'] = 'https://' + bucket_name + '.s3.amazonaws.com/' + shopCid
    print(shopData['imageUrl'])

https://coffee-photos-bucket.s3.amazonaws.com/6027150019995756238
https://coffee-photos-bucket.s3.amazonaws.com/3923933014609822664
https://coffee-photos-bucket.s3.amazonaws.com/17109205974731136392
https://coffee-photos-bucket.s3.amazonaws.com/16194238829966024966
https://coffee-photos-bucket.s3.amazonaws.com/2419970586883276967
https://coffee-photos-bucket.s3.amazonaws.com/17058280269791006516
https://coffee-photos-bucket.s3.amazonaws.com/11759132225502559662
https://coffee-photos-bucket.s3.amazonaws.com/5147489099688753640
https://coffee-photos-bucket.s3.amazonaws.com/7405663049912743431
https://coffee-photos-bucket.s3.amazonaws.com/17650561671694508433
https://coffee-photos-bucket.s3.amazonaws.com/4415530143815304736
https://coffee-photos-bucket.s3.amazonaws.com/4148875047957229961
https://coffee-photos-bucket.s3.amazonaws.com/9856758285518493814
https://coffee-photos-bucket.s3.amazonaws.com/15190362872322481999
https://coffee-photos-bucket.s3.amazonaws.com/7232349319319611490
http

In [20]:
# Persist every shop record (now carrying its hosted 'imageUrl') to the table.
for shopData in shopDataList:
    print(shopData['imageUrl'])
    # Serialize, then re-parse with floats read back as Decimal so the item
    # holds no Python floats when it is handed to put_item.
    serialized = json.dumps(shopData)
    parsedShopData = json.loads(serialized, parse_float=Decimal)
    response = table.put_item(Item=parsedShopData)

https://coffee-photos-bucket.s3.amazonaws.com/6027150019995756238
https://coffee-photos-bucket.s3.amazonaws.com/3923933014609822664
https://coffee-photos-bucket.s3.amazonaws.com/17109205974731136392
https://coffee-photos-bucket.s3.amazonaws.com/16194238829966024966
https://coffee-photos-bucket.s3.amazonaws.com/2419970586883276967
https://coffee-photos-bucket.s3.amazonaws.com/17058280269791006516
https://coffee-photos-bucket.s3.amazonaws.com/11759132225502559662
https://coffee-photos-bucket.s3.amazonaws.com/5147489099688753640
https://coffee-photos-bucket.s3.amazonaws.com/7405663049912743431
https://coffee-photos-bucket.s3.amazonaws.com/17650561671694508433
https://coffee-photos-bucket.s3.amazonaws.com/4415530143815304736
https://coffee-photos-bucket.s3.amazonaws.com/4148875047957229961
https://coffee-photos-bucket.s3.amazonaws.com/9856758285518493814
https://coffee-photos-bucket.s3.amazonaws.com/15190362872322481999
https://coffee-photos-bucket.s3.amazonaws.com/7232349319319611490
http