In [12]:
import nltk
from nltk.corpus import wordnet as wn
import requests
import pandas as pd

In [32]:
# Read the content of the text file that holds the project's credentials
with open('project_important_info.txt', 'r') as file:
    lines = file.readlines()

# Text that precedes the key on its line in the info file
API_KEY_MARKER = "Google map API key:"

# Define a variable to hold the API key (stays None when no line carries the marker)
api_key = None

# Loop through each line to find the API key; the last matching line wins
for line in lines:
    if API_KEY_MARKER in line:
        # Take everything after the marker, so the key is still extracted when
        # the colon has no trailing space or the line contains another ': '
        api_key = line.split(API_KEY_MARKER, 1)[1].strip()

# Report only whether a key was found: printing the key itself would store
# the secret in the saved notebook output
print("API key loaded:", bool(api_key))


Extracted API key: [REDACTED]


In [94]:
# Download the NLTK data packages needed for the WordNet lookups
for corpus_name in ('wordnet', 'omw-1.4'):
    nltk.download(corpus_name)
def merge_sets(sets):
    """Return the union of all collections in ``sets`` as a new set.

    Args:
        sets: Iterable of sets. Any iterables of hashable items (lists,
            tuples, frozensets) are accepted as well.

    Returns:
        A new ``set`` holding every element found in any input; an empty set
        when ``sets`` is empty. The inputs are not modified.
    """
    # set.union takes any number of iterables, so no manual accumulation is needed
    return set().union(*sets)
    
def get_synonyms(word):
    """Collect the WordNet lemma names for ``word``, plus ``word`` itself.

    Underscores in lemma names are replaced with spaces.

    Args:
        word: The term passed to ``wn.synsets``.

    Returns:
        A set of strings: every lemma name from every synset returned for
        ``word``, together with the original ``word``.
    """
    lemma_names = {
        lemma.name().replace('_', ' ')
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }
    # The queried term is always part of its own synonym set
    return lemma_names | {word}
def get_merge_syns(*words):
    """Return the combined synonym set for all of the given words.

    Args:
        *words: Terms to look up with ``get_synonyms``.

    Returns:
        The result of ``merge_sets`` applied to the list of per-word synonym
        sets, built in the order the words were given.
    """
    return merge_sets([get_synonyms(term) for term in words])
    
# Example: merged synonyms for 'noisy' and 'loud'
# NOTE(review): despite its name, quiet_synonyms holds the synonyms of
# 'noisy'/'loud', not of 'quiet' -- consider renaming once all uses are checked
quiet_synonyms = get_merge_syns('noisy', 'loud')
print(quiet_synonyms)

{'gaudy', 'flash', 'brassy', 'garish', 'meretricious', 'aloud', 'loud', 'trashy', 'noisy', 'loudly', 'tacky', 'forte', 'flashy', 'gimcrack', 'cheap', 'tawdry', 'tatty'}


[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hanzhitan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/hanzhitan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [87]:
def fetch_place_details(place_id, api_key):
    """Fetch the reviews of one place from the Google Places Details endpoint.

    Args:
        place_id: Identifier of the place to look up.
        api_key: Google Maps API key sent with the request.

    Returns:
        The list stored under ``result.reviews`` in the JSON response, or an
        empty list when ``place_id`` is missing, the HTTP status is not 200,
        or the response carries no reviews.
    """
    # Nothing to look up: avoid sending a request with an empty place_id
    if not place_id:
        return []

    details_url = "https://maps.googleapis.com/maps/api/place/details/json"
    # Passing the query as params lets requests URL-encode every value
    params = {
        'place_id': place_id,
        'fields': 'reviews',
        'key': api_key,
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely
    response = requests.get(details_url, params=params, timeout=10)
    if response.status_code != 200:
        return []
    return response.json().get('result', {}).get('reviews', [])

def fetch_places_nearby(api_key, location, radius, place_type):
    """Search for places of one type around a point and attach their reviews.

    Sends one Nearby Search request, then calls ``fetch_place_details`` once
    per result to collect its reviews. Only the results of that single
    response are used; any additional result pages the API may offer are not
    requested.

    Args:
        api_key: Google Maps API key sent with every request.
        location: Search center as a "latitude,longitude" string.
        radius: Search radius forwarded to the API.
        place_type: Place type to filter on (e.g. 'cafe').

    Returns:
        A list of dicts with the keys ``name``, ``address``, ``type``,
        ``rating``, ``user_ratings_total``, ``latitude``, ``longitude`` and
        ``reviews``. Fields missing from the API result are ``None``
        (``user_ratings_total`` defaults to 0). The list is empty when the
        HTTP status is not 200 or the response has no results.
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        'key': api_key,
        'location': location,  # "latitude,longitude"
        'radius': radius,
        'type': place_type
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely
    response = requests.get(url, params=params, timeout=10)
    # Same policy as fetch_place_details: a failed request yields no data
    if response.status_code != 200:
        return []
    results = response.json().get('results', [])
    print(len(results))

    places = []
    for place in results:
        place_id = place.get("place_id")
        reviews = fetch_place_details(place_id, api_key)
        # Tolerate results without geometry instead of raising KeyError
        coordinates = (place.get('geometry') or {}).get('location') or {}
        places.append({
            'name': place.get('name'),
            'address': place.get('vicinity'),
            'type': place_type,
            'rating': place.get('rating', None),
            'user_ratings_total': place.get('user_ratings_total', 0),
            'latitude': coordinates.get('lat'),
            'longitude': coordinates.get('lng'),
            'reviews': reviews,
        })
    return places

def fetch_all_places(api_key, location, radius, place_types):
    """Run a nearby search for each place type and concatenate the results.

    Args:
        api_key: Google Maps API key forwarded to ``fetch_places_nearby``.
        location: Search center forwarded unchanged.
        radius: Search radius forwarded unchanged.
        place_types: Iterable of place types, searched in the given order.

    Returns:
        One flat list holding the places of every type, grouped in the order
        of ``place_types``.
    """
    return [
        place
        for kind in place_types
        for place in fetch_places_nearby(api_key, location, radius, kind)
    ]

# Define the boundaries of Chicago
# NOTE: not used by the search below, which is a single radius search
# around SEARCH_LOCATION rather than a sweep of this bounding box
chicago_boundaries = [
    (41.644335, -87.940267),  # South West corner
    (42.023015, -87.523660)   # North East corner
]

# Center point of the search as "latitude,longitude" (lies inside the box above)
SEARCH_LOCATION = '41.8781,-87.6298'

# Set the radius and place types you want to fetch
radius = 1000  # 1 km radius (the Places API takes the radius in meters)
place_types = ['cafe', 'library']  # Add more types if needed

# Fetch the places of every type within `radius` of the search location
all_places_chicago = fetch_all_places(api_key, SEARCH_LOCATION, radius, place_types)
print(len(all_places_chicago))


20
20
40


In [103]:
# Seed terms for each label; every group is expanded through get_merge_syns
label_seed_terms = {
    'quiet': ('quiet', 'silent', 'private', 'self study'),
    'noisy': ('noisy', 'loud', 'crowded'),
    'spacious': ('spacious', 'large space', 'many tables', 'many seating'),
    'collaborate': ('discuss', 'collaborate', 'group', 'group project', 'meet'),
    'cozy': ('cozy',),
    'wi-fi': ('internet', 'wi-fi', 'wifi', 'remote meeting', 'online meeting'),
}

# Map each label to the merged synonym set of its seed terms
labels_with_synonyms = {
    label: get_merge_syns(*seeds)
    for label, seeds in label_seed_terms.items()
}
print(labels_with_synonyms)
# def label_extraction(place_info):
#     description = ""
#     for r in place_info.get('reviews'):
#         description += r['text'].lower()
#     features = {label: 0 for label in labels_with_synonyms.keys()}
#     print(description)
#     for label, synonyms in labels_with_synonyms.items():
#         count = sum(description.count(word) for word in synonyms)
#         features[label] += count
    
#     return features



# # Test the function
# for i in range(len(all_places_chicago)):
#     print(label_extraction(all_places_chicago[i]))



# # {
# #     "quiet": 0,
# #     "wifi_available": 1,
# #     "ambience": ["cozy"],
# #     "group_work": 1
# # }

{'quiet': {'hush', 'restrained', 'tranquillity', 'smooth', 'quiet down', 'muted', 'mute', 'quiet', 'private', 'hushed', 'secret', 'mum', 'soundless', 'tranquillize', 'quieten', 'self study', 'tranquil', 'silent', 'buck private', 'tranquility', 'dumb', 'calm', 'silence', 'individual', 'placid', 'calm down', 'tacit', 'tranquilize', 'pipe down', 'repose', 'serenity', 'unsounded', 'unruffled', 'common soldier', 'quiesce', 'still', 'lull', 'subdued', 'placidity', 'understood', 'tranquillise', 'quietly'}, 'noisy': {'gaudy', 'flash', 'brassy', 'garish', 'crowd', 'meretricious', 'aloud', 'loud', 'trashy', 'noisy', 'herd', 'loudly', 'tacky', 'crowded', 'forte', 'crowd together', 'flashy', 'gimcrack', 'push', 'cheap', 'tawdry', 'tatty'}, 'spacious': {'many seating', 'broad', 'wide', 'many tables', 'roomy', 'spacious', 'large space'}, 'collaborate': {'fulfill', 'assemble', 'foregather', 'encounter', 'fulfil', 'fit', 'aggroup', 'discus', 'satisfy', 'forgather', 'play', 'fill', 'receive', 'group pr

In [104]:
import re

def label_extraction(place_info):
    """Count how often each label's synonyms occur in a place's reviews.

    Every review text is lowercased and searched on its own, so words at the
    end of one review never fuse with the start of the next. For each label in
    the module-level ``labels_with_synonyms`` the count is the number of
    whole-word synonym matches, minus the matches directly preceded by
    "not ", and never less than 0. The 'spacious' label additionally counts
    "many <words> seating" phrases.

    Args:
        place_info: Dict describing a place. Its ``'reviews'`` entry is read
            as a list of dicts with a ``'text'`` field; a missing or ``None``
            entry (or text) is treated as empty.

    Returns:
        A dict mapping every label to its non-negative match count.
    """
    review_texts = [
        (review.get('text') or "").lower()
        for review in (place_info.get('reviews') or [])
    ]

    def count_matches(pattern):
        # Total number of non-overlapping matches across all reviews
        return sum(len(re.findall(pattern, text)) for text in review_texts)

    features = {label: 0 for label in labels_with_synonyms.keys()}

    for label, synonyms in labels_with_synonyms.items():
        count = 0
        for word in synonyms:
            escaped = re.escape(word)
            # Whole-word (or whole-phrase) occurrences of the synonym
            count += count_matches(r"\b" + escaped + r"\b")
            # Exclude false positives: cancel occurrences negated by "not"
            count -= count_matches(r"\bnot " + escaped + r"\b")

        # Handle more complex patterns like "many ... seating"
        if label == 'spacious':
            # \b stops words that merely end in "many" (e.g. "germany") from
            # matching; the lazy quantifier counts each phrase separately
            count += count_matches(r"\bmany [\w\s]*? seating\b")

        features[label] += max(count, 0)  # Ensure count is non-negative

    return features

# Test the function
# Run the extraction on every fetched place and show its label counts
for place in all_places_chicago:
    print(label_extraction(place))


{'quiet': 1, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 0, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 0, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 1, 'noisy': 0, 'spacious': 1, 'collaborate': 1, 'cozy': 0, 'wi-fi': 0}
{'quiet': 1, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 1, 'noisy': 2, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 1, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 0, 'noisy': 0, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 0, 'noisy': 0, 'spacious': 0, 'collaborate': 3, 'cozy': 0, 'wi-fi': 0}
{'quiet': 0, 'noisy': 1, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 0}
{'quiet': 2, 'noisy': 1, 'spacious': 0, 'collaborate': 0, 'cozy': 0, 'wi-fi': 1}
{'quiet': 0, 'noisy': 0, 'spacious': 0, 'collaborate': 1, 'cozy': 1, 'wi-fi': 0}
{'quiet': 0, 'noisy': 1, 'sp