In [149]:
import nltk
from nltk.corpus import wordnet as wn
import requests
import pandas as pd
import openai
import math
import json
import numpy as np


In [18]:
# Read the content of the text file
with open('project_important_info.txt', 'r') as file:
    lines = file.readlines()

# Define a variable to hold the API key
api_key = None

# Loop through each line to find the API key
for line in lines:
    if "Google map API key:" in line:
        # Extract the API key from the line
        api_key = line.split(': ')[1].strip()
    if "OpenAI API key:" in line:
        # Extract the API key from the line
        openai.api_key = line.split(': ')[1].strip()

# Print the extracted API key
print("Extracted Google API key:", api_key)
print("Extracted OpenAI API key:", openai.api_key)


Extracted Google API key: AIzaSyDUI59SBo58ubrKL6sQzBJo8phEoMfBG4U
Extracted OpenAI API key: sk-proj-p6KGmf5zP6xaJTYMDckfT3BlbkFJbYoGJJi5bbTcMEGnjCRW


In [3]:
nltk.download('wordnet')
nltk.download('omw-1.4')
def merge_sets(sets):
    merged_set = set()
    for s in sets:
        merged_set |= s
    return merged_set
    
def get_synonyms(word):
    synonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name().replace('_', ' '))
    synonyms.add(word)
    return synonyms
def get_merge_syns(*words):
    syn_lst = []
    for w in words:
        syn_lst.append(get_synonyms(w))
    return merge_sets(syn_lst)
    
# Example synonyms for 'quiet'
quiet_synonyms = get_merge_syns('noisy', 'loud')
print(quiet_synonyms)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hanzhitan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/hanzhitan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


{'tacky', 'brassy', 'gaudy', 'tawdry', 'loud', 'aloud', 'gimcrack', 'noisy', 'cheap', 'flash', 'flashy', 'trashy', 'tatty', 'forte', 'meretricious', 'loudly', 'garish'}


In [42]:
def fetch_place_details(place_id, api_key):
    details_url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=reviews&key={api_key}"
    response = requests.get(details_url)
    if response.status_code == 200:
        return response.json().get('result', {}).get('reviews', [])
    else:
        return []

def fetch_places_nearby(api_key, location, radius, place_type):
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        'key': api_key,
        'location': location,  # "latitude,longitude"
        'radius': radius,
        'type': place_type
    }
    response = requests.get(url, params=params)
    results = response.json().get('results', [])
    places = []
    for place in results:
        place_id = place.get("place_id") 
        reviews = fetch_place_details(place_id, api_key)
        places.append({
            'name': place.get('name'),
            'address': place.get('vicinity'),
            'type': place_type,
            'rating': place.get('rating', None),
            'user_ratings_total': place.get('user_ratings_total', 0),
            'latitude': place['geometry']['location']['lat'],
            'longitude': place['geometry']['location']['lng'],
            'reviews': reviews,
        })
    return places

def fetch_all_places(api_key, raw_location, radius, place_types):
    all_places = []
    seen_places = set()
    la_1km, long_1km = 0.009009, 0.011944
    change = [(0, long_1km), (0, -long_1km), (la_1km, 0), (-la_1km, 0), (0, 0)]
    for place_type in place_types:
        for a, b in change:
            location_str = (f"{raw_location[0]+a}, {raw_location[1]+b}")
            places = fetch_places_nearby(api_key, location_str, radius, place_type)
            print(places)
            for place in places:
                place_id = place.get('place_id') or place.get('name')  # Use unique identifier like place_id or name
                if place_id not in seen_places:
                    seen_places.add(place_id)
                    all_places.append(place)
                
    return all_places



# Set the radius and place types you want to fetch
radius = 1000  # 1 km radius
place_types = ['cafe', 'library']  # Add more types if needed

# Fetch all places within the boundaries of Chicago
all_places_chicago = fetch_all_places(api_key, (41.7923666, -87.5998346), radius, place_types)
print(len(all_places_chicago))


[{'name': 'Starbucks', 'address': '1174 East 55th Street, Chicago', 'type': 'cafe', 'rating': 4.1, 'user_ratings_total': 650, 'latitude': 41.79518699999999, 'longitude': -87.596741, 'reviews': [{'author_name': 'Kendra Torres', 'author_url': 'https://www.google.com/maps/contrib/102908067659372163963/reviews', 'language': 'en', 'original_language': 'en', 'profile_photo_url': 'https://lh3.googleusercontent.com/a-/ALV-UjUl9tAghO445MZaxnOnpf_soN6Wvi2zfYGnbtcYMj9gy-JJfBxP=s128-c0x00000000-cc-rp-mo', 'rating': 3, 'relative_time_description': 'a week ago', 'text': 'Just so you guys know this Starbucks is claiming that they no longer except in person or orders you have to order from the app. I find no issue with that at all, but I do feel for people who don’t have smart phones although I think everybody does now but what if you don’t have the app on your phone and what if you don’t want the app Starbucks what’s going on?', 'time': 1715461905, 'translated': False}, {'author_name': 'Imanurse Nich

In [44]:
labels_with_synonyms = {
    'quiet': get_merge_syns('quiet', 'silent', 'private', 'self study'),
    'noisy': get_merge_syns('noisy', 'loud', 'crowded'),
    'spacious': get_merge_syns('spacious', 'large space', 'many tables', 'many seating'),
    'collaborate': get_merge_syns('discuss','collaborate', 'group', 'group project', 'meet'),
    'cozy': get_merge_syns('cozy'),
    'wi-fi': get_merge_syns('internet', 'wi-fi', 'wifi', 'remote meeting', 'online meeting'),
    'indoor sitting': 'indoor sitting',
    'exclusive to student': 'exclusive to student'
}
print(labels_with_synonyms)
# def label_extraction(place_info):
#     description = ""
#     for r in place_info.get('reviews'):
#         description += r['text'].lower()
#     features = {label: 0 for label in labels_with_synonyms.keys()}
#     print(description)
#     for label, synonyms in labels_with_synonyms.items():
#         count = sum(description.count(word) for word in synonyms)
#         features[label] += count
    
#     return features



# # Test the function
# for i in range(len(all_places_chicago)):
#     print(label_extraction(all_places_chicago[i]))



# # {
# #     "quiet": 0,
# #     "wifi_available": 1,
# #     "ambience": ["cozy"],
# #     "group_work": 1
# # }

{'quiet': {'pipe down', 'still', 'silence', 'self study', 'unruffled', 'hush', 'common soldier', 'tranquillise', 'quiet down', 'soundless', 'private', 'tranquillize', 'mum', 'placidity', 'tranquility', 'repose', 'placid', 'unsounded', 'tacit', 'serenity', 'quieten', 'restrained', 'smooth', 'tranquilize', 'tranquil', 'calm down', 'mute', 'understood', 'quietly', 'hushed', 'lull', 'buck private', 'calm', 'muted', 'subdued', 'individual', 'dumb', 'quiesce', 'silent', 'secret', 'tranquillity', 'quiet'}, 'noisy': {'tacky', 'brassy', 'gaudy', 'tawdry', 'loud', 'aloud', 'gimcrack', 'noisy', 'cheap', 'flash', 'herd', 'flashy', 'crowd together', 'push', 'trashy', 'tatty', 'forte', 'crowded', 'crowd', 'meretricious', 'loudly', 'garish'}, 'spacious': {'many seating', 'large space', 'many tables', 'spacious', 'broad', 'wide', 'roomy'}, 'collaborate': {'grouping', 'collaborate', 'fit', 'touch', 'converge', 'take on', 'suffer', 'fill', 'forgather', 'talk about', 'fulfil', 'foregather', 'chemical gro

In [130]:
def check_properties(review, properties):
    prompt = f"Given the review: '{review}', determine whether the following properties are present (will you conclude this place as having following property?) ALWAYS respond with a JSON dictionary where the keys are the properties and the values are true or false.\n"
    for prop in properties:
        prompt += f"Property: {prop}\n"

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an assistant that helps analyze text user reviews about a self-study place."},
            {"role": "user", "content": prompt}
        ]
    )

    result = response['choices'][0]['message']['content']
    print(result)
    try:
        result_dict = json.loads(result)
        print(type(result_dict))
        return result_dict
    except json.JSONDecodeError:
        print("Failed to decode JSON from response.")
        return None

In [131]:
import re

def label_extraction(place_info, gpt=False):
    description = ""
    for r in place_info.get('reviews'):
        description += r['text'].lower()
    features = {label: 0 for label in labels_with_synonyms.keys()}
    if not gpt:
        for label, synonyms in labels_with_synonyms.items():
            count = 0
            for word in synonyms:
                # Use regular expressions to match synonyms in the description
                pattern = r"\b" + re.escape(word) + r"\b"
                matches = re.findall(pattern, description)
                count += len(matches)
            
            # Exclude false positives
            for word in synonyms:
                negation_pattern = r"\bnot " + re.escape(word) + r"\b"
                negation_matches = re.findall(negation_pattern, description)
                count -= len(negation_matches)
            
            # Handle more complex patterns like "many ... seating"
            if label == 'spacious':
                complex_pattern = r"many [\w\s]* seating"
                complex_matches = re.findall(complex_pattern, description)
                count += len(complex_matches)
            
            features[label] += max(count, 0)  # Ensure count is non-negative
        
        return features
    else:
        return check_properties(description, labels_with_synonyms.keys())

In [132]:
# Test the function
for i in range(len(all_places_chicago)):
    gpt_label_result = label_extraction(all_places_chicago[i], gpt=True)
    all_places_chicago[i]['label'] = gpt_label_result
    print(all_places_chicago[i]['name'], all_places_chicago[i]['label'])

{
  "quiet": false,
  "noisy": true,
  "spacious": false,
  "collaborate": false,
  "cozy": false,
  "wi-fi": true,
  "indoor sitting": true,
  "exclusive to student": false
}
<class 'dict'>
Starbucks {'quiet': False, 'noisy': True, 'spacious': False, 'collaborate': False, 'cozy': False, 'wi-fi': True, 'indoor sitting': True, 'exclusive to student': False}
{
    "quiet": false,
    "noisy": true,
    "spacious": false,
    "collaborate": false,
    "cozy": true,
    "wi-fi": true,
    "indoor sitting": true,
    "exclusive to student": false
}
<class 'dict'>
Cafe 53 {'quiet': False, 'noisy': True, 'spacious': False, 'collaborate': False, 'cozy': True, 'wi-fi': True, 'indoor sitting': True, 'exclusive to student': False}
{
    "quiet": false,
    "noisy": false,
    "spacious": false,
    "collaborate": false,
    "cozy": true,
    "wi-fi": true,
    "indoor sitting": true,
    "exclusive to student": true
}
<class 'dict'>
Ex Libris Cafe {'quiet': False, 'noisy': False, 'spacious': Fals

In [48]:
print(all_places_chicago)

[{'name': 'Starbucks', 'address': '1174 East 55th Street, Chicago', 'type': 'cafe', 'rating': 4.1, 'user_ratings_total': 650, 'latitude': 41.79518699999999, 'longitude': -87.596741, 'reviews': [{'author_name': 'Kendra Torres', 'author_url': 'https://www.google.com/maps/contrib/102908067659372163963/reviews', 'language': 'en', 'original_language': 'en', 'profile_photo_url': 'https://lh3.googleusercontent.com/a-/ALV-UjUl9tAghO445MZaxnOnpf_soN6Wvi2zfYGnbtcYMj9gy-JJfBxP=s128-c0x00000000-cc-rp-mo', 'rating': 3, 'relative_time_description': 'a week ago', 'text': 'Just so you guys know this Starbucks is claiming that they no longer except in person or orders you have to order from the app. I find no issue with that at all, but I do feel for people who don’t have smart phones although I think everybody does now but what if you don’t have the app on your phone and what if you don’t want the app Starbucks what’s going on?', 'time': 1715461905, 'translated': False}, {'author_name': 'Imanurse Nich

In [52]:
def haversine_distance(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.asin(math.sqrt(a))
    
    # Radius of Earth in kilometers. Use 3956 for miles
    r = 6371

    # Calculate the distance
    distance = c * r
    
    return distance

In [139]:
# Function to calculate the score for each destination
def calculate_score(location, place_info, property_weights):
    properties = list(property_weights.keys())
    property_presence = place_info['label']
    # Calculate property score
    property_score = sum(property_weights[prop] for prop, present in property_presence.items() if present)

    # Apply distance penalty
    distance = haversine_distance(location[0], location[1], place_info['latitude'], place_info['longitude'])
    distance_penalty = -0.1 * distance

    # Calculate final score
    final_score = property_score + distance_penalty

    return final_score

In [140]:
# Define the properties and their weights
property_weights = {
    "quiet": 2,
    "noisy": -2,
    "spacious": 1.5,
    "collaborate": 1,
    "cozy": 1,
    "wi-fi": 2,
    "indoor sitting": 3,
    "exclusive to student": 1
}

In [141]:
print(calculate_score((41.795187, -87.596741), all_places_chicago[0], property_weights))

7.073230889886872e-13
2.9999999999999294


In [142]:
destinations = all_places_chicago

In [152]:
# Calculate scores for each destination
example_location = (41.795187, -87.596741)
to_be_remove = []
for i, destination in enumerate(destinations):
    if destination['label'] == None:
        to_be_remove.append(i)
        continue
    score = calculate_score(example_location, destination, property_weights)
    destination["score"] = score


filtered_list = [item for idx, item in enumerate(destinations) if idx not in to_be_remove]
print(filtered_list)  # Output: ['apple', 'cherry', 'grape']


# Rank destinations by score
ranked_destinations = sorted(destinations, key=lambda x: x["score"], reverse=True)

# Print ranked destinations
for destination in ranked_destinations:
    try:
        print(f"Destination: {destination['name']}, Score: {destination['score']}")
    except:
        continue


7.073230889886872e-13
0.5831838406814952
0.4236241508085032
0.9566468718342825
1.3301700926947095
0.6983902406784688
0.4583163666996685
0.48733401124091125
1.2113489379243019
0.576480175778341
0.7790934143407485
0.6884414196381577
1.1705824556874465
0.4682991439880085
0.8876962387323413
0.38607308433004633
1.1486457034761792
0.18769677894309422
1.2217867608320567
0.7118324545729592
0.7717737695686891
1.8280061605305824
1.8388032948089525
0.8334022461912537
0.7326847280517973
0.5351713141191852
1.6403499879716965
0.18909460395342811
1.1970899629782967
1.643097514469229
0.7406330851453485
0.8338073882225849
1.6582464920944202
2.0773143988801546
0.5890581395721273
0.41893547206083537
0.5793704619306983
1.1571134215074947
0.1589508558183444
1.1929077704529691
0.43630266085819525
0.7250328099387042
0.7749337151567791
0.49512780668831624
1.2976703063252646
0.8387440963898315
0.8473004320484452
1.8736300043647551
1.1359374692704118


TypeError: only integer scalar arrays can be converted to a scalar index