In [3]:
import requests
import json
from pprint import pprint
from dotenv import load_dotenv
import os
from geopy.distance import geodesic
import json
import random
from shapely.geometry import shape, Point, mapping
from shapely.ops import transform
from pyproj import CRS, Transformer
import mysql.connector
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options

load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
HOST= os.getenv("SQL_HOST")
USER= os.getenv("SQL_USER")
DATABASE= os.getenv("SQL_DATABASE")
PASSWORD=os.getenv("SQL_PASSWORD")

In [4]:
def getPlaces(latitude, longitude, api_key):
    url = "https://places.googleapis.com/v1/places:searchNearby"

    data = {
        "maxResultCount": 20,
        "rankPreference": "DISTANCE",
        "locationRestriction": {
            "circle": {
                "center": {
                    "latitude": latitude,
                    "longitude": longitude
                },
                "radius": 50000.0
            }
        }
    }

    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": api_key,
        "X-Goog-FieldMask": "places.displayName,places.location,places.formattedAddress,places.types,places.primaryType,places.id"
    }

    response = requests.post(url, json=data, headers=headers)
    response.raise_for_status()  # Will raise an HTTPError for non-200 responses

    return response.json()

In [None]:

# Define the endpoint and payload
url = "https://places.googleapis.com/v1/places:searchNearby"

data = {
    "maxResultCount": 20,
    "rankPreference": "DISTANCE",
    "locationRestriction": {
        "circle": {
            "center": {
                "latitude": 34.861743,
                "longitude": -82.40443
            },
            "radius": 50000.0
        }
    }
}

# Headers to include specific fields
headers = {
    "Content-Type": "application/json",
    "X-Goog-Api-Key": API_KEY,
    "X-Goog-FieldMask": "places.displayName,places.location,places.formattedAddress,places.types,places.primaryType,places.id"
}

# Send the request
response = requests.post(url, json=data, headers=headers)
places_data = response.json()

print(response)
print(response.text)


In [None]:
places_data=getPlaces(34.861743, -82.40443, API_KEY)

In [None]:
#filtered_test=clean_places_data(places_data)
pprint(filtered_test[0])

In [None]:
#print all values
for location in places_data.get("places", []):
    address = location.get('formattedAddress', 'NULL')
    lat = location.get('location', {}).get('latitude', 'NULL')
    lon = location.get('location', {}).get('longitude', 'NULL')
    primary_type = location.get('primaryType', 'NULL')
    display_name = location.get('displayName', {}).get('text', 'NULL')

    print(f"{address}\n{lat}\n{lon}\n{primary_type}\n{display_name}\n")


In [None]:
# Extract the list of places from the JSON
places = places_data.get("places", [])

# Filter places where the formatted address starts with a digit
filtered_places = [
    place for place in places
    if "formattedAddress" in place and place["formattedAddress"][0].isdigit()
]
for location in filtered_places:
    address = location.get('formattedAddress', 'NULL')
    lat = location.get('location', {}).get('latitude', 'NULL')
    lon = location.get('location', {}).get('longitude', 'NULL')
    primary_type = location.get('primaryType', 'NULL')
    display_name = location.get('displayName', {}).get('text', 'NULL')

    print(f"{address}\n{lat}\n{lon}\n{primary_type}\n{display_name}\n")


In [None]:
print(places_data.get("places", [])[1].get('displayName', {}).get('text', 'NULL'))
print(places_data.get("places", [])[2].get('displayName', {}).get('text', 'NULL'))

In [None]:
#get furthest point (radius of area)
reference_point = (34.861743, -82.40443)

# Assume places_data is already defined from your Places API v1 response
location_array = []
for i in places_data['places']:
    lat = i['location']['latitude']
    lng = i['location']['longitude']
    location_array.append((lat, lng))

# Calculate distances from reference point
distances = [
    (geodesic(reference_point, loc).meters, loc)
    for loc in location_array
]

# Find the furthest point
max_distance, furthest_point = max(distances, key=lambda x: x[0])

print(f"Furthest point: {furthest_point}")
print(f"Distance: {max_distance:} meters")

In [5]:
def get_max_distance(lat, lon, places_data):
    reference_point = (lat, lon)

    max_distance = -1
    furthest_place_name = "Unknown"
    
    for place in places_data.get('places', []):
        try:
            place_location = (place['location']['latitude'], place['location']['longitude'])
            distance = geodesic(reference_point, place_location).meters
            
            if distance > max_distance:
                max_distance = distance
                furthest_place_name = place.get('displayName', {}).get('text', 'Unnamed place')

        except KeyError:
            continue  # Skip places with missing coordinates

    #print(f"📍 Furthest place: {furthest_place_name} ({max_distance:.2f} meters away)")
    return max_distance

In [6]:
#add websites to places v2
def get_websites(filtered_places):
        # Setup Chrome
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")  # Uncomment if you want headless mode
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 1)
    
    #for location in places_data.get("places", []):
    for location in filtered_places:
        # Open the target URL
        driver.get("https://www.google.com/maps/place/")
        
        businessAddress=location.get('formattedAddress', 'NULL')
        businessName=location.get('displayName', {}).get('text', 'NULL')
        safe_business_name = businessName.replace('"', '&quot;')
        #print(businessAddress)
        #print(businessName)
        # Clear any pre-filled text and enter your query
        if not businessAddress[0].isdigit():
            #print("not an address")
            continue
            
    
        search_input = wait.until(EC.visibility_of_element_located((By.ID, "searchboxinput")))
    
        search_input.send_keys(f"{businessName} {businessAddress}")
        
        search_input.send_keys(Keys.ENTER)      
        
        # Locate the element using the 'data-tooltip' attribute
        try:
            # XPath that matches the "Open website" button
            
            xpath = f"""//a[
                (contains(@class, 'CsEnBe') and @data-tooltip='Open website') or 
                @aria-label="Visit {safe_business_name}'s website"
            ]"""
            
            element = wait.until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            # Get the href attribute
            href_value = element.get_attribute('href')
            #assign the website to the location
            location['website']=href_value
            #print(href_value)
        except:
            #print("no website")
            location['website']=None
        
    #Close the driver
    driver.quit()
    return filtered_places

In [None]:
pprint(filtered_places)

In [7]:
def remove_non_bmp(text):
    return ''.join(c for c in text if ord(c) <= 0xFFFF)
#removes unwanted datatypes such as bus stops, parks, etc. Also removes POI and establishment from types list
def clean_places_data(places_data):
    filtered_places = [
        {
            **place,
            'displayName': {
                **place.get('displayName', {}),
                'text': remove_non_bmp(place.get('displayName', {}).get('text', ''))
            },
            'types': [t for t in place.get('types', []) if t not in ['point_of_interest', 'establishment']]
        }
        for place in places_data.get("places", [])
        if "formattedAddress" in place and place["formattedAddress"][0].isdigit()
    ]
    return filtered_places

In [None]:
filterTest=clean_places_data(response.json())
pprint(filterTest)

In [8]:
def calculate_geojson_area(geojson_file):
    # Load the GeoJSON
    try:
        with open(geojson_file, 'r') as f:
            data = json.load(f)
    
        total_area_m2 = 0.0
    
        for feature in data['features']:
            geom = shape(feature['geometry'])
    
            # Get centroid for local projection (UTM)
            centroid = geom.centroid
            lon, lat = centroid.x, centroid.y
    
            # Determine UTM zone
            utm_zone = int((lon + 180) / 6) + 1
            is_northern = lat >= 0
    
            crs_wgs84 = CRS("EPSG:4326")
            crs_utm = CRS.from_proj4(
                f"+proj=utm +zone={utm_zone} +datum=WGS84 +units=m +{'north' if is_northern else 'south'}"
            )
    
            # Create projection function
            project = Transformer.from_crs(crs_wgs84, crs_utm, always_xy=True).transform
    
            # Project geometry to meters (UTM) and calculate area
            geom_projected = transform(project, geom)
            total_area_m2 += geom_projected.area
            if total_area_m2==None:
                print(f"returning {total_area_m2} (none)")
                return 0
            else:
                print(f"returning {total_area_m2}")
                return total_area_m2  # square meters
    except: return 0


In [9]:
def get_random_point_within_geojson(geojson_file):
    # Load the GeoJSON
    with open(geojson_file, 'r') as f:
        data = json.load(f)

    # Combine all geometries
    combined_geom = shape(data['features'][0]['geometry'])
    for feature in data['features'][1:]:
        combined_geom = combined_geom.union(shape(feature['geometry']))

    # Bounding box for sampling
    minx, miny, maxx, maxy = combined_geom.bounds

    # Try generating a point inside the polygon
    for _ in range(100000):
        lat = random.uniform(minx, maxx)
        lon = random.uniform(miny, maxy)
        point = Point(lat, lon)
        if combined_geom.contains(point):
            return [point.y, point.x]  # [lat,lon]

    raise ValueError("Couldn't find a point inside the GeoJSON geometry.")

In [10]:
def remove_overlap_with_circle(geojson_number, lat, lon, radius_meters):
    # Determine UTM zone
    utm_zone = int((lon + 180) / 6) + 1
    is_northern = lat >= 0

    crs_wgs84 = CRS("EPSG:4326")
    crs_utm = CRS.from_proj4(
        f"+proj=utm +zone={utm_zone} +datum=WGS84 +units=m +{'north' if is_northern else 'south'}"
    )

    # Set up projection transformers
    to_utm = Transformer.from_crs(crs_wgs84, crs_utm, always_xy=True).transform
    to_wgs84 = Transformer.from_crs(crs_utm, crs_wgs84, always_xy=True).transform

    # Create circle in UTM
    center_utm = transform(to_utm, Point(lon, lat))
    circle_utm = center_utm.buffer(radius_meters, resolution=64)

    # Load input GeoJSON and next highest incase there is overlap at the edges
    for i in range(geojson_number, geojson_number + 2):
        with open(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson", 'r') as f:
            data = json.load(f)
    
        updated_features = []
        for feature in data.get("features", []):
            geom = shape(feature["geometry"])
            geom_utm = transform(to_utm, geom)
    
            # Remove overlap if present
            if geom_utm.intersects(circle_utm):
                diff = geom_utm.difference(circle_utm)
                if not diff.is_empty:
                    new_geom = transform(to_wgs84, diff)
                    updated_features.append({
                        "type": "Feature",
                        "geometry": mapping(new_geom),
                        "properties": feature.get("properties", {})
                    })
            else:
                updated_features.append(feature)
    
        # Overwrite the input file
        updated_geojson = {
            "type": "FeatureCollection",
            "features": updated_features
        }
    
        with open(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson", 'w') as f:
            json.dump(updated_geojson, f, indent=2)

    #print(f"Overlapping areas removed and geojson{geojson_number} has been updated.")


In [None]:
#create initial database
conn = mysql.connector.connect(
    host=HOST,
    user=USER,
    password=PASSWORD,
    database=DATABASE
)

cursor = conn.cursor()

create_table_query = """
CREATE TABLE IF NOT EXISTS businesses (
    business_id INT AUTO_INCREMENT PRIMARY KEY,
    business_name VARCHAR(255),
    primary_type VARCHAR(255),
    website VARCHAR(255),
    business_address VARCHAR(255),
    coordinates POINT NOT NULL,
    google_id VARCHAR(255) UNIQUE,
    SPATIAL INDEX(coordinates)
);
"""
create_business_types_query="""
CREATE TABLE IF NOT EXISTS business_types (
    id INT AUTO_INCREMENT PRIMARY KEY,
    google_id VARCHAR(255),
    type VARCHAR(100),
    FOREIGN KEY (google_id) REFERENCES businesses(google_id)
);
"""
create_survey_area_query="""
CREATE TABLE IF NOT EXISTS mapping_progress(
    id INT AUTO_INCREMENT PRIMARY KEY,
    circle_number INT,
    radius FLOAT,
    center POINT NOT NULL,
    focal_point VARCHAR(100),
    percent_mapped FLOAT,
    SPATIAL INDEX(center)
);
"""
cursor.execute(create_table_query)
conn.commit()
cursor.execute(create_business_types_query)
conn.commit()
cursor.execute(create_survey_area_query)
conn.commit()


cursor.close()
conn.close()

In [11]:
#upload data from places API call to sql
def sql_upload_data(filtered_places, HOST, USER, PASSWORD, DATABASE):
    conn = connect_with_retry(HOST, USER, PASSWORD, DATABASE)
    cursor = conn.cursor()
    
    business_data = []
    types_data=[]
    # Prepare business data
    for place in filtered_places:
        business_name = place.get('displayName', {}).get('text', '')
        primary_type = place.get('primaryType')
        if primary_type==None and place.get('types')!=None and place.get('types')!=[]:
            primary_type=place.get('types')[0]
        website = place.get('website')
        business_address = place.get('formattedAddress')
        latitude = place.get('location', {}).get('latitude', None)
        longitude = place.get('location', {}).get('longitude', None)
        google_id = place.get('id')  # This should be available in your data
            
        business_data.append((
            business_name,
            primary_type, 
            website,
            business_address,
            longitude,
            latitude,
            google_id
        ))

        #get all additional types
        if place.get('types')!=None and place.get('types')!=[]:
            for type_name in place['types']:
                types_data.append((
                    google_id,
                    type_name
                ))

    insert_business_query = """
        INSERT IGNORE INTO businesses (business_name, primary_type, website, business_address, coordinates, google_id)
        VALUES (%s, %s, %s, %s, POINT(%s, %s), %s)
    """
    insert_types_query= """
        INSERT IGNORE INTO business_types (google_id, type)
        VALUES (%s, %s)
    """
    cursor.executemany(insert_business_query, business_data)
    conn.commit()
    print(f"Total rows inserted: {cursor.rowcount}")
    cursor.executemany(insert_types_query, types_data)
    conn.commit()
    
    #print(f"Prepared {len(business_data)} businesses for insertion")


In [12]:
def connect_with_retry(HOST, USER, PASSWORD, DATABASE):
    while True:
        try:
            conn = mysql.connector.connect(
                host=HOST,
                user=USER,
                password=PASSWORD,
                database=DATABASE
            )
            return conn
        except mysql.connector.Error as err:
            print(f"❌ Connection failed: {err}")
            print("⏳ Retrying in 10 seconds...")
            time.sleep(10)

In [13]:
def collect_places_data(current_circle_number, focal_point):
    #get the current area of the circle
    current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
    #get the calculated maximum area of the circle
    with open(r"C:\Users\Joe\Desktop\geojsons\geojson_total_areas.json") as f:
        areas = json.load(f)
    max_area=areas[f"circle{current_circle_number}.geojson"]

    #loop until 97% of circle has been mapped
    while (current_area/max_area)>0.03:
        #generate a random point that has not been mapped
        randomPoint=get_random_point_within_geojson(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
        #get google places data from that random point
        places_data=getPlaces(randomPoint[0],randomPoint[1], API_KEY)
        #get the size of the circle created from the google places data
        places_radius=get_max_distance(randomPoint[0],randomPoint[1], places_data)
        #filter the places data for non-businesses
        places_data=clean_places_data(places_data)
        #use selenium to add websites to places_data
        filtered_places=get_websites(filtered_places)
        #add places to database
        sql_upload_data(filtered_places, conn, cursor)
        #modify geojson to remove the area
        remove_overlap_with_circle(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson", randomPoint[0], randomPoint[1], places_radius)
        #calculate new area
        current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
        #calculate current percentage mapped
        percentage_mapped=(current_area/max_area)
        #add point, radius, and new percentage mapped to database
        sql_upload_mapping_data(current_circle_number, places_radius, randomPoint[1],randomPoint[0], focal_point, percentage_mapped)
        

In [14]:
def sql_upload_mapping_data(circle_number, radius, longitude, latitude, focal_point, percent_mapped, HOST, USER, PASSWORD, DATABASE):
    conn = connect_with_retry(HOST, USER, PASSWORD, DATABASE)
    cursor = conn.cursor()

    insert_mapping_query = """
    INSERT IGNORE INTO mapping_progress (circle_number, radius, center, focal_point, percent_mapped)
    VALUES (%s, %s, POINT(%s, %s), %s, %s)
    """
    values = (circle_number, radius, longitude, latitude, focal_point, percent_mapped)
    cursor.execute(insert_mapping_query, values)
    conn.commit()


In [None]:
test_data=sql_upload_data((get_websites(clean_places_data(response.json()))), conn, cursor)
pprint (test_data)

In [None]:
def collect_places_data_test(current_circle_number, focal_point, api_key, HOST, USER, PASSWORD, DATABASE):
    #get the current area of the circle
    current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
    #get the calculated maximum area of the circle
    with open(r"C:\Users\Joe\Desktop\geojsons\geojson_total_areas.json") as f:
        areas = json.load(f)
    max_area=areas[f"circle{current_circle_number}.geojson"]
        #generate a random point that has not been mapped
    randomPoint=get_random_point_within_geojson(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
        #get google places data from that random point
    places_data=getPlaces(randomPoint[0],randomPoint[1], api_key)
        #get the size of the circle created from the google places data
    places_radius=get_max_distance(randomPoint[0],randomPoint[1], places_data)
        #filter the places data for non-businesses
    filtered_places=clean_places_data(places_data)
        #use selenium to add websites to places_data
    filtered_places=get_websites(filtered_places)
        #add places to database
    sql_upload_data(filtered_places, HOST, USER, PASSWORD, DATABASE)
        #modify geojson to remove the area
    remove_overlap_with_circle(current_circle_number, randomPoint[0], randomPoint[1], places_radius)
        #calculate new area
    current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{current_circle_number}.geojson")
        #calculate current percentage mapped
    percentage_mapped=(current_area/max_area)
        #add point, radius, and new percentage mapped to database
    sql_upload_mapping_data(current_circle_number, places_radius, randomPoint[1],randomPoint[0], focal_point, percentage_mapped, HOST, USER, PASSWORD, DATABASE)
    print(f"{randomPoint[0]}, {randomPoint[1]}, {places_radius}")
    print(f"percentage: {percentage_mapped}")
        

In [None]:
def collect_places_data_loop_49(focal_point, api_key, HOST, USER, PASSWORD, DATABASE):
    counter=0
    for i in range(1, 50):
        #get the current area of the circle
        current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
        #get the calculated maximum area of the circle
        with open(r"C:\Users\Joe\Desktop\geojsons\geojson_total_areas.json") as f:
            areas = json.load(f)
        max_area=areas[f"circle{i}.geojson"]

        #loop until 97% of circle has been mapped
        while (current_area/max_area)>0.03:
                #generate a random point that has not been mapped
            randomPoint=get_random_point_within_geojson(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
                #get google places data from that random point
            places_data=getPlaces(randomPoint[0],randomPoint[1], api_key)
                #get the size of the circle created from the google places data
            places_radius=get_max_distance(randomPoint[0],randomPoint[1], places_data)
                #filter the places data for non-businesses
            filtered_places=clean_places_data(places_data)
                #use selenium to add websites to places_data
            filtered_places=get_websites(filtered_places)
                #add places to database
            sql_upload_data(filtered_places, HOST, USER, PASSWORD, DATABASE)
                #modify geojson to remove the area
            remove_overlap_with_circle(i, randomPoint[0], randomPoint[1], places_radius)
                #calculate new area
            current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
                #calculate current percentage mapped
            percentage_mapped=(current_area/max_area)
                #add point, radius, and new percentage mapped to database
            sql_upload_mapping_data(i, places_radius, randomPoint[1],randomPoint[0], focal_point, percentage_mapped, HOST, USER, PASSWORD, DATABASE)
            counter+=1
            print(f"{counter} successful requests")
            #print(f"{randomPoint[0]}, {randomPoint[1]}, {places_radius}")
            #print(f"percentage: {percentage_mapped}")
            if counter>9990:
                return
        

In [None]:
#get all businesses within the first 10 miles of greenville
def collect_places_data_loop_10(focal_point, api_key, HOST, USER, PASSWORD, DATABASE):
    counter=0
    for i in range(1, 11):
        #get the current area of the circle
        current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
        #get the calculated maximum area of the circle
        with open(r"C:\Users\Joe\Desktop\geojsons\geojson_total_areas.json") as f:
            areas = json.load(f)
        max_area=areas[f"circle{i}.geojson"]

        #loop until 99% of circle has been mapped
        while (current_area/max_area)>0.01:
                #generate a random point that has not been mapped
            randomPoint=get_random_point_within_geojson(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
                #get google places data from that random point
            places_data=getPlaces(randomPoint[0],randomPoint[1], api_key)
                #get the size of the circle created from the google places data
            places_radius=get_max_distance(randomPoint[0],randomPoint[1], places_data)
                #filter the places data for non-businesses
            filtered_places=clean_places_data(places_data)
                #use selenium to add websites to places_data
            filtered_places=get_websites(filtered_places)
                #add places to database
            sql_upload_data(filtered_places, HOST, USER, PASSWORD, DATABASE)
                #modify geojson to remove the area
            remove_overlap_with_circle(i, randomPoint[0], randomPoint[1], places_radius)
                #calculate new area
            current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{i}.geojson")
                #calculate current percentage mapped
            percentage_mapped=(current_area/max_area)
                #add point, radius, and new percentage mapped to database
            sql_upload_mapping_data(i, places_radius, randomPoint[1],randomPoint[0], focal_point, percentage_mapped, HOST, USER, PASSWORD, DATABASE)
            counter+=1
            print(f"{counter} successful requests")
            #print(f"{randomPoint[0]}, {randomPoint[1]}, {places_radius}")
            #print(f"percentage: {percentage_mapped}")
            if counter>9990:
                return
        

In [None]:
collect_places_data_loop_10("Greenville", API_KEY, HOST, USER, PASSWORD, DATABASE)

In [15]:
def remove_overlap_with_secondary_circle(key, lat, lon, radius_meters):
    # Determine UTM zone
    geojson_number=int(''.join(filter(str.isdigit, key)))
    geojson_text=''.join(c for c in key if c.isalpha() or c == '_')
    utm_zone = int((lon + 180) / 6) + 1
    is_northern = lat >= 0

    crs_wgs84 = CRS("EPSG:4326")
    crs_utm = CRS.from_proj4(
        f"+proj=utm +zone={utm_zone} +datum=WGS84 +units=m +{'north' if is_northern else 'south'}"
    )

    # Set up projection transformers
    to_utm = Transformer.from_crs(crs_wgs84, crs_utm, always_xy=True).transform
    to_wgs84 = Transformer.from_crs(crs_utm, crs_wgs84, always_xy=True).transform

    # Create circle in UTM
    center_utm = transform(to_utm, Point(lon, lat))
    circle_utm = center_utm.buffer(radius_meters, resolution=64)

    # Load input GeoJSON and next highest incase there is overlap at the edges
    for i in range(geojson_number, geojson_number + 2):
        with open(rf"C:\Users\Joe\Desktop\geojsons\circle{i}{geojson_text}.geojson", 'r') as f:
            data = json.load(f)
    
        updated_features = []
        for feature in data.get("features", []):
            geom = shape(feature["geometry"])
            geom_utm = transform(to_utm, geom)
    
            # Remove overlap if present
            if geom_utm.intersects(circle_utm):
                diff = geom_utm.difference(circle_utm)
                if not diff.is_empty:
                    new_geom = transform(to_wgs84, diff)
                    updated_features.append({
                        "type": "Feature",
                        "geometry": mapping(new_geom),
                        "properties": feature.get("properties", {})
                    })
            else:
                updated_features.append(feature)
    
        # Overwrite the input file
        updated_geojson = {
            "type": "FeatureCollection",
            "features": updated_features
        }
    
        with open(rf"C:\Users\Joe\Desktop\geojsons\circle{i}{geojson_text}.geojson", 'w') as f:
            json.dump(updated_geojson, f, indent=2)


In [16]:
#collect all businesses within 10 miles of secondary cities
def collect_places_data_loop_10_secondary_areas(areas_json, api_key, HOST, USER, PASSWORD, DATABASE):
    counter=0
    with open(areas_json) as f:
        areas = json.load(f)
        
    for key, value in areas.items():
        #get the current area of the circle
        print(key)
        current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{key}.geojson")
        print(current_area)
        if current_area== None:
            current_area=0
        #get the calculated maximum area of the circle
        max_area=areas[key]
        print(max_area)
        geojson_number=int(''.join(filter(str.isdigit, key)))
    
        #loop until 99% of circle has been mapped
        while current_area>0 and (current_area/max_area)>0.001:
                #generate a random point that has not been mapped
            randomPoint=get_random_point_within_geojson(rf"C:\Users\Joe\Desktop\geojsons\circle{key}.geojson")
                #get google places data from that random point
            places_data=getPlaces(randomPoint[0],randomPoint[1], api_key)
                #get the size of the circle created from the google places data
            places_radius=get_max_distance(randomPoint[0],randomPoint[1], places_data)
                #filter the places data for non-businesses
            filtered_places=clean_places_data(places_data)
                #use selenium to add websites to places_data
            filtered_places=get_websites(filtered_places)
                #add places to database
            sql_upload_data(filtered_places, HOST, USER, PASSWORD, DATABASE)
                #modify geojson to remove the area
            remove_overlap_with_secondary_circle(key, randomPoint[0], randomPoint[1], places_radius)
                #calculate new area
            current_area=calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle{key}.geojson")
            if current_area== None:
                current_area=0
                #calculate current percentage mapped
            percentage_mapped=(current_area/max_area)
                #add point, radius, and new percentage mapped to database
            sql_upload_mapping_data(geojson_number, places_radius, randomPoint[1],randomPoint[0], ''.join(c for c in key if c.isalpha() or c == '_'), percentage_mapped, HOST, USER, PASSWORD, DATABASE)
            counter+=1
            print(f"{counter} successful requests")
            #print(f"{randomPoint[0]}, {randomPoint[1]}, {places_radius}")
            #print(f"percentage: {percentage_mapped}")
            if counter>7241:
                return

In [17]:
collect_places_data_loop_10_secondary_areas(r"C:\Users\Joe\Desktop\geojsons\secondary_areas_total_areas.JSON", API_KEY, HOST, USER, PASSWORD, DATABASE)

1Seneca
returning 4203.172106507291
4203.172106507291
8135830.288069866
2Seneca
returning 22643.627648534708
22643.627648534708
24407490.86420819
3Seneca
returning 36836.77402530692
36836.77402530692
40679151.44034812
4Seneca
returning 1585.548127543976
1585.548127543976
56950812.01648402
5Seneca
returning 30443.620697236805
30443.620697236805
73222472.59262949
6Seneca
returning 84855.19479077455
84855.19479077455
89494133.16876283
7Seneca
returning 104997.67382501137
104997.67382501137
105765793.74490184
8Seneca
returning 94996.46299297479
94996.46299297479
122037454.32104409
9Seneca
returning 112948.53473960875
112948.53473960875
138309114.8971858
10Seneca
returning 117691.83566243196
117691.83566243196
154580775.10195208
1Clemson
None
0
2Clemson
None
0
3Clemson
None
0
4Clemson
returning 40.863983008000595
40.863983008000595
5384749.167036149
5Clemson
returning 11037.838375858653
11037.838375858653
21173137.545745417
6Clemson
returning 3475.8474905323355
3475.8474905323355
34161347.0

In [None]:
tester=get_websites(clean_places_data(getPlaces(34.843794,-82.397254, API_KEY)))

In [None]:
import mysql.connector

# Connect to your MySQL/MariaDB server
conn = mysql.connector.connect(
    host=HOST,
    user=USER,
    password=PASSWORD,
    database=DATABASE
)

cursor = conn.cursor()

# Fetch all data from the table
cursor.execute("SELECT id, circle_number, radius, ST_X(center) AS longitude, ST_Y(center) AS latitude, focal_point, percent_mapped FROM mapping_progress;")
results = cursor.fetchall()



In [None]:
print(len(results[0]))
for i in range(len(results)-1):
    print(f"iteration {i}")
    remove_overlap_with_circle(1, results[i][4], results[i][3], results[i][2])
    


In [None]:
print(calculate_geojson_area(rf"C:\Users\Joe\Desktop\geojsons\circle5.geojson"))