In [None]:
import requests
import googlemaps
import os
import pandas as pd
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)


#### Place your Google API key (with full Maps API access) in a file named `google_maps_api_key.txt` in the main project directory.

In [None]:

# Reading the Google Maps API key.
# NOTE: when run as a notebook/script, __name__ is "__main__", so both dirname() calls
# yield "" and every path built from parent_folder is relative to the working directory.
parent_folder = os.path.dirname(os.path.dirname(__name__))

# Use a context manager so the file handle is closed, and strip surrounding
# whitespace: a trailing newline saved by an editor would otherwise become part of the key.
with open(os.path.join(parent_folder, "google_maps_api_key.txt"), "r") as key_file:
    google_api_key = key_file.read().strip()

In [None]:
### Randomly sample a coordinate that falls inside one of the target countries. Points are drawn anywhere within the country, so rural locations are included.

import geopandas as gpd
import random
import shapely.geometry as geom

#args: countries (list of countries we want), world (GPD object)
def gen_coords(countries, world):
    """Draw random coordinates until one lands inside a target country.

    Latitude and longitude are sampled independently and uniformly in degrees,
    so points are not uniform by surface area (higher latitudes are favoured).

    Args:
        countries: collection of country names to accept; compared against the
            ``name`` column of ``world``.
        world: GeoDataFrame of country polygons with a ``name`` column.
            Assumed to be in EPSG:4326 like the sampled point -- TODO confirm.

    Returns:
        Tuple ``(lat, lon, country)`` for the first accepted point.

    Note:
        Loops forever if no polygon in ``world`` carries a name in ``countries``.
    """
    while True:
        lat = random.uniform(-90, 90)  ### Randomly selecting a latlong
        lon = random.uniform(-180, 180)

        ### shapely points are (x, y), i.e. (lon, lat)
        point = geom.Point(lon, lat)
        point_gdf = gpd.GeoDataFrame(geometry=[point], crs="EPSG:4326")

        ### Spatial join to find the country polygon containing the point.
        ### `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
        result = gpd.sjoin(point_gdf, world, predicate='within')

        ### An empty result means the point matched no polygon; draw again.
        if not result.empty:
            country = result.iloc[0]['name']
            if country in countries:
                return (lat, lon, country)



In [None]:
### Randomly sample a coordinate that falls inside an urban area of one of the target countries. This is best for picking within cities.
import geopandas as gpd
import random
import shapely.geometry as geom
#args: countries (list of countries we want), world (GPD object), urban_areas (gpkg object containing info on urban regions)
def city_coords(countries, world, urban_areas):
    """Draw random coordinates until one lands in an urban area of a target country.

    Args:
        countries: collection of country names to accept; compared against the
            ``name`` column of ``world``.
        world: GeoDataFrame of country polygons with a ``name`` column.
        urban_areas: GeoDataFrame of urban-area polygons with an ``eFUA_name``
            column; its CRS is used for all spatial tests.

    Returns:
        Tuple ``(lat, lon, country)`` where lat/lon are the originally sampled
        EPSG:4326 degrees (not the reprojected coordinates).

    Note:
        Loops forever if no accepted country overlaps any urban-area polygon.
    """
    ### The country polygons do not change between attempts, so reproject them
    ### once instead of on every rejected sample.
    world_projected = world.to_crs(urban_areas.crs)

    while True:
        lat = random.uniform(-90, 90)
        lon = random.uniform(-180, 180)

        # shapely points are (x, y), i.e. (lon, lat)
        point = geom.Point(lon, lat)

        # Build the point in lat/lon, then cast it to the CRS of the urban-area
        # layer (the GHS file name indicates 54009 / Mollweide).
        point_gdf = gpd.GeoDataFrame(geometry=[point], crs="EPSG:4326")
        point_gdf = point_gdf.to_crs(urban_areas.crs)

        # Spatial join to find the country that contains the point.
        # `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
        result_c = gpd.sjoin(point_gdf, world_projected, predicate='within')
        if result_c.empty:
            continue

        # Check if the point is within a target country
        country = result_c.iloc[0]['name']
        if country not in countries:
            continue

        # Spatial join to find the urban area that contains the point
        result_u = gpd.sjoin(point_gdf, urban_areas, predicate='within')
        if result_u.empty:
            continue

        urban_area = result_u.iloc[0]['eFUA_name']
        print(f"The point ({lat}, {lon}) is in the urban area: {urban_area}")
        return (lat, lon, country)

In [None]:
### Reverse-geocoding a lat-long pair into a street address
#args: gmaps = googlemaps object, coords = (lat, long)

def get_address(gmaps, coords):
    """Reverse-geocode ``coords`` and assemble an address string from the first hit.

    Args:
        gmaps: client object exposing ``reverse_geocode(coords)``.
        coords: ``(lat, long)`` pair handed straight to the client.

    Returns:
        ``(city, address)``. Components missing from the result stay empty
        strings, so ``city`` may be ``""``. ``(None, None)`` is returned when
        the lookup yields nothing or any exception is raised (the error is printed).
    """
    # Component types of interest, in priority order: a component carrying
    # several of these types only fills the first one listed here.
    wanted_types = (
        'street_number',
        'route',
        'locality',
        'administrative_area_level_1',
        'country',
        'postal_code',
    )

    try:
        matches = gmaps.reverse_geocode(coords)
        if not matches:
            return None, None

        parts = dict.fromkeys(wanted_types, "")
        # Only the first geocoding result is used.
        for entry in matches[0]['address_components']:
            entry_types = entry['types']
            for kind in wanted_types:
                if kind in entry_types:
                    parts[kind] = entry['long_name']
                    break

        number, road, town, region, nation, zip_code = (parts[kind] for kind in wanted_types)
        address = f"{number} {road}, {town}, {region}, {nation} {zip_code}"
        return town, address

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None, None

In [None]:
### gmaps object
gmaps = googlemaps.Client(key=google_api_key)

### list of target countries
country_list = pd.read_csv('./Data/country_list.csv')
european = country_list['country'].unique()

### GPD world object
### NOTE: gpd.datasets was deprecated in geopandas 0.14 and removed in 1.0; this line needs geopandas < 1.0.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')) ### Creating here so it doesn't need to be re-created every execution

### urban areas database (GHS)
urban_areas = gpd.read_file('./Data/urban/GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg') ### A list of polygon-bounded coordinates


### Dataset to be saved for identification of each image w/ latlong
dataset_file = "./Data/gmaps/dataset.csv"

# Check if the dataset file exists
if os.path.exists(dataset_file):
    # If the file exists, load the dataset from the file and continue after it.
    dataset = pd.read_csv(dataset_file, index_col=0)
    # Resume after the largest saved index rather than at len(dataset): the two
    # differ if the saved index is not a contiguous 0..len-1 range, and reusing
    # an index would overwrite an existing image file.
    start_index = int(dataset.index.max()) + 1 if len(dataset) else 0
    print(f"Resuming from index {start_index}")
else:
    # If the file doesn't exist, create a new dataset DataFrame
    dataset = pd.DataFrame(columns=["lat", "lon", "country", "address"])
    start_index = 0

# NOTE: `dataset` must not be re-created here. Doing so would discard the rows
# loaded above, and the next to_csv() would overwrite the file without them.

N = 10000

### Street View Static API endpoint
pic_base = 'https://maps.googleapis.com/maps/api/streetview?'

### Seconds to wait for a Street View response; without a timeout a stalled
### connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

### If the image cannot be retrieved, gmaps will still return a blank image.
### The image size is typically ~6kB-9kB, so anything at or below this is treated as "no image".
MIN_IMAGE_BYTES = 10000

### Make sure the image folder exists before the first write.
image_dir = os.path.join(parent_folder, "Data", "gmaps")
os.makedirs(image_dir, exist_ok=True)

try:
    for n in range(start_index, start_index + N):
        ### Selecting 33% rural (randomly sampled within the country) and 66% urban (bounded by urban_areas)
        searching = True
        while searching:
            urban_pick = bool(n % 3)
            if urban_pick:
                lat, lon, country = city_coords(european, world, urban_areas)
            else:
                lat, lon, country = gen_coords(european, world)

            ### Russia is gigantic, so it is rejected for 2 out of every 3 image slots.
            ### NOTE(review): as written this rejects every Russia hit on urban slots and
            ### none on rural slots -- confirm that is the intended thinning.
            ### Checked before geocoding so a rejected point costs no API request.
            if urban_pick and country == "Russia":
                continue

            city, address = get_address(gmaps, (lat, lon))
            if city is None or address is None:
                continue

            ### If we've found an address that exists in the database, we can try querying the API for a streetview image.
            pic_params = {'key': google_api_key,
                          'location': address,
                          'size': "500x500"}

            ### Requesting data; a network failure just means we try another point.
            try:
                pic_response = requests.get(pic_base, params=pic_params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as e:
                print(f"Street view request failed for address: {address} ({e})")
                continue

            if pic_response.status_code != 200:
                print(f"Failed to retrieve street view image for address: {address}")
                continue

            ### Placeholder-sized responses mean no imagery exists; sample again.
            if len(pic_response.content) <= MIN_IMAGE_BYTES:
                continue

            image_path = os.path.join(image_dir, f"{n}.jpg")
            with open(image_path, "wb") as file:
                file.write(pic_response.content)

            dataset.loc[n] = [lat, lon, country, address]
            searching = False
            print(f"Image found for image {n} in {country}.")

        ### Periodic checkpoint of the metadata
        if n % 100 == 0:
            print(f"Saved dataset at n = {n}.")
            dataset.to_csv(dataset_file)
finally:
    ### Always persist what was collected, including after an error or a manual
    ### interrupt, so downloaded images are not left without metadata.
    dataset.to_csv(dataset_file)