In [None]:
import requests
import shutil
import os
import random

In [12]:
# function to donwload images from the googlestaticmap api
def download_images(coordinates, zoom=16):
    # define the request parameters
    url = "https://maps.googleapis.com/maps/api/staticmap?"
    api_key = ""
    size = "640x640"
    scale = "1"
    maptype = "satellite"

    # pose get request
    response = requests.get(url + "center=" + str(coordinates[0]) + "," + str(coordinates[1]) + "&zoom=" + str(zoom) + "&size=" + size + "&maptype=" + maptype + "&scale=" + scale + "&sensor=false" + "&key=" + api_key, stream=True,)
    #stream repsonse into file and save it
    with open(os.getcwd() + os.sep + "Data" + os.sep + "Images" + os.sep + str(coordinates[0]) + "_" + str(coordinates[1]) + ".png", "wb",) as out_file:
        shutil.copyfileobj(response.raw, out_file)
    #delete the repsonse arifact
    del response

In [13]:


# function to randomly pick coordinates around the world and generate those images
def load_random_sample(n=5):
    for i in range(n):
        random_lat = round(random.uniform(-90, 90), 3)
        random_lon = round(random.uniform(-180, 180), 3)
        download_images(coordinates=(random_lat, random_lon))

This approach doesn't work that well. The biggest reason is, that 70% of the earth is made out of water, so you would also expect this distribution in the randomly gathered images

Solution: Get random coordinates of a landuse, take image, repeat until enough examples

In [14]:
import geopandas as gpd
import osmnx as ox
import pandas as pd
import overpy
import random
from datapackage import Package
import math

In [15]:
#list with all landuses
dictionary_landuse = [
    "commercial",
    "construction",
    "education",
    "fairground",
    "industrial",
    "residential",
    "retail",
    "institutional",
    "aquaculture",
    "allotments",
    "farmland",
    "farmyard",
    "paddy",
    "animal_keeping",
    "flowerbed",
    "forest",
    "greenhouse_horticulture",
    "meadow",
    "orchard",
    "plant_nursery",
    "vineyard",
    "basin",
    "salt_pond",
    "brownfield",
    "cemetery",
    "depot",
    "garages",
    "grass",
    "greenfield",
    "landfill",
    "military",
    "port",
    "quarry",
    "railway",
    "recreation_ground",
    "religious",
    "village_green",
    "winter_sports",
]

In [16]:
# function to get the coordinates of a node using its openstreetmapid
def get_coordinates_from_id(osmid):
    
    # connect to overpass api endpoind
    api = overpy.Overpass()
    
    # query the api and save the response
    result = api.query(f"node({osmid});out;")

    for node in result.nodes:
        # extract coordinates from response
        coordinates = (float(node.lat), float(node.lon))
        
    # if coordinates are available return them
    try:
        return coordinates

    #else print error
    except:
        print(f"No coordinates for {osmid}")

In [17]:
# load dataset of all cities in the world with >15000 inhabitants
package = Package("https://datahub.io/core/world-cities/datapackage.json")

cities_list = package.get_resource("world-cities_csv").read()

In [18]:
# fetch random city contained in the dataset
def get_random_city():

    city = random.choice(cities_list)

    return city

In [19]:
# function to get one example for each landuse in a given city
def get_coordinates_for_each_landuse(city):
    coordinates = list()

    try:
        # query openstreetmap for all landuse data in a given city

        data = ox.features_from_place(
            "{}, {}".format(city[1], city[0]), tags={"landuse": True}
        )

        # reset the index to be able to extract osmid
        data = data.reset_index()

        # loop through each available landuse
        for type in data["landuse"].unique():
            # sort the data by the currently selected landuse
            landusedata = data.loc[
                (data["landuse"] == type) & (data["element_type"] == "way")
            ]

            # select a random example for the selected landuse
            id = landusedata.osmid.iloc[random.randint(0, len(landusedata.osmid) - 1)]

            # add the coordinates to the list
            coordinates.append(get_coordinates_from_id(id))

        # filter empty values out of the list
        coordinates = list(filter(lambda item: item is not None, coordinates))
        return coordinates

    # print an error if the provided city couldn't be found through the openstreetmap query
    except:
        print(f"City {city[0]} in {city[1]} not found")

Code to calculate the bounds of a given center point and zoom level on google maps (modified from [here](https://stackoverflow.com/questions/12507274/how-to-get-bounds-of-a-google-static-map))

In [20]:
def latLngToPoint(mapWidth, mapHeight, lat, lng):
    x = (lng + 180) * (mapWidth / 360)

    y = ((1 - math.log(math.tan(lat * math.pi / 180) + 1 / math.cos(lat * math.pi / 180)) / math.pi) / 2) * mapHeight

    return (x, y)



def pointToLatLng(mapWidth, mapHeight, x, y):
    lng = x / mapWidth * 360 - 180
    n = math.pi - 2 * math.pi * y / mapHeight
    lat = 180 / math.pi * math.atan(0.5 * (math.exp(n) - math.exp(-n)))


    return (lat, lng)



def getImageBounds(lat, lng, zoom):
    picHeight = 640
    picWidth = 640


    mapHeight = 256
    mapWidth = 256


    xScale = math.pow(2, zoom) / (picWidth / mapWidth)
    yScale = math.pow(2, zoom) / (picHeight / mapWidth)


    centreX, centreY = latLngToPoint(mapWidth, mapHeight, lat, lng)


    southWestX = centreX - (mapWidth / 2) / xScale
    southWestY = centreY + (mapHeight / 2) / yScale
    SWlat, SWlng = pointToLatLng(mapWidth, mapHeight, southWestX, southWestY)


    northEastX = centreX + (mapWidth / 2) / xScale
    northEastY = centreY - (mapHeight / 2) / yScale
    NElat, NElng = pointToLatLng(mapWidth, mapHeight, northEastX, northEastY)


    return [SWlat, SWlng, NElat, NElng]

In [25]:
# function to generate a set of images for n cities
def get_random_samples(n=1):
    # loop to do multiple cities

    for x in range(n):
        
        # get a random city
        city = get_random_city()
        
        # get the coordinates for one of all the different landuses
        coordinates = get_coordinates_for_each_landuse(city)

        try:
            
            # loop over each coordinate
            for coordinate in coordinates:

                try:
                    # check if data is actually present in the given area
                    south, east, north, west = getImageBounds(
                        float(coordinate[0]), float(coordinate[1]), 16
                    )
                    landuse = ox.features_from_bbox(
                        north, south, east, west, tags={"landuse": True}
                    )
                    # download the image of the given coordinate
                    download_images(coordinate)

                except:
                    print("No landuse data was found")

        except:
            print("Coordinates were returned empty")

In [28]:
# initial count of how much data is already in the folder
datanames = os.listdir(os.getcwd() + os.sep + "Data" + os.sep + "Images")

# loop as long as less then 5000 examples
while len(datanames) < 5000:

    get_random_samples()

    datanames = os.listdir(os.getcwd() + os.sep + "Data" + os.sep + "Images")

City Parit Buntar in Malaysia not found

No coordinates for 769549140
No coordinates for 314620483
No coordinates for 469846346
No coordinates for 469846345
No coordinates for 583791773
No coordinates for 1037789243

No coordinates for 50314878
No coordinates for 91938107
No coordinates for 691375586
No coordinates for 91717181
No coordinates for 147711876
No coordinates for 1024040722







No coordinates for 607777160






City Talamba in Pakistan not found

No coordinates for 240390683
No coordinates for 426850607
No coordinates for 284556816
No coordinates for 317346326
No coordinates for 726852885
No coordinates for 733243091
No coordinates for 427126343


City Barnsley in United Kingdom not found

No coordinates for 98441807
No coordinates for 47499082
No coordinates for 137269164
No coordinates for 769264475






No coordinates for 333220679
No coordinates for 334634666
No coordinates for 1219512720
No coordinates for 922752363
No coordinates for 1092041392
No coordinates for