# Download the buildings data

Question: How many buildings were mapped with RapiD (Microsoft buildings) in the selected region?

#### Import libs

In [1]:
import json
import os
import requests
import glob
from tqdm import tqdm
import time
import pandas as pd

### Get the data for every specified region

#### Definitions

In [2]:
# Move to the project root (the folder that holds "geojson-regions") only when
# we are not already there, so re-running this cell does not climb one
# directory higher on every execution.
if not os.path.isdir("geojson-regions"):
    os.chdir('..')


In [3]:
# initializing variables
# Country codes to download when the user answers "y" to the selected-countries
# prompt; they are compared against the "ADM0_A3" property of each feature in
# sel_countries_from_geojson.
# NOTE(review): "US1" does not look like an ISO alpha-3 code — confirm it
# matches the ADM0_A3 value used for the United States in the region files.
desired_countries = ["US1", "NGA", "DEU", "CZE", "VNM"]

In [4]:
def get_geojson_names():
    """Return the names of the region files found in ./geojson-regions.

    The folder is looked up relative to the current working directory and
    only files ending in ``.geojson`` are considered.

    Returns:
        list[str]: file names without directory and without extension,
        ordered by file name. The order matters because the position of
        each name is later used as the key the user types to pick a region.
    """
    geojson_dir = os.path.join(os.getcwd(), "geojson-regions")
    file_pattern = os.path.join(geojson_dir, "*.geojson")

    # glob yields matches in arbitrary, platform-dependent order; sorting
    # keeps the numbering of the regions stable between runs and machines.
    file_names = sorted(glob.glob(file_pattern))

    return [os.path.splitext(os.path.basename(file_name))[0]
            for file_name in file_names]
    

# Collect the available region files; the position of each name in this list
# becomes its key in geojson_dic (built with enumerate further down).
geojson_names = get_geojson_names()
geojson_names


['geojson-africa-states',
 'geojson-africa-test-states',
 'geojson-asia-states',
 'geojson-continent-states',
 'geojson-europe-states',
 'geojson-northamerica-states',
 'geojson-world-states-ohsome',
 'geojson-world-states']

In [5]:
# def get_geojson_names():

#     # os.chdir('..')
#     home_dir = os.getcwd()
#     geojson_dir = os.path.join(home_dir, f"geojson-regions", "")

#     geojson_names = []
#     for root, dirs, files in os.walk(geojson_dir):
#         for file in files:
#             if file.endswith('.geojson'):
#                 file_path = os.path.join(root, file)
#                 file_name = os.path.splitext(os.path.basename(file_path))[1]
#                 geojson_names.append(file_name)
#         # geojson_names.extend(file_names)
#     print(geojson_names)
#     return geojson_names


# geojson_names = get_geojson_names()


In [6]:
# Number the region files (0, 1, 2, ...) so the user can pick one by typing its index.
geojson_dic = dict(enumerate(geojson_names))
geojson_dic


{0: 'geojson-africa-states',
 1: 'geojson-africa-test-states',
 2: 'geojson-asia-states',
 3: 'geojson-continent-states',
 4: 'geojson-europe-states',
 5: 'geojson-northamerica-states',
 6: 'geojson-world-states-ohsome',
 7: 'geojson-world-states'}

In [7]:
# define the dictionaries

# data_dic = {0: "blds_continent-states_18-23_ai",
#             1: "blds_africa-states_18-23_ai",
#             2: "blds_northamerica-states_18-23_ai",
#             3: "blds_europe-states_18-23_ai",
#             4: "blds_asia-states_18-23_ai", }

# geojson_dic = {0: "geojson-continent-states",
#                       1: "geojson-africa-states",
#                       2: "geojson-northamerica-states",
#                       3: "geojson-europe-states",
#                       4: "geojson-asia-states",}

In [8]:
# Sanity check: at least one region file was found.
if len(geojson_dic) > 0:
    print("Dictionary is not empty")

Dictionary is not empty


In [9]:
# define the user input

# Without any region file there is nothing to choose from. Stop here with a
# clear error instead of leaving user_d_spec undefined for the cells below.
if not geojson_dic:
    raise RuntimeError("Geojson_dic is empty, restart the kernel, please.")

# 1) Which region file to download (key of geojson_dic).
while True:
    try:
        user_d_spec = int(input(
            f"Which data do you want to download? Enter {geojson_dic}"
        ))
    except ValueError:
        print("Please enter a number.")
        continue
    if user_d_spec not in geojson_dic:
        print(f"Please enter a number from 0 to {len(geojson_dic) - 1}.")
        continue
    break

# 2) Export format: 0 -> json, 1 -> geojson.
while True:
    try:
        user_d_format = int(input(
            "Which data format do you want to export? "
            "Enter 0 for json format or 1 for geojson format"
        ))
    except ValueError:
        print("Please enter a number.")
        continue
    if user_d_format not in (0, 1):
        print("Please enter 0 or 1.")
        continue
    break

# 3) Restrict the download to the countries listed in desired_countries?
while True:
    # Normalised so that answers such as "Y" or " n" are accepted as well;
    # the stored value is always exactly "y" or "n".
    user_sel_countries = input(
        "Do you want to download data for desired countries defined in a list in the code? "
        "Enter y for yes or n for no"
    ).strip().lower()
    if user_sel_countries not in ("y", "n"):
        print("Please enter y or n.")
        continue
    break


In [10]:
# read geojson data

# os.chdir('..')
# home_wd = os.getcwd()
# downloaded_data_geojson_dir = os.path.join(home_wd, "downloaded-data-geojson", "")

# Build the path with os.path.join so it also works outside Windows, and read
# the file as UTF-8 (the encoding GeoJSON is specified in) instead of the
# platform default.
geojson_path = os.path.join("geojson-regions", f"{geojson_dic[user_d_spec]}.geojson")
with open(geojson_path, "r", encoding="utf-8") as file:
    bpolys = json.load(file)


In [11]:
# define the functions

def convert_geojson_structure(original_dict):
    """Convert the GeoJSON structure to the structure required by the API.

    Wraps one feature in a FeatureCollection that contains exactly one
    feature whose geometry is typed as ``MultiPolygon``.

    Args:
        original_dict (dict): a GeoJSON feature with "geometry" (holding
            "coordinates" and, optionally, "type") and "properties".

    Returns:
        dict: ``{"type": "FeatureCollection", "features": [feature]}`` where
        the feature reuses the coordinates and properties of the input.

    Raises:
        KeyError: if "geometry", "coordinates" or "properties" is missing.
    """
    geometry = original_dict["geometry"]
    coordinates = geometry["coordinates"]

    # The output geometry is always labelled MultiPolygon. Polygon coordinates
    # are one nesting level shallower, so wrap them in a list; otherwise the
    # resulting geometry would not be valid GeoJSON.
    if geometry.get("type") == "Polygon":
        coordinates = [coordinates]

    feature_dict = {
        "type": "Feature",
        "geometry": {
            "type": "MultiPolygon",
            "coordinates": coordinates,
        },
        "properties": original_dict["properties"],
    }

    return {"type": "FeatureCollection", "features": [feature_dict]}

# append desired properties
def append_properties(result, desired_geojson_structure):
    """Label every API result entry with the feature's name and attributes.

    For each entry of ``result`` the first item of its "groupByObject" list
    is replaced by the feature's "NAME_EN" property, and the values of a
    fixed set of feature properties are appended to that list, in order.

    Args:
        result (list[dict]): entries carrying a "groupByObject" list;
            modified in place.
        desired_geojson_structure (dict): FeatureCollection whose first
            feature supplies the properties.

    Returns:
        list[dict]: the same ``result`` object.
    """
    wanted_keys = (
        "ADM0_ISO", "SOVEREIGNT",
        "TYPE", "ADMIN", "GEOUNIT", "NAME", "NE_ID",
        "POP_EST", "POP_RANK", "POP_YEAR", "GDP_MD", "GDP_YEAR",
        "ECONOMY", "INCOME_GRP", "CONTINENT", "REGION_UN", "SUBREGION", "REGION_WB",
    )

    for entry in result:
        feature_properties = desired_geojson_structure["features"][0]["properties"]
        name = feature_properties["NAME_EN"]

        group = entry["groupByObject"]
        group[0] = name
        group.extend(feature_properties[key] for key in wanted_keys)

    return result

def data_gen(feature, max_retries=None, retry_delay=5, timeout=None):
    """Get data from the API for a single GeoJSON feature.

    The feature is converted with ``convert_geojson_structure`` and posted to
    the module-level ``url``. The request counts buildings at 2023-06-30,
    grouped by the value of the ``source`` tag. The "groupByResult" list of
    the response is passed through ``append_properties`` and returned.

    Failed requests are retried after ``retry_delay`` seconds. HTTP answers
    that signal a request which cannot succeed unchanged (400, 401, 403, 404,
    405) are raised immediately instead of being retried forever.

    Args:
        feature (dict): GeoJSON feature with "geometry" and "properties".
        max_retries (int | None): maximum number of retries after a failed
            attempt; ``None`` (default) retries without limit.
        retry_delay (float): seconds to wait between attempts.
        timeout (float | tuple | None): passed to ``requests.post``;
            ``None`` (default) waits indefinitely for the server.

    Returns:
        list[dict]: the API result entries, extended by ``append_properties``.

    Raises:
        requests.exceptions.HTTPError: for a non-retryable HTTP status.
        RuntimeError: when ``max_retries`` is set and exhausted.
        KeyError: if the response has no "groupByResult" key.
    """

    desired_geojson_structure = convert_geojson_structure(feature)

    parameters = {
        # pass GeoJSON as string.
        "bpolys": json.dumps(desired_geojson_structure),
        "filter": "building=* and building!=no and geometry:polygon",
        "groupByKey": "source",
        "groupByValues": "microsoft/BuildingFootprints,esri/Google_Africa_Buildings",
        "format": "json",
        "time": "2023-06-30",  # "2018-01-01/2023-06-30/P1Y" 2023-06-30
        }
    headers = {
        "accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
        }

    for value in parameters.values():
        assert value != "", "Please provide values for the parameters"

    # Statuses that mean the request itself is wrong; repeating it is pointless.
    non_retryable_statuses = {400, 401, 403, 404, 405}

    failed_attempts = 0
    while True:
        try:
            response = requests.post(url, data=parameters, headers=headers,
                                     timeout=timeout)
            response.raise_for_status()  # Raise an Exception if HTTP Status Code is not 200

            result = response.json()["groupByResult"]
            return append_properties(result, desired_geojson_structure)

        except requests.exceptions.HTTPError as error:
            status = error.response.status_code if error.response is not None else None
            if status in non_retryable_statuses:
                raise
            last_error = error
            print(f"Server answered with HTTP status {status}, retrying...")

        except requests.exceptions.RequestException as error:
            # Connection-level problem (e.g. Wi-Fi dropped); wait and try again.
            last_error = error
            print("Waiting for Wi-Fi connection to be restored...")

        failed_attempts += 1
        if max_retries is not None and failed_attempts > max_retries:
            raise RuntimeError(
                f"Giving up after {failed_attempts} failed attempts."
            ) from last_error
        time.sleep(retry_delay)

# longer version of the function
# def connect_feat_data(feature, data):

#     # Find the relevant value in data
#     rem_value_to_add = None
#     for elem in data:
#         if elem['groupByObject'][1] == 'remainder':
#             for result in elem['result']:
#                 if result['timestamp'] == '2023-01-01T00:00:00Z':
#                     rem_value_to_add = result['value']
#                     break
#             break


#     ai_value_to_add = None
#     for elem in data:
#         if elem['groupByObject'][1] == 'source=microsoft/BuildingFootprints':
#             for result in elem['result']:
#                 if result['timestamp'] == '2023-01-01T00:00:00Z':
#                     ai_value_to_add = result['value']
#                     break
#             break
#     # return value_to_add


#     blds_total = None
#     blds_total = rem_value_to_add + ai_value_to_add

#     # Calculate the AI percentage
#     ai_percentage = None
#     ai_percentage = (ai_value_to_add / (ai_value_to_add + rem_value_to_add)) * 100
    
#     # Add the value to the properties of feature
#     if rem_value_to_add is not None:
#         feature['properties']['mm_blds_2023'] = rem_value_to_add

#     if ai_value_to_add is not None:
#         feature['properties']['ai_blds_2023'] = ai_value_to_add

#     if blds_total is not None:
#         feature['properties']['blds_total'] = blds_total

#     if ai_percentage is not None:
#         feature['properties']['ai_percentage'] = ai_percentage

#     return feature


def connect_feat_data(feature, data, timestamp='2023-06-30T00:00:00Z'):
    """Connects the data to the feature of the GeoJSON file.

    Looks up the building counts for ``timestamp`` in ``data`` and writes them
    into ``feature['properties']``:

    * ``mm_blds_2023``  - count of the 'remainder' group,
    * ``ai_blds_2023``  - count of 'source=microsoft/BuildingFootprints',
    * ``blds_total``    - sum of the two (only when both were found),
    * ``ai_percentage`` - share of ai_blds_2023 in blds_total, in percent
      (only when both were found; 0.0 when blds_total is 0).

    NOTE(review): data_gen also groups by esri/Google_Africa_Buildings; that
    group is in neither count, so blds_total leaves it out - confirm intended.

    Args:
        feature (dict): GeoJSON feature; its "properties" are modified in place.
        data (list[dict]): result entries whose ``groupByObject[1]`` names the
            group and whose "result" list holds timestamp/value pairs.
        timestamp (str): timestamp to pick from each "result" list.

    Returns:
        dict: the same ``feature`` object.
    """

    rem_value_to_add = None
    ai_value_to_add = None

    for elem in data:
        group_name = elem['groupByObject'][1]
        if group_name == 'remainder':
            rem_value_to_add = find_value(elem, timestamp)
        elif group_name == 'source=microsoft/BuildingFootprints':
            ai_value_to_add = find_value(elem, timestamp)

    properties = feature['properties']

    if rem_value_to_add is not None:
        properties['mm_blds_2023'] = rem_value_to_add

    if ai_value_to_add is not None:
        properties['ai_blds_2023'] = ai_value_to_add

    if rem_value_to_add is not None and ai_value_to_add is not None:
        blds_total = rem_value_to_add + ai_value_to_add
        properties['blds_total'] = blds_total
        # A region without any buildings would otherwise divide by zero.
        if blds_total:
            properties['ai_percentage'] = (ai_value_to_add / blds_total) * 100
        else:
            properties['ai_percentage'] = 0.0

    return feature


def sel_countries_from_geojson(bpolys, desired_countires):
    """Get the selected countries from the geojson file.

    Walks through ``bpolys["features"]`` and keeps the first feature found for
    every requested country code, comparing against the feature's "ADM0_A3"
    property. Each match is printed together with its index, and the codes
    that were not found are printed at the end.

    Args:
        bpolys (dict): GeoJSON-like dict with a "features" list.
        desired_countires (list[str]): requested country codes. The
            (misspelled) parameter name is kept for compatibility. The list
            is not modified.

    Returns:
        dict: ``{"features": [...]}`` holding the matching features. If a
        feature lacks the expected keys, a message is printed and the
        features collected so far are returned.
    """

    sel_countries = {"features": []}

    # Work on a copy so the caller's list (and the notebook-level
    # desired_countries) stays intact and the cell can be re-run.
    remaining = list(desired_countires)

    try:
        for i, feature in enumerate(bpolys["features"]):
            country_code = feature["properties"]["ADM0_A3"]
            if country_code in remaining:
                sel_countries["features"].append(feature)
                print(i, country_code)
                remaining.remove(country_code)
    except KeyError:
        print("You are trying to access a key that does not exist. Please try to use a geojson format in different structure.")

    print(f"Countries not in downloaded data: {remaining}")

    return sel_countries


def find_value(elem, timestamp):
    """Return the value recorded for ``timestamp`` in ``elem['result']``.

    The first entry whose "timestamp" equals ``timestamp`` wins. If there is
    no such entry, a message is printed and ``None`` is returned.
    """
    not_found = object()
    matching_values = (
        entry['value']
        for entry in elem['result']
        if entry['timestamp'] == timestamp
    )
    value = next(matching_values, not_found)

    if value is not_found:
        print(f"Value for {timestamp} not found.")
        return None
    return value


def return_json_data(bpolys=None):
    """Download the API data for every feature and return the raw results.

    Args:
        bpolys (dict | None): dict with a "features" list. When omitted, the
            notebook-level ``bpolys`` is used as it is at call time.

    Returns:
        list: one ``data_gen`` result per feature, in feature order.
    """
    if bpolys is None:
        # Resolve the notebook-level variable at call time. A default of
        # ``bpolys=bpolys`` would be frozen when this cell is executed and go
        # stale as soon as another region is loaded.
        bpolys = globals()["bpolys"]

    features = bpolys["features"]
    return [data_gen(feature) for feature in tqdm(features, total=len(features))]


def return_geojson_data(bpolys=None):
    """Download the API data for every feature and attach it to the feature.

    Each feature is passed to ``data_gen`` and the result is merged into the
    feature by ``connect_feat_data``, which writes into the feature's
    "properties" in place.

    Args:
        bpolys (dict | None): dict with a "features" list. When omitted, the
            notebook-level ``bpolys`` is used as it is at call time.

    Returns:
        list[dict]: the features, each enriched with the downloaded counts.
    """
    if bpolys is None:
        # Resolve the notebook-level variable at call time. A default of
        # ``bpolys=bpolys`` would be frozen when this cell is executed and go
        # stale as soon as another region is loaded.
        bpolys = globals()["bpolys"]

    features = bpolys["features"]
    return [connect_feat_data(feature, data_gen(feature))
            for feature in tqdm(features, total=len(features))]

# export the data
def export_data_as_json(name, data):
    """Write ``data`` to downloaded-data-json/blds_<name>.json.

    The path is relative to the current working directory; the output folder
    is created when it does not exist yet.

    Args:
        name (str): suffix of the file name.
        data: JSON-serialisable object to store.
    """
    out_dir = "downloaded-data-json"
    os.makedirs(out_dir, exist_ok=True)

    # os.path.join instead of a hard-coded "\\" keeps the path portable.
    out_path = os.path.join(out_dir, f"blds_{name}.json")
    with open(out_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)


# export_data_as_json(data_dic[user_d_spec], return_json_data)


def export_data_as_geojson(name, data):
    """Write ``data`` as a FeatureCollection to downloaded-data-geojson/blds_<name>.geojson.

    The path is relative to the current working directory; the output folder
    is created when it does not exist yet.

    Args:
        name (str): suffix of the file name.
        data (list): features stored under the "features" key.
    """
    out_dir = "downloaded-data-geojson"
    os.makedirs(out_dir, exist_ok=True)

    # Define the filename for the GeoJSON file
    filename = f'blds_{name}.geojson'

    # Create a FeatureCollection from the structure
    feature_collection = {
        'type': 'FeatureCollection',
        'features': data
    }

    # Write the FeatureCollection to a GeoJSON file; os.path.join instead of
    # a hard-coded "\\" keeps the path portable.
    with open(os.path.join(out_dir, filename), 'w', encoding="utf-8") as file:
        json.dump(feature_collection, file, indent=4)


# export_data_as_geojson(data_dic[user_d_spec], return_geojson_data)


In [12]:
# define the URL

# Root of the API and the endpoint that counts elements grouped by boundary and tag.
base_url = "https://api.ohsome.org/v1"
endpoint = "/elements/count/groupBy/boundary/groupBy/tag"

# Full request URL used by data_gen.
url = f"{base_url}{endpoint}"

#### Get the data

In [13]:
# convert_geojson_structure(bpolys["features"][3])

In [14]:
# Containers for the downloaded results (only the one matching the chosen format is filled).
json_data = []
geojson_data = []

fin_file_name = input("Enter the name of the file to export the data: ")

# "y" restricts the download to the countries listed in desired_countries.
only_selected = user_sel_countries == "y"
if only_selected:
    sel_countries = sel_countries_from_geojson(bpolys, desired_countries)

if user_d_format == 0:  # 0 for json format
    obtained_json_data = (
        return_json_data(sel_countries) if only_selected else return_json_data()
    )
    json_data.extend(obtained_json_data)
    export_data_as_json(fin_file_name, obtained_json_data)
else:  # geojson format
    obtained_geojson_data = (
        return_geojson_data(sel_countries) if only_selected else return_geojson_data()
    )
    geojson_data.extend(obtained_geojson_data)
    export_data_as_geojson(fin_file_name, obtained_geojson_data)
print("Finished!")

 72%|███████▏  | 150/209 [58:46<1:16:04, 77.37s/it]

Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for W

100%|██████████| 209/209 [1:42:25<00:00, 29.40s/it]   


Finished!


In [15]:
# Re-load the exported JSON for the tabular overview below. For the geojson
# format there is nothing to tabulate, so execution is stopped deliberately.
if user_d_format == 0:  # 0 for json format
    # os.path.join instead of a hard-coded "\\" keeps the path portable.
    exported_json_path = os.path.join("downloaded-data-json", f"blds_{fin_file_name}.json")
    with open(exported_json_path, "r", encoding="utf-8") as file:
        dow_res = json.load(file)
else:
    print("Stopping the script.")
    raise SystemExit

Stopping the script.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
def transform_downloaded_data(dow_res):
    """Flatten the downloaded results into column lists.

    ``dow_res`` is a list of per-region lists. Every entry carries a
    "groupByObject" list (state at index 0, source at index 1) and a "result"
    list of timestamp/value pairs. One row is produced per pair.

    Returns:
        dict: keys "state", "source", "timestamp" and "value", each mapping
        to a list with one item per row.
    """
    states, sources, timestamps, values = [], [], [], []

    for region_entries in dow_res:
        for entry in region_entries:
            state = entry["groupByObject"][0]
            source = entry["groupByObject"][1]

            for record in entry["result"]:
                timestamp = record["timestamp"]
                value = record["value"]

                # Repeat state and source for every timestamp-value pair
                states.append(state)
                sources.append(source)
                timestamps.append(timestamp)
                values.append(value)

    return {
        "state": states,
        "source": sources,
        "timestamp": timestamps,
        "value": values,
    }

In [None]:
# Flatten the re-loaded JSON results and display them as a table with the
# columns state, source, timestamp and value.
new_structure = transform_downloaded_data(dow_res)
pd.DataFrame(new_structure)

Unnamed: 0,state,source,timestamp,value
0,Czech Republic,remainder,2018-01-01T00:00:00Z,4434499.0
1,Czech Republic,remainder,2019-01-01T00:00:00Z,4506078.0
2,Czech Republic,remainder,2020-01-01T00:00:00Z,4602808.0
3,Czech Republic,remainder,2021-01-01T00:00:00Z,4656573.0
4,Czech Republic,remainder,2022-01-01T00:00:00Z,4707705.0
...,...,...,...,...
85,United States of America,source=esri/Google_Africa_Buildings,2019-01-01T00:00:00Z,0.0
86,United States of America,source=esri/Google_Africa_Buildings,2020-01-01T00:00:00Z,0.0
87,United States of America,source=esri/Google_Africa_Buildings,2021-01-01T00:00:00Z,0.0
88,United States of America,source=esri/Google_Africa_Buildings,2022-01-01T00:00:00Z,0.0


In [None]:
# d = {
#     "feature": [],
#     "source": [],
#     "timestamp": [],
#     "value": []
# }

# for region in json_data:
#     for dic1 in region:
#         feature, source = dic1["groupByObject"][0], dic1["groupByObject"][1]
#         for res in dic1["result"]:
#             d["feature"].append(feature)
#             d["source"].append(source)
#             d["timestamp"].append(res["timestamp"])
#             d["value"].append(res["value"])

# df = pd.DataFrame(d)
