# Download the buildings data

Question: How many buildings were mapped with RapiD (Microsoft buildings) in the selected region?

#### Import libs

In [1]:
import json
import os
import requests
import glob
from tqdm import tqdm
import time

### Get the data for every specified region

#### Definitions

In [2]:
# Switch the working directory to the parent folder; the relative data
# folders used below (e.g. "geojson-regions") are resolved from there.
# NOTE: the path is relative, so re-running this cell moves up one more
# level each time.
os.chdir('..')


In [3]:
def get_geojson_names():
    """Return the base names of the region files in ``geojson-regions``.

    The folder is looked up relative to the current working directory and
    only files matching ``*.geojson`` are considered.

    Returns:
        list[str]: File names without directory and without the
        ``.geojson`` extension, sorted alphabetically. The list is empty
        when the folder is missing or contains no matching file.
    """
    geojson_dir = os.path.join(os.getcwd(), "geojson-regions")
    file_pattern = os.path.join(geojson_dir, "*.geojson")

    # glob yields matches in an arbitrary, OS-dependent order. Sorting keeps
    # the result (and any index built from it) reproducible between runs
    # and machines.
    return sorted(
        os.path.splitext(os.path.basename(file_name))[0]
        for file_name in glob.glob(file_pattern)
    )
    

# Collect the available region file names once; the bare name on the last
# line makes the notebook display the resulting list.
geojson_names = get_geojson_names()
geojson_names


['geojson-africa-states',
 'geojson-asia-states',
 'geojson-continent-states',
 'geojson-europe-states',
 'geojson-northamerica-states']

In [4]:
# def get_geojson_names():

#     # os.chdir('..')
#     home_dir = os.getcwd()
#     geojson_dir = os.path.join(home_dir, f"geojson-regions", "")

#     geojson_names = []
#     for root, dirs, files in os.walk(geojson_dir):
#         for file in files:
#             if file.endswith('.geojson'):
#                 file_path = os.path.join(root, file)
#                 file_name = os.path.splitext(os.path.basename(file_path))[1]
#                 geojson_names.append(file_name)
#         # geojson_names.extend(file_names)
#     print(geojson_names)
#     return geojson_names


# geojson_names = get_geojson_names()


In [5]:
# Number the region names (0, 1, 2, ...) so that a region can be picked by
# its index; the bare name below displays the mapping in the notebook.
geojson_dic = dict(enumerate(geojson_names))
geojson_dic


{0: 'geojson-africa-states',
 1: 'geojson-asia-states',
 2: 'geojson-continent-states',
 3: 'geojson-europe-states',
 4: 'geojson-northamerica-states'}

In [6]:
# define the dictionaries

# data_dic = {0: "blds_continent-states_18-23_ai",
#             1: "blds_africa-states_18-23_ai",
#             2: "blds_northamerica-states_18-23_ai",
#             3: "blds_europe-states_18-23_ai",
#             4: "blds_asia-states_18-23_ai", }

# geojson_dic = {0: "geojson-continent-states",
#                       1: "geojson-africa-states",
#                       2: "geojson-northamerica-states",
#                       3: "geojson-europe-states",
#                       4: "geojson-asia-states",}

In [7]:
# define the user input


def _ask_for_int(prompt, valid_values):
    """Ask on stdin until the user enters an integer from ``valid_values``.

    Args:
        prompt: Text shown by ``input``.
        valid_values: Collection of accepted integers.

    Returns:
        int: The first entered value that is contained in ``valid_values``.
    """
    while True:
        try:
            answer = int(input(prompt))
        except ValueError:
            print("Please enter a number.")
            continue
        if answer in valid_values:
            return answer
        # Name the accepted values so the user knows what to type next.
        print(f"Please enter one of: {', '.join(map(str, valid_values))}.")


# user_d_spec: index (key of geojson_dic) of the region file to download.
if geojson_dic:
    user_d_spec = _ask_for_int(
        f"Which data do you want to download? Enter {geojson_dic}: ",
        sorted(geojson_dic),
    )
else:
    # Without any region file there is nothing to choose from; in this case
    # user_d_spec stays undefined.
    print("Geojson_dic is empty, restart the kernel, please.")

# user_d_format: 0 exports plain json, 1 exports geojson.
user_d_format = _ask_for_int(
    "Which data format do you want to export? "
    "Enter 0 for json format or 1 for geojson format: ",
    range(0, 2),
)


In [8]:
# define the functions

def convert_geojson_structure(original_dict):
    """Wrap a single GeoJSON feature in a one-element FeatureCollection.

    Only the geometry (type and coordinates) and the properties of the
    feature are carried over. The coordinates and properties objects are
    reused, not copied.

    Args:
        original_dict: Feature mapping with ``geometry`` (containing
            ``coordinates``) and ``properties`` keys.

    Returns:
        dict: ``{"type": "FeatureCollection", "features": [feature]}``.

    Raises:
        KeyError: If ``geometry``, ``coordinates`` or ``properties`` is
            missing from ``original_dict``.
    """
    geometry = original_dict["geometry"]

    feature_dict = {
        "type": "Feature",
        "geometry": {
            # Keep the feature's own geometry type so that e.g. a Polygon is
            # not labelled as MultiPolygon (the coordinate nesting would no
            # longer match). MultiPolygon stays the fallback when the source
            # gives no type.
            "type": geometry.get("type", "MultiPolygon"),
            "coordinates": geometry["coordinates"],
        },
        "properties": original_dict["properties"],
    }

    return {"type": "FeatureCollection", "features": [feature_dict]}

def data_gen(feature):
    """Request the yearly building counts for one region feature.

    The feature is wrapped into a FeatureCollection and posted as ``bpolys``
    to the module-level ``url``. The response's ``groupByResult`` list is
    returned after the first entry of every ``groupByObject`` has been
    replaced by the feature's ``NAME_EN`` property.

    Connection problems and server-side failures are retried every five
    seconds. Client errors (HTTP 4xx, except 429) are raised, because
    repeating the identical request would fail again and loop forever.

    Args:
        feature: GeoJSON feature with ``geometry`` and ``properties``
            (including ``NAME_EN``).

    Returns:
        list: The ``groupByResult`` entries of the response.

    Raises:
        ValueError: If a request parameter is an empty string.
        requests.exceptions.HTTPError: On a non-retryable HTTP 4xx answer.
        KeyError: If ``NAME_EN`` or ``groupByResult`` is missing.
    """
    desired_geojson_structure = convert_geojson_structure(feature)
    # Looked up once, before the request, so a missing name fails fast.
    region_name = desired_geojson_structure["features"][0]["properties"]["NAME_EN"]

    parameters = {
        # pass GeoJSON as string.
        "bpolys": json.dumps(desired_geojson_structure),
        "filter": "building=* and building!=no and geometry:polygon",
        "groupByKey": "source",
        "groupByValues": "microsoft/BuildingFootprints",
        "format": "json",
        "time": "2018-01-01/2023-01-01/P1Y",
    }
    headers = {
        "accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
    }

    # Explicit check instead of assert: asserts are stripped under -O.
    for key, value in parameters.items():
        if value == "":
            raise ValueError(f"Please provide values for the parameters (empty: {key})")

    while True:
        try:
            response = requests.post(url, data=parameters, headers=headers)
            response.raise_for_status()  # Raise an Exception on a 4xx/5xx HTTP status code
            result = response.json()["groupByResult"]
        except requests.exceptions.HTTPError as err:
            status = err.response.status_code if err.response is not None else None
            if status is not None and 400 <= status < 500 and status != 429:
                # The request itself is rejected; retrying cannot succeed.
                raise
            print(f"Server answered with HTTP status {status}, retrying...")
            time.sleep(5)  # Wait for 5 seconds
            continue
        except requests.exceptions.RequestException:
            # Connection error occurred, wait for connection to be restored
            print("Waiting for Wi-Fi connection to be restored...")
            time.sleep(5)  # Wait for 5 seconds
            continue

        for state in result:
            state["groupByObject"][0] = region_name

        return result

# longer version of the function
# def connect_feat_data(feature, data):

#     # Find the relevant value in data
#     rem_value_to_add = None
#     for elem in data:
#         if elem['groupByObject'][1] == 'remainder':
#             for result in elem['result']:
#                 if result['timestamp'] == '2023-01-01T00:00:00Z':
#                     rem_value_to_add = result['value']
#                     break
#             break


#     ai_value_to_add = None
#     for elem in data:
#         if elem['groupByObject'][1] == 'source=microsoft/BuildingFootprints':
#             for result in elem['result']:
#                 if result['timestamp'] == '2023-01-01T00:00:00Z':
#                     ai_value_to_add = result['value']
#                     break
#             break
#     # return value_to_add


#     blds_total = None
#     blds_total = rem_value_to_add + ai_value_to_add

#     # Calculate the AI percentage
#     ai_percentage = None
#     ai_percentage = (ai_value_to_add / (ai_value_to_add + rem_value_to_add)) * 100
    
#     # Add the value to the properties of feature
#     if rem_value_to_add is not None:
#         feature['properties']['mm_blds_2023'] = rem_value_to_add

#     if ai_value_to_add is not None:
#         feature['properties']['ai_blds_2023'] = ai_value_to_add

#     if blds_total is not None:
#         feature['properties']['blds_total'] = blds_total

#     if ai_percentage is not None:
#         feature['properties']['ai_percentage'] = ai_percentage

#     return feature


def connect_feat_data(feature, data):
    """Add the 2023 building counts from ``data`` to ``feature``'s properties.

    ``data`` is scanned for the groups whose second ``groupByObject`` entry
    is ``'remainder'`` or ``'source=microsoft/BuildingFootprints'``; for each
    the value at timestamp ``2023-01-01T00:00:00Z`` is taken.

    Properties written (each only when its value is available):
        mm_blds_2023: count of the ``remainder`` group.
        ai_blds_2023: count of the Microsoft building footprints group.
        blds_total: sum of both counts (needs both).
        ai_percentage: share of ``ai_blds_2023`` in ``blds_total`` in
            percent; omitted when the total is 0, because the share is
            undefined then (this used to raise ZeroDivisionError).

    Args:
        feature: GeoJSON feature; its ``properties`` dict is modified in
            place.
        data: List of group entries with ``groupByObject`` and ``result``.

    Returns:
        dict: The same ``feature`` object that was passed in.
    """
    target_timestamp = '2023-01-01T00:00:00Z'

    rem_value_to_add = None
    ai_value_to_add = None

    for elem in data:
        group = elem['groupByObject'][1]
        if group == 'remainder':
            rem_value_to_add = find_value(elem, target_timestamp)
        elif group == 'source=microsoft/BuildingFootprints':
            ai_value_to_add = find_value(elem, target_timestamp)

    properties = feature['properties']

    if rem_value_to_add is not None:
        properties['mm_blds_2023'] = rem_value_to_add

    if ai_value_to_add is not None:
        properties['ai_blds_2023'] = ai_value_to_add

    if rem_value_to_add is not None and ai_value_to_add is not None:
        blds_total = rem_value_to_add + ai_value_to_add
        properties['blds_total'] = blds_total
        # A region without any building has no meaningful share.
        if blds_total:
            properties['ai_percentage'] = (ai_value_to_add / blds_total) * 100

    return feature


def find_value(elem, timestamp):
    """Return the ``value`` of the first result of ``elem`` at ``timestamp``.

    Args:
        elem: Group entry with a ``result`` list of
            ``{'timestamp': ..., 'value': ...}`` dicts.
        timestamp: Timestamp string to look for (exact match).

    Returns:
        The matching ``value``, or ``None`` when no result has that timestamp.
    """
    return next(
        (result['value'] for result in elem['result']
         if result['timestamp'] == timestamp),
        None,
    )


def return_json_data():
    """Download the building counts for every feature of ``bpolys``.

    Reads the module-level ``bpolys`` FeatureCollection, calls ``data_gen``
    for each feature in order (with a tqdm progress bar) and returns the
    results as a list, one entry per feature.
    """
    region_features = bpolys["features"]
    progress = tqdm(region_features, total=len(region_features))
    return [data_gen(region_feature) for region_feature in progress]


def return_geojson_data():
    """Download the counts for every feature of ``bpolys`` and attach them.

    Reads the module-level ``bpolys`` FeatureCollection. For each feature,
    in order and with a tqdm progress bar, the result of ``data_gen`` is
    merged into the feature by ``connect_feat_data``. Returns the list of
    features returned by ``connect_feat_data``.
    """
    region_features = bpolys["features"]
    progress = tqdm(region_features, total=len(region_features))
    return [
        connect_feat_data(region_feature, data_gen(region_feature))
        for region_feature in progress
    ]

# export the data


def export_data_as_json(name, data):
    """Write ``data`` as indented JSON to ``downloaded-data-json/<name>.json``.

    The output folder is relative to the current working directory and is
    created when it does not exist yet, so a finished download is not lost
    to a missing folder.

    Args:
        name: File name without extension.
        data: JSON-serialisable object to store.
    """
    out_dir = "downloaded-data-json"
    os.makedirs(out_dir, exist_ok=True)

    # os.path.join instead of a hard-coded "\\" keeps the path valid on
    # non-Windows systems as well.
    with open(os.path.join(out_dir, f"{name}.json"), "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)


# export_data_as_json(data_dic[user_d_spec], return_json_data)


def export_data_as_geojson(name, data):
    """Write features to ``downloaded-data-geojson/<name>.geojson``.

    ``data`` is wrapped into a GeoJSON FeatureCollection and stored as
    indented JSON. The output folder is relative to the current working
    directory and is created when it does not exist yet.

    Args:
        name: File name without extension.
        data: List of GeoJSON feature dicts.
    """
    out_dir = "downloaded-data-geojson"
    os.makedirs(out_dir, exist_ok=True)

    # Create a FeatureCollection from the structure
    feature_collection = {
        'type': 'FeatureCollection',
        'features': data
    }

    # os.path.join instead of a hard-coded "\\" keeps the path valid on
    # non-Windows systems as well.
    with open(os.path.join(out_dir, f"{name}.geojson"), 'w', encoding="utf-8") as file:
        json.dump(feature_collection, file, indent=4)


# export_data_as_geojson(data_dic[user_d_spec], return_geojson_data)


In [9]:
# define the URL

# API root and the endpoint path used for the requests; "url" is the
# concatenation of both and is read by data_gen.
base_url = "https://api.ohsome.org/v1"
endpoint = "/elements/count/groupBy/boundary/groupBy/tag"
url = f"{base_url}{endpoint}"

#### Get the data

In [10]:
# read geojson data

# os.chdir('..')
# home_wd = os.getcwd()
# downloaded_data_geojson_dir = os.path.join(home_wd, "downloaded-data-geojson", "")

# Load the selected region file. The path is built with os.path.join rather
# than a hard-coded "\\" so it also resolves on non-Windows systems, and the
# file is read as UTF-8 instead of the platform's default encoding.
region_path = os.path.join("geojson-regions", f"{geojson_dic[user_d_spec]}.geojson")
with open(region_path, "r", encoding="utf-8") as file:
    bpolys = json.load(file)


In [11]:
# convert_geojson_structure(bpolys["features"][3])


In [12]:
json_data = []
geojson_data = []

# Run the download in the format chosen by the user (0 = json, anything
# else = geojson), keep the result in the matching module-level list and
# write it to disk under the selected region's name.
if user_d_format != 0:
    obtained_geojson_data = return_geojson_data()
    geojson_data += obtained_geojson_data
    export_data_as_geojson(geojson_dic[user_d_spec], obtained_geojson_data)
else:
    obtained_json_data = return_json_data()
    json_data += obtained_json_data
    export_data_as_json(geojson_dic[user_d_spec], obtained_json_data)


  6%|▌         | 3/53 [02:40<30:53, 37.07s/it]   

Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...
Waiting for Wi-Fi connection to be restored...


 25%|██▍       | 13/53 [10:49<24:31, 36.78s/it]  

In [None]:
import pandas as pd

In [None]:
# Flatten the nested download result (regions -> groups -> yearly results)
# into one column-oriented dict with a row per (feature, source, timestamp).
columns = ("feature", "source", "timestamp", "value")
d = {column: [] for column in columns}

for region in json_data:
    for group in region:
        feature = group["groupByObject"][0]
        source = group["groupByObject"][1]
        for entry in group["result"]:
            row = (feature, source, entry["timestamp"], entry["value"])
            for column, cell in zip(columns, row):
                d[column].append(cell)

df = pd.DataFrame(d)


In [None]:
# 
df

Unnamed: 0,feature,source,timestamp,value
