In [1]:
import requests
import json
import pandas as pd
import time
import math
import itertools
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
# HTTP headers passed to every requests.get call made by the scraper; the
# User-Agent value is a desktop Chrome identification string.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/107.0.0.0 Safari/537.36'
    ),
}

In [3]:
# Accumulators filled by scrape_region: `entries` keeps each raw feature once,
# `ids` holds the featureId values already stored so duplicates are skipped.
entries: list = []
ids: set = set()

In [4]:
# Side length of a top-level tile. The bounds below are tile indices: the
# lower corner is rounded down and the upper corner rounded up, so the tile
# grid fully encloses x in [2738000, 2750000] and y in [1251000, 1258000].
# NOTE(review): coordinates are presumably in the spatial reference the
# requests use (sr=2056 in the identify URL) — confirm.
size = 1024
x_start, y_start = (math.floor(lower / size) for lower in (2738000, 1251000))
x_end, y_end = (math.ceil(upper / size) for upper in (2750000, 1258000))

In [5]:
# Lower bound on the number of requests: the crawl loop visits three offsets
# per tile index along each axis (hence the factor 9), and every visited
# position issues at least one request before any subdivision.
print(f'making >= {9*((x_end - x_start + 1)) * ((y_end - y_start + 1))} requests')

making >= 1134 requests


In [6]:
# Number of requests issued per tile side length; keys are the powers of two
# from 1 up to 1024, each starting at zero.
request_counter = dict.fromkeys((1 << exponent for exponent in range(11)), 0)

In [7]:
# Regions whose request raised, stored by scrape_region as
# (sx, sy, s, exception) tuples so they can be inspected or retried.
failed_tasks: list = []

In [8]:
def scrape_region(sx, sy, s, timeout=30):
    """Fetch the register features for one square tile, subdividing full tiles.

    One identify request is sent for the square whose map extent runs from
    (sx, sy) to (sx + s, sy + s), using the point (sx + s//2, sy + s//2) as
    the query geometry. If the response holds more than 200 features (the
    request asks for at most 202) and the tile can still be split, the tile
    is divided into four quadrants of side s//2 that are scraped recursively.
    Otherwise every feature whose featureId has not been seen yet is appended
    to the module-level ``entries`` list and its id added to ``ids``.

    Args:
        sx: Minimum x coordinate of the tile.
        sy: Minimum y coordinate of the tile.
        s: Side length of the tile. Must be a key of ``request_counter``
            (a KeyError is raised otherwise).
        timeout: Seconds passed to ``requests.get`` as its timeout, so that a
            stalled connection fails instead of blocking the crawl forever.

    Returns:
        None. Results are accumulated in ``entries`` / ``ids``; failures are
        printed and recorded in ``failed_tasks`` as (sx, sy, s, exception).
    """
    time.sleep(0.1)  # fixed pause before every request
    request_counter[s] += 1
    url = f"https://api3.geo.admin.ch/rest/services/all/MapServer/identify?geometry={sx+s//2}%2C{sy+s//2}&geometryFormat=geojson&geometryType=esriGeometryPoint&imageDisplay=1920%2C333%2C96&lang=de&layers=all%3Ach.bfs.gebaeude_wohnungs_register&limit=202&mapExtent={sx}%2C{sy}%2C{sx+s}%2C{sy+s}&returnGeometry=true&sr=2056&tolerance=100"
    try:
        r = requests.get(url=url, headers=HEADERS, timeout=timeout)
        # Surface HTTP errors with their status code instead of an opaque
        # KeyError on the missing 'results' field.
        r.raise_for_status()
        result = r.json()['results']
    except Exception as e:
        # Deliberately broad: one bad tile must not abort the whole crawl.
        # The region is recorded so it can be inspected afterwards.
        print(e)
        failed_tasks.append((sx, sy, s, e))
        return

    if len(result) <= 200 or s == 1:
        # Response fits under the threshold (or the tile cannot be split any
        # further): keep every feature not collected before.
        for feature in result:
            fid = feature['featureId']
            if fid not in ids:
                ids.add(fid)
                entries.append(feature)
        return

    # Too many features for a single response: recurse into the quadrants.
    half = s // 2
    scrape_region(sx, sy, half, timeout)
    scrape_region(sx + half, sy, half, timeout)
    scrape_region(sx, sy + half, half, timeout)
    scrape_region(sx + half, sy + half, half, timeout)

In [9]:
# Walk the bounding box in steps of a third of a tile along each axis while
# still requesting full-size tiles, so neighbouring top-level tiles overlap.
x_steps = range(x_start * 3 - 2, x_end * 3 + 1)
y_steps = range(y_start * 3 - 2, y_end * 3 + 1)
for x, y in tqdm(list(itertools.product(x_steps, y_steps))):
    scrape_region(x / 3 * size, y / 3 * size, size)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1134/1134 [08:23<00:00,  2.25it/s]


In [10]:
# Optional: uncomment to save the raw scraped features to disk as JSON.
# with open('gwr_scrape_large.json', 'w') as f:
#     json.dump(entries, f)

In [21]:
# Requests issued per tile side length (incremented inside scrape_region).
request_counter

{1: 0,
 2: 0,
 4: 0,
 8: 0,
 16: 0,
 32: 0,
 64: 0,
 128: 0,
 256: 0,
 512: 464,
 1024: 1134}

In [12]:
# Total number of requests across all tile sizes.
print(f'made a total of {sum(request_counter.values())} requests')

made a total of 1598 requests


In [13]:
# Regions recorded as (sx, sy, s, exception) when their request raised;
# an empty list means no request failed.
failed_tasks

[]

In [14]:
# Number of features collected, already deduplicated by featureId.
len(entries)

30475

In [15]:
# Flat records built from `entries`: one dict per dwelling.
objects: list = []

In [16]:
# Both tables are lists of (key in a feature's 'properties' dict, output
# column name) pairs, written as ordered mappings for readability.

# Values copied unchanged onto every record of the same feature.
useful_properties_gbde = list({
    'ggdename': 'city',
    'gdekt': 'canton',
    'gbauj': 'year',
    'strname': 'street_name',
    'deinr': 'street_number',
}.items())

# Values indexed per dwelling: element i belongs to the i-th dwelling.
useful_properties_whng = list({
    'wmehrg': 'multiple_floors',
    'wazim': 'num_rooms',
    'warea': 'area',
    'wkche': 'kitchen',
    'wstat': 'status',  # mapping required
    # NOTE(review): the original author marked this mapping with '?'.
    # 'ewid' may be a dwelling identifier rather than a floor — confirm
    # against the register's attribute catalogue.
    'ewid': 'floor',
}.items())

In [17]:
# Peek at one raw feature to see which property names are available.
# Displaying the whole list would dump every scraped feature into the
# notebook output.
entries[:1]

[{'type': 'Feature',
  'featureId': '191826477_0',
  'bbox': [2737142.9, 1250037.5, 2737142.9, 1250037.5],
  'layerBodId': 'ch.bfs.gebaeude_wohnungs_register',
  'layerName': 'Gebäude- und Wohnungsregister',
  'id': '191826477_0',
  'geometry': {'type': 'Point', 'coordinates': [2737142.9, 1250037.5]},
  'properties': {'egid': '191826477',
   'strname_deinr': 'Ramsen 4430',
   'plz_plz6': '9100/910000',
   'ggdename': 'Herisau',
   'ggdenr': 3001,
   'gexpdat': '10.11.2022',
   'gdekt': 'AR',
   'egrid': 'CH973577931310',
   'lgbkr': 0,
   'lparz': '1887',
   'lparzsx': None,
   'ltyp': None,
   'gebnr': '4430',
   'gbez': 'Waldhütte',
   'gkode': 2737142.873,
   'gkodn': 1250037.473,
   'gksce': 901,
   'gstat': 1004,
   'gkat': 1060,
   'gklas': 1274,
   'gbauj': None,
   'gbaum': None,
   'gbaup': None,
   'gabbj': None,
   'garea': 10,
   'gvol': None,
   'gvolnorm': None,
   'gvolsce': None,
   'gastw': None,
   'ganzwhg': None,
   'gazzi': None,
   'gschutzr': 0,
   'gebf': None,


In [18]:
# Flatten every feature into one record per dwelling and append the records
# to `objects`.
for feature in entries:
    props = feature['properties']
    dwelling_numbers = props['whgnr']
    # Features without dwelling numbers contribute no records.
    if dwelling_numbers is None:
        continue

    # Values shared by all dwellings of this feature: coordinates first,
    # then the building-level properties in table order.
    coords = feature['geometry']['coordinates']
    building = {'coord_x': coords[0], 'coord_y': coords[1]}
    building.update((column, props[key]) for key, column in useful_properties_gbde)

    # The per-dwelling properties are indexed in parallel with 'whgnr'.
    for idx in range(len(dwelling_numbers)):
        dwelling = dict(building)
        dwelling.update((column, props[key][idx]) for key, column in useful_properties_whng)
        objects.append(dwelling)

In [23]:
# Build the dwelling table, keep only the rows for the city of St. Gallen and
# write them to CSV.
dwellings = pd.DataFrame(objects)
df = (
    dwellings
    # .loc + .assign yields a new frame; assigning a column on a
    # boolean-filtered frame is the chained-assignment pattern that triggers
    # SettingWithCopyWarning.
    .loc[dwellings['city'] == 'St. Gallen']
    # Each street_name value is reduced to its first element.
    # NOTE(review): presumably a list of name variants — confirm.
    .assign(street_name=lambda frame: frame['street_name'].map(lambda x: x[0]))
)
df.to_csv('data/gwr_cleaned_large.csv')