# Building data download and home process
The building data are used to improve the home detection.

In [1]:
%load_ext autoreload
%autoreload 2
%cd D:\mobi-seg-net

D:\mobi-seg-net


In [10]:
import overturemaps
import geopandas as gpd
from shapely import wkb
from tqdm import tqdm
import workers as workers
import sqlalchemy
import io
import pandas as pd

In [3]:
# Data location
user = workers.keys_manager['database']['user']
password = workers.keys_manager['database']['password']
port = workers.keys_manager['database']['port']
db_name = workers.keys_manager['database']['name']
engine = sqlalchemy.create_engine(f'postgresql://{user}:{password}@localhost:{port}/{db_name}?gssencmode=disable')

In [4]:
bbox = (11.0273686052, 55.3617373725, 23.9033785336, 69.1062472602)

In [5]:
# need feature type and bounding box as arguments
# columns=['id', 'source', 'geometry', "class", "subtype"]
table = overturemaps.record_batch_reader("building", bbox).read_all()
table = table.combine_chunks()
# convert to dataframe
df = table.to_pandas()
# DataFrame to GeoDataFrame, set crs
gdf = gpd.GeoDataFrame(
    df,
    geometry=df['geometry'].apply(wkb.loads),
    crs="EPSG:4326"
)

In [6]:
print("No. of buildings", len(gdf))
gdf = gdf[['id', 'geometry', 'sources', 'level', 'subtype', 'class', 'height', 'names']]
gdf.loc[:, 'source'] = gdf['sources'].apply(lambda x: x[0]['dataset'])
gdf = gdf[['id', 'source', 'level', 'subtype', 'class', 'height', 'names', 'geometry']]

No. of buildings 12911247


## 1. Check included buildings' properties

In [7]:
# Get the proportion of rows for each unique value in 'source', 'subtype', and 'class'
share_source = gdf['source'].value_counts(normalize=True)
share_subtype = gdf['subtype'].value_counts(normalize=True)
share_class = gdf['class'].value_counts(normalize=True)

# Display the results
print("Share of rows in 'source':")
print(share_source)

print("\nShare of rows in 'subtype':")
print(share_subtype)

print("\nShare of rows in 'class':")
print(share_class)

Share of rows in 'source':
source
OpenStreetMap             0.536470
Microsoft ML Buildings    0.384917
Esri Community Maps       0.078613
Name: proportion, dtype: float64

Share of rows in 'subtype':
subtype
residential       0.773788
agricultural      0.079216
outbuilding       0.072056
commercial        0.022593
service           0.015951
industrial        0.013962
education         0.008331
civic             0.006593
religious         0.003560
transportation    0.001553
entertainment     0.001409
medical           0.000727
military          0.000262
Name: proportion, dtype: float64

Share of rows in 'class':
class
house             3.199343e-01
garage            1.396720e-01
detached          1.015720e-01
residential       5.624175e-02
shed              5.184506e-02
                      ...     
monastery         1.315063e-06
presbytery        8.767089e-07
wayside_shrine    4.383544e-07
shrine            4.383544e-07
glasshouse        4.383544e-07
Name: proportion, Length: 84, dty

In [18]:
gdf = gdf.loc[gdf['subtype'] == 'residential', :]
print("No. of residential buildings", len(gdf))

No. of residential buildings 1777134


## 2. Home - building association

In [19]:
gdf = gdf.to_crs(3006)
gdf['geometry'] = gdf['geometry'].buffer(50)
gdf = gdf.to_crs(4326)

In [24]:
gdf_home = workers.df2gdf_point(pd.read_sql("SELECT * FROM home", con=engine),
                                x_field='longitude', y_field='latitude', crs=4326, drop=False)
gdf_home = gpd.sjoin(gdf_home.drop_duplicates(subset=['device_aid']),
                     gdf[['id', 'class', 'source', 'geometry']], how="left", predicate="intersects")

In [25]:
print(len(gdf_home.dropna())/len(gdf_home))

0.8630734957611508


In [27]:
gdf_home = gdf_home.dropna()
gdf_home = gdf_home.drop_duplicates(subset=['device_aid'])
print("No. of individual devices with home", len(gdf_home))

No. of individual devices with home 1660987


### 2.1 Save filtered data

In [29]:
gdf_home[['device_aid', 'loc', 'latitude', 'longitude', 'count', 'id', 'class', 'source']].\
    rename(columns={'id': 'b_id', 'class': 'b_class', 'source': 'b_source'}).\
    to_sql('home_building', engine, schema='public', index=False, if_exists='replace')

987