In [61]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.wkt import loads
from shapely.geometry import Point, Polygon
import seaborn as sns
import geohash_hilbert as ghh

In [12]:
osm_df = pd.read_csv("../data/Stupino_OSM.csv")
osm_df.geometry = osm_df['geometry'].apply(loads)
osm_gdf = gpd.GeoDataFrame(osm_df, geometry='geometry', crs=4326)

In [13]:
osm_gdf.columns

Index(['element_type', 'osmid', 'geometry', 'building', 'nodes',
       'building:levels', 'addr:housenumber', 'addr:street', 'addr:city',
       'name', 'power', 'substation', 'shop', 'ref', 'amenity', 'covered',
       'man_made', 'addr:postcode', 'min_height', 'height', 'opening_hours',
       'type'],
      dtype='object')

In [14]:
osm_gdf.head()

Unnamed: 0,element_type,osmid,geometry,building,nodes,building:levels,addr:housenumber,addr:street,addr:city,name,...,shop,ref,amenity,covered,man_made,addr:postcode,min_height,height,opening_hours,type
0,way,38149921,"POLYGON ((38.08843 54.89178, 38.08841 54.89168...",apartments,"[449063448, 449063449, 449063450, 449063451, 4...",5,5,улица Тимирязева,Ступино,,...,,,,,,,,,,
1,way,38150506,"POLYGON ((38.08792 54.88927, 38.08812 54.88926...",apartments,"[449069926, 449069927, 4294693489, 4294693488,...",5,18/19,улица Тургенева,Ступино,,...,,,,,,,,,,
2,way,96695125,"POLYGON ((38.08118 54.89339, 38.08129 54.89400...",apartments,"[1119809344, 1119809343, 1119809359, 433084688...",5,43,Октябрьская улица,Ступино,,...,,,,,,,,,,
3,way,96695127,"POLYGON ((38.08185 54.89397, 38.08205 54.89396...",apartments,"[1119809355, 1119809351, 4330846905, 433084690...",5,41,Октябрьская улица,Ступино,,...,,,,,,,,,,
4,way,96695129,"POLYGON ((38.08257 54.89392, 38.08277 54.89391...",apartments,"[1119809358, 1119809348, 1119809349, 111980935...",5,39,Октябрьская улица,Ступино,,...,,,,,,,,,,


In [15]:
osm_gdf[['building', 'geometry']].explore(categorical=True, column='building')

In [16]:
hw = pd.read_csv("labeled_cluster_data.csv", index_col=0)
hw1 = pd.read_csv("..\\data\\2023-05-13_15-49-08\\labeled_cluster_data_2023-05-13_15-49-08.csv", index_col=0)

In [17]:
hw1.head()

Unnamed: 0,id,lat,lon,cluster,cluster_size,inq,wd_rate,wd_all,work_place
0,2.0,54.8436,38.1929,0.0,5056.0,46681.775,1.0,1.0,False
1,6.0,54.9034,38.0696,0.0,989.0,15146.533333,0.75,0.428571,True
2,6.0,54.8933,38.078,1.0,830.0,39723.665414,1.0,0.571429,True
5,13.0,54.8428,38.1908,0.0,2908.0,38369.617647,1.0,1.0,False
6,14.0,54.8419,38.1885,0.0,2213.0,35349.09434,1.0,1.0,False


In [18]:
hw.shape

(2624, 10)

In [19]:
geometry = [Point(xy) for xy in zip(hw['lon'], hw['lat'])]
hw_gdf = gpd.GeoDataFrame(hw, geometry=geometry, crs=4326)
hw_gdf.explore(categorical=True, column='work_place', cmap=['blue', 'green'])

In [20]:
hw_osm = hw_gdf.sjoin_nearest(osm_gdf, how="left", rsuffix='2')
living = hw_osm[hw_osm['building'].isin(['apartments', 'house', 'residential'])]
working = hw_osm[~hw_osm['building'].isin(['apartments', 'house', 'residential'])]




In [21]:
hw_osm.columns

Index(['id', 'lat', 'lon', 'cluster', 'cluster_size', 'inq', 'wd_rate',
       'wd_all', 'work_place', 'geometry', 'index_2', 'element_type', 'osmid',
       'building', 'nodes', 'building:levels', 'addr:housenumber',
       'addr:street', 'addr:city', 'name', 'power', 'substation', 'shop',
       'ref', 'amenity', 'covered', 'man_made', 'addr:postcode', 'min_height',
       'height', 'opening_hours', 'type'],
      dtype='object')

In [22]:
living.head()

Unnamed: 0,id,lat,lon,cluster,cluster_size,inq,wd_rate,wd_all,work_place,geometry,...,shop,ref,amenity,covered,man_made,addr:postcode,min_height,height,opening_hours,type
0,2.0,54.8436,38.1929,0.0,5056.0,46681.775,1.0,1.0,False,POINT (38.19290 54.84360),...,,,,,,,,,,
2,6.0,54.8933,38.078,1.0,830.0,39723.665414,1.0,0.571429,True,POINT (38.07800 54.89330),...,,,,,,,,,,
5,13.0,54.8428,38.1908,0.0,2908.0,38369.617647,1.0,1.0,False,POINT (38.19080 54.84280),...,,,,,,,,,,
6,14.0,54.8419,38.1885,0.0,2213.0,35349.09434,1.0,1.0,False,POINT (38.18850 54.84190),...,,,,,,,,,,
8,27.0,54.8848,38.0623,0.0,16631.0,38301.278897,1.0,1.0,False,POINT (38.06230 54.88480),...,,,,,,,,,,


In [57]:
hw_osm['hash'] = hw_osm[['lat', 'lon']].apply(lambda x: ghh.encode(x['lon'], x['lat'], precision=18, bits_per_char=2, ), axis=1)

In [62]:
geometry = [Polygon(
                ghh.rectangle(hash, bits_per_char=18)['geometry']['coordinates']
                ) 
                for hash in zip(hw_osm['hash'])
            ]
geohashes = gpd.GeoDataFrame(hw_osm['hash'])

AssertionError: 

{'type': 'Feature',
 'properties': {'code': '210123101103233300',
  'lng': 38.19328308105469,
  'lat': 54.843406677246094,
  'lng_err': 0.0006866455078125,
  'lat_err': 0.00034332275390625,
  'bits_per_char': 2},
 'bbox': (38.192596435546875, 54.84306335449219, 38.1939697265625, 54.84375),
 'geometry': {'type': 'Polygon',
  'coordinates': [[(38.192596435546875, 54.84306335449219),
    (38.1939697265625, 54.84306335449219),
    (38.1939697265625, 54.84375),
    (38.192596435546875, 54.84375),
    (38.192596435546875, 54.84306335449219)]]}}