In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin

from h3 import h3
from shapely.geometry import Polygon
from shapely.geometry import box
import matplotlib.pyplot as plt
import transbigdata as tbd
from matplotlib.colors import Normalize

from palettable.colorbrewer.diverging import Spectral_9_r
from matplotlib.colors import LinearSegmentedColormap

custom_cmap = LinearSegmentedColormap.from_list('custom_cmap', Spectral_9_r.mpl_colors)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [45]:
accessibility_data = gpd.read_file('POI/Target_area/accessibility_data.shp') # accessibility gain
accessibility_data = accessibility_data.drop(columns=['index'])
accessibility_data.rename(columns={'accessibil': 'accessibility'}, inplace=True)

accessibility_data = accessibility_data.to_crs(3006)
accessibility_data['area'] = accessibility_data.geometry.area / (10**6)
accessibility_data = accessibility_data.to_crs(4326)

accessibility_data.head()

Unnamed: 0,hex_id,accessibility,geometry,area
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",0.081623
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",0.081692
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.082003
3,891f2506a8bffff,38454,"POLYGON ((11.93287 57.66635, 11.93551 57.66729...",0.081708
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.081726


# Building POIs

In [250]:
POI_building = gpd.read_file('POI/Target_area/POI_building.shp') 

def merge_fclasses(fclass):
    if fclass in ['bungalow','allotment_house','mixed','semidetached_house','detached', 'dormitory','hut','semi','house',
                  'residential','apartments','cabin','shelter', 'hostel','terrace']:
        return 'LUP_Resi'
    elif fclass in ['retail','supermarket','hotel','hangar','factory','store','industrial','office','nursery','commercial',
                    'restaurant','warehouse','cafe','shop','kiosk','brewery','control_tower','dock','manufacture','bridge']:
        return 'LUP_Comm' 
    elif fclass in ['sports_hall','park','theatre','church','religious','cathedral','social_facility','subway_station',
                    'cultural','palace','gazebo','sport', 'sports_centre','theatre','stadium','ruins','synagogue','chapel',
                    'pavilion','mosque','cinema','museum','conference_centre','concert_hall','riding_hall', 'houseboat',
                    'boathouse','static_caravan','grandstand','synagogue','boat']:
        return 'LUP_Recre'
    elif fclass in ['university','school','college','kindergarten','riding_school']:
        return 'LUP_Edu'
    elif fclass in ['train_station','civic','library','community_centre','bank','atm','post_office', 'toilets', 'water_tower'
                    'government','public','railway_station','depot','observatory','embassy', 'parking', 'historic',
                    'transportation','fire_station','tower']:
        return 'LUP_Public'
    elif fclass in ['hospital', 'clinic']:
        return 'LUP_Health'
    else:
        return 'LUP_Other'
    
POI_building['major_clas'] = POI_building['type'].apply(merge_fclasses)
print('length of total POI_building = %d ' % len(POI_building) )
POI_building.head()

length of total POI_building = 144977 


Unnamed: 0,osm_id,code,fclass,name,type,geometry,major_clas
0,4714675,1500,building,,parking,"POLYGON ((11.85957 57.72728, 11.85959 57.72780...",LUP_Public
1,4732220,1500,building,,parking,"POLYGON ((11.85360 57.73053, 11.85433 57.73092...",LUP_Public
2,4767614,1500,building,,parking,"POLYGON ((11.98110 57.69712, 11.98178 57.69745...",LUP_Public
3,4769938,1500,building,Volvohallen,industrial,"POLYGON ((11.84796 57.72443, 11.84810 57.72447...",LUP_Comm
4,4769946,1500,building,,parking,"POLYGON ((11.86237 57.72717, 11.86283 57.72774...",LUP_Public


In [237]:
POI_building['type'].unique()

array(['parking', 'industrial', None, 'train_station', 'transportation',
       'office', 'retail', 'school', 'sports_hall', 'hospital', 'public',
       'cathedral', 'university', 'apartments', 'riding_hall',
       'farm_auxiliary', 'church', 'kindergarten', 'residential',
       'warehouse', 'commercial', 'service', 'supermarket', 'garages',
       'government', 'terrace', 'garage', 'shed', 'detached', 'house',
       'dormitory', 'construction', 'roof', 'water_tower', 'hotel',
       'sports_centre', 'greenhouse', 'semidetached_house', 'kiosk',
       'chapel', 'mosque', 'civic', 'grandstand', 'toilets', 'synagogue',
       'carport', 'bungalow', 'storage_tank', 'pavilion', 'barn',
       'bridge', 'dock', 'fire_station', 'religious', 'ship', 'farm',
       'boat', 'shop', 'houseboat', 'historic', 'allotment_house',
       'stable', 'brewery', 'college', 'cabin', 'manufacture', 'tower',
       'boathouse', 'ruins', 'hut', 'bunker', 'control_tower',
       'static_caravan', 'restaur

In [251]:
intersected_building = gpd.overlay(POI_building, accessibility_data, how='intersection')
print('length of total intersected POI_building = %d ' % len(intersected_building) )
intersected_building.head()

length of total intersected POI_building = 137589 


Unnamed: 0,osm_id,code,fclass,name,type,major_clas,hex_id,accessibility,area,geometry
0,4714675,1500,building,,parking,LUP_Public,891f251494bffff,22735,0.081531,"POLYGON ((11.85959 57.72780, 11.86057 57.72779..."
1,4714675,1500,building,,parking,LUP_Public,891f2514b97ffff,3270,0.081529,"POLYGON ((11.85958 57.72737, 11.85959 57.72728..."
2,4732220,1500,building,,parking,LUP_Public,891f2514827ffff,0,0.081518,"POLYGON ((11.85474 57.73130, 11.85477 57.73174..."
3,4732220,1500,building,,parking,LUP_Public,891f251482fffff,3,0.081523,"POLYGON ((11.85433 57.73092, 11.85445 57.73102..."
4,4767614,1500,building,,parking,LUP_Public,891f2506d07ffff,35736,0.081655,"POLYGON ((11.98178 57.69745, 11.98190 57.69738..."


In [252]:
intersected_building['count']=1
result_poi_building = intersected_building.groupby(['hex_id', 'major_clas']).agg({'count': 'sum'})
result_poi_building = result_poi_building.reset_index(drop = False)
result_poi_building.head()

Unnamed: 0,hex_id,major_clas,count
0,891f2504003ffff,LUP_Other,77
1,891f2504003ffff,LUP_Resi,6
2,891f2504007ffff,LUP_Other,17
3,891f2504007ffff,LUP_Resi,6
4,891f250400bffff,LUP_Edu,2


In [253]:
result_poi_building['LUP_Comm'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Comm']
result_poi_building['LUP_Other'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Other']
result_poi_building['LUP_Recre'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Recre']
result_poi_building['LUP_Public'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Public']
result_poi_building['LUP_Resi'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Resi']
result_poi_building['LUP_Edu'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Edu']
result_poi_building['LUP_Health'] = result_poi_building['count'][result_poi_building['major_clas'] == 'LUP_Health']

result_poi_building = result_poi_building.drop(columns=['major_clas', 'count'])
result_poi_building

Unnamed: 0,hex_id,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
0,891f2504003ffff,,77.0,,,,,
1,891f2504003ffff,,,,,6.0,,
2,891f2504007ffff,,17.0,,,,,
3,891f2504007ffff,,,,,6.0,,
4,891f250400bffff,,,,,,2.0,
...,...,...,...,...,...,...,...,...
8439,891f25b9dabffff,,6.0,,,,,
8440,891f25b9db3ffff,,1.0,,,,,
8441,891f25b9db7ffff,,12.0,,,,,
8442,891f25b9dbbffff,,3.0,,,,,


In [254]:
result_poi_building = result_poi_building.groupby(['hex_id']).sum()
result_poi_building

Unnamed: 0_level_0,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
891f2504003ffff,0.0,77.0,0.0,0.0,6.0,0.0,0.0
891f2504007ffff,0.0,17.0,0.0,0.0,6.0,0.0,0.0
891f250400bffff,0.0,29.0,0.0,0.0,0.0,2.0,0.0
891f250400fffff,0.0,14.0,0.0,0.0,8.0,0.0,0.0
891f2504013ffff,0.0,49.0,0.0,0.0,17.0,0.0,0.0
...,...,...,...,...,...,...,...
891f25b9dabffff,0.0,6.0,0.0,0.0,0.0,0.0,0.0
891f25b9db3ffff,0.0,1.0,0.0,0.0,0.0,0.0,0.0
891f25b9db7ffff,0.0,12.0,0.0,0.0,0.0,0.0,0.0
891f25b9dbbffff,0.0,3.0,0.0,0.0,0.0,0.0,0.0


In [255]:
result_poi_building.sum()

LUP_Comm        1474.0
LUP_Other     105670.0
LUP_Recre        357.0
LUP_Public       293.0
LUP_Resi       28689.0
LUP_Edu         1008.0
LUP_Health        98.0
dtype: float64

# Land use Polygon POIs

In [256]:
POI_land_use = gpd.read_file('POI/Target_area/POI_land_use.shp')

POI_land_use['fclass'].unique()

array(['school', 'park', 'zoo', 'kindergarten', 'graveyard', 'pitch',
       'playground', 'sports_centre', 'prison', 'furniture_shop',
       'stadium', 'convenience', 'hospital', 'hotel', 'university',
       'library', 'ice_rink', 'supermarket', 'golf_course', 'mall',
       'fort', 'fountain', 'fast_food', 'camp_site', 'cafe', 'kiosk',
       'bakery', 'doityourself', 'community_centre', 'recycling_glass',
       'restaurant', 'car_dealership', 'garden_centre', 'theatre',
       'museum', 'recycling', 'castle', 'attraction', 'market_place',
       'water_tower', 'tower', 'water_works', 'archaeological',
       'nightclub', 'fire_station', 'dog_park', 'arts_centre',
       'beverages', 'town_hall', 'embassy', 'toilet', 'observation_tower',
       'car_wash', 'chalet', 'hostel', 'guesthouse', 'nursing_home',
       'bench', 'cinema', 'police', 'shelter', 'greengrocer',
       'bicycle_shop', 'florist', 'veterinary', 'swimming_pool',
       'recycling_clothes', 'pub', 'courthouse', 'r

In [257]:
def merge_fclasses(fclass):
    if fclass in ['shelter','chalet','hostel','guesthouse']:
        return 'LUP_Resi'
    elif fclass in ['hotel','restaurant','bicycle_shop','fast_food','cafe','bar','supermarket','convenience','department_store',
                    'mall','pub','motel','outdoor_shop','bookshop','clothes','optician','car_rental','nightclub','bakery',
                    'laundry','hairdresser','beverages','florist','travel_agent','biergarten','furniture_shop','stationery',
                    'recycling_clothes','bicycle_rental','food_court','beauty_shop','doityourself','video_shop','sports_shop',
                    'mobile_phone_shop','shoe_shop','jeweller','toy_shop','car_dealership','gift_shop','greengrocer','butcher',
                    'computer_shop','vending_parking','vending_machine','vending_any','car_wash','market_place','recycling_metal',
                    'recycling_paper','car_sharing','kiosk','recycling_glass','garden_centre','recycling']:
        return 'LUP_Comm'
    elif fclass in ['park','theatre','cinema','playground','sports_centre','attraction',
                    'picnic_site','zoo','fountain','viewpoint','artwork','archaeological','ruins','arts_centre',
                    'drinking_water','theme_park','pitch','stadium', 'ice_rink','golf_course','camp_site','dog_park',
                    'observation_tower', 'bench','swimming_pool','track','fort','caravan_site']:
        return 'LUP_Recre'
    elif fclass in ['university','school','college','kindergarten','riding_school']:
        return 'LUP_Edu'
    elif fclass in ['community_centre','bank','atm','post_office','museum','monument','memorial,''toilet','library',
                    'embassy','police','post_box','fire_station','courthouse','town_hall', 'tourist_info',
                    'wastewater_plant','prison','water_tower','water_works','tower','castle']:
        return 'LUP_Public'
    elif fclass in ['hospital', 'clinic','nursing_home','veterinary','pharmacy']:
        return 'LUP_Health'
    else:
        return 'LUP_Other'
    
POI_land_use['major_clas'] = POI_land_use['fclass'].apply(merge_fclasses)
print('length of total POI_land_use = %d ' % len(POI_land_use) )
POI_land_use.head()

length of total POI_land_use = 5990 


Unnamed: 0,osm_id,code,fclass,name,geometry,major_clas
0,4768432,2082,school,Oscar Fredriksskolan,"POLYGON ((11.94482 57.69702, 11.94487 57.69710...",LUP_Edu
1,4769341,2204,park,Vasaparken,"POLYGON ((11.96993 57.69853, 11.96995 57.69854...",LUP_Recre
2,4770768,2204,park,Hagaparken,"POLYGON ((11.96090 57.69820, 11.96091 57.69827...",LUP_Recre
3,5022239,2743,zoo,Djurgårdarna,"POLYGON ((11.93809 57.68505, 11.93810 57.68509...",LUP_Recre
4,5106265,2083,kindergarten,Djurgårdsgatan 29 förskola,"POLYGON ((11.93140 57.69635, 11.93177 57.69646...",LUP_Edu


In [258]:
intersected_land_use = gpd.overlay(POI_land_use, accessibility_data, how='intersection')
print('length of total intersected POI_land_use = %d ' % len(intersected_land_use) )
intersected_land_use.head()

length of total intersected POI_land_use = 7559 


Unnamed: 0,osm_id,code,fclass,name,major_clas,hex_id,accessibility,area,geometry
0,4768432,2082,school,Oscar Fredriksskolan,LUP_Edu,891f2506cabffff,65903,0.081641,"POLYGON ((11.94650 57.69709, 11.94624 57.69641..."
1,4768432,2082,school,Oscar Fredriksskolan,LUP_Edu,891f2506cbbffff,48641,0.081635,"POLYGON ((11.94487 57.69710, 11.94489 57.69711..."
2,4769341,2204,park,Vasaparken,LUP_Recre,891f2506dc7ffff,35328,0.081646,"POLYGON ((11.97054 57.69739, 11.97047 57.69747..."
3,4769341,2204,park,Vasaparken,LUP_Recre,891f2506d8bffff,33452,0.081641,"POLYGON ((11.96995 57.69854, 11.96997 57.69853..."
4,4769341,2204,park,Vasaparken,LUP_Recre,891f2506d13ffff,43516,0.081647,"POLYGON ((11.97293 57.69855, 11.97426 57.69762..."


In [259]:
intersected_land_use['count']=1
result_poi_land_use = intersected_land_use.groupby(['hex_id', 'major_clas']).agg({'count': 'sum'})
result_poi_land_use = result_poi_land_use.reset_index(drop = False)
result_poi_land_use.head()

Unnamed: 0,hex_id,major_clas,count
0,891f2504003ffff,LUP_Recre,1
1,891f2504007ffff,LUP_Recre,4
2,891f250400bffff,LUP_Edu,5
3,891f250400bffff,LUP_Recre,4
4,891f250400fffff,LUP_Recre,2


In [260]:
result_poi_land_use['LUP_Comm'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Comm']
result_poi_land_use['LUP_Other'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Other']
result_poi_land_use['LUP_Recre'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Recre']
result_poi_land_use['LUP_Public'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Public']
result_poi_land_use['LUP_Resi'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Resi']
result_poi_land_use['LUP_Edu'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Edu']
result_poi_land_use['LUP_Health'] = result_poi_land_use['count'][result_poi_land_use['major_clas'] == 'LUP_Health']

result_poi_land_use = result_poi_land_use.drop(columns=['major_clas', 'count'])
result_poi_land_use

Unnamed: 0,hex_id,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
0,891f2504003ffff,,,1.0,,,,
1,891f2504007ffff,,,4.0,,,,
2,891f250400bffff,,,,,,5.0,
3,891f250400bffff,,,4.0,,,,
4,891f250400fffff,,,2.0,,,,
...,...,...,...,...,...,...,...,...
3878,891f25b9bb7ffff,,,1.0,,,,
3879,891f25b9bc3ffff,,,2.0,,,,
3880,891f25b9bd3ffff,,,1.0,,,,
3881,891f25b9d27ffff,,,1.0,,,,


In [261]:
result_poi_land_use = result_poi_land_use.groupby(['hex_id']).sum()
result_poi_land_use

Unnamed: 0_level_0,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
891f2504003ffff,0.0,0.0,1.0,0.0,0.0,0.0,0.0
891f2504007ffff,0.0,0.0,4.0,0.0,0.0,0.0,0.0
891f250400bffff,0.0,0.0,4.0,0.0,0.0,5.0,0.0
891f250400fffff,0.0,0.0,2.0,0.0,0.0,0.0,0.0
891f2504013ffff,0.0,0.0,3.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...
891f25b9bb7ffff,0.0,0.0,1.0,0.0,0.0,0.0,0.0
891f25b9bc3ffff,0.0,0.0,2.0,0.0,0.0,0.0,0.0
891f25b9bd3ffff,0.0,0.0,1.0,0.0,0.0,0.0,0.0
891f25b9d27ffff,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [262]:
result_poi_land_use.sum()

LUP_Comm       561.0
LUP_Other      124.0
LUP_Recre     4922.0
LUP_Public     268.0
LUP_Resi       591.0
LUP_Edu       1044.0
LUP_Health      49.0
dtype: float64

# Land use Point POIs

In [263]:
POI_points = gpd.read_file('POI/sweden-OSMshp/gis_osm_pois_free_1.shp') # whole sweden
POI_points.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,299809,2701,tourist_info,,POINT (15.74385 58.71026)
1,599753,2733,archaeological,Avrättningsplats,POINT (16.61785 59.19575)
2,1314052,2081,university,Konstfack,POINT (17.99421 59.29977)
3,6958798,2302,fast_food,Backen,POINT (17.96329 59.30533)
4,8089809,2030,recycling,,POINT (17.93140 59.29665)


In [264]:
intersected_POI_points = gpd.overlay(POI_points, accessibility_data, how='intersection')
print('length of total intersected POI points = %d ' % len(intersected_POI_points) )
intersected_POI_points.head()

length of total intersected POI points = 11994 


Unnamed: 0,osm_id,code,fclass,name,hex_id,accessibility,area,geometry
0,30153806,2742,viewpoint,,891f25a92b7ffff,63053,0.081589,POINT (11.93178 57.71445)
1,30480018,2301,restaurant,Tabeilu Sushi,891f2506dc7ffff,35328,0.081646,POINT (11.96807 57.69620)
2,30480028,2301,restaurant,Japan House,891f2506dc7ffff,35328,0.081646,POINT (11.96606 57.69756)
3,30482977,2401,hotel,Gothia Towers,891f2506d33ffff,38091,0.081656,POINT (11.98905 57.69748)
4,30483077,2401,hotel,Radisson Blu Scandinavia,891f25a932fffff,39387,0.081626,POINT (11.97281 57.70704)


In [229]:
intersected_POI_points['fclass'].unique()

array(['viewpoint', 'restaurant', 'hotel', 'observation_tower',
       'recycling', 'post_box', 'recycling_glass', 'comms_tower',
       'bicycle_shop', 'clinic', 'fast_food', 'convenience', 'kiosk',
       'supermarket', 'atm', 'bank', 'artwork', 'playground', 'school',
       'pub', 'recycling_paper', 'cinema', 'cafe', 'police', 'zoo',
       'tourist_info', 'bench', 'toilet', 'pharmacy', 'archaeological',
       'attraction', 'sports_centre', 'picnic_site', 'vending_parking',
       'shelter', 'library', 'clothes', 'post_office', 'waste_basket',
       'tower', 'beverages', 'bakery', 'theatre', 'museum', 'memorial',
       'kindergarten', 'florist', 'computer_shop', 'sports_shop',
       'hostel', 'fountain', 'university', 'theme_park', 'drinking_water',
       'lighthouse', 'dentist', 'recycling_metal', 'swimming_pool',
       'greengrocer', 'bicycle_rental', 'fire_station', 'shoe_shop',
       'car_rental', 'department_store', 'doityourself',
       'mobile_phone_shop', 'hairdress

In [265]:
def merge_fclasses(fclass):
    if fclass in ['shelter','chalet','hostel','guesthouse']:
        return 'LUP_Resi'
    elif fclass in ['hotel','restaurant','bicycle_shop','fast_food','cafe','bar','supermarket','convenience','department_store',
                    'mall','pub','motel','outdoor_shop','bookshop','clothes','optician','car_rental','nightclub','bakery',
                    'laundry','hairdresser','beverages','florist','travel_agent','biergarten','furniture_shop','stationery',
                    'recycling_clothes','bicycle_rental','food_court','beauty_shop','doityourself','video_shop','sports_shop',
                    'mobile_phone_shop','shoe_shop','jeweller','toy_shop','car_dealership','gift_shop','greengrocer','butcher',
                    'computer_shop','vending_parking','vending_machine','vending_any','car_wash','market_place','recycling_metal',
                    'recycling_paper','car_sharing','kiosk','recycling_glass','garden_centre','recycling','newsagent','comms_tower']:
        return 'LUP_Comm'
    elif fclass in ['park','theatre','cinema','playground','sports_centre','attraction',
                    'picnic_site','zoo','fountain','viewpoint','artwork','archaeological','ruins','arts_centre',
                    'drinking_water','theme_park','pitch','stadium', 'ice_rink','golf_course','camp_site','dog_park',
                    'observation_tower', 'bench','swimming_pool','track','fort','caravan_site','hunting_stand']:
        return 'LUP_Recre'
    elif fclass in ['university','school','college','kindergarten','riding_school']:
        return 'LUP_Edu'
    elif fclass in ['community_centre','bank','atm','post_office','museum','monument','memorial,''toilet','library',
                    'embassy','police','post_box','fire_station','courthouse','town_hall', 'tourist_info', 'water_well',
                    'wastewater_plant','prison','water_tower','water_works','tower','castle','waste_basket','lighthouse']:
        return 'LUP_Public'
    elif fclass in ['hospital', 'clinic','nursing_home','veterinary','pharmacy','dentist','doctors','chemist']:
        return 'LUP_Health'
    else:
        return 'LUP_Other'

intersected_POI_points['major_clas'] = intersected_POI_points['fclass'].apply(merge_fclasses)
print('length of total POI_land_use = %d ' % len(intersected_POI_points) )
intersected_POI_points.head()

length of total POI_land_use = 11994 


Unnamed: 0,osm_id,code,fclass,name,hex_id,accessibility,area,geometry,major_clas
0,30153806,2742,viewpoint,,891f25a92b7ffff,63053,0.081589,POINT (11.93178 57.71445),LUP_Recre
1,30480018,2301,restaurant,Tabeilu Sushi,891f2506dc7ffff,35328,0.081646,POINT (11.96807 57.69620),LUP_Comm
2,30480028,2301,restaurant,Japan House,891f2506dc7ffff,35328,0.081646,POINT (11.96606 57.69756),LUP_Comm
3,30482977,2401,hotel,Gothia Towers,891f2506d33ffff,38091,0.081656,POINT (11.98905 57.69748),LUP_Comm
4,30483077,2401,hotel,Radisson Blu Scandinavia,891f25a932fffff,39387,0.081626,POINT (11.97281 57.70704),LUP_Comm


In [266]:
intersected_POI_points['count']=1
result_poi_points = intersected_POI_points.groupby(['hex_id', 'major_clas']).agg({'count': 'sum'})
result_poi_points = result_poi_points.reset_index(drop = False)
result_poi_points.head()

Unnamed: 0,hex_id,major_clas,count
0,891f250400bffff,LUP_Comm,1
1,891f250400bffff,LUP_Recre,1
2,891f250400bffff,LUP_Resi,2
3,891f2504013ffff,LUP_Resi,1
4,891f250401bffff,LUP_Resi,1


In [267]:
result_poi_points['LUP_Comm'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Comm']
result_poi_points['LUP_Other'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Other']
result_poi_points['LUP_Recre'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Recre']
result_poi_points['LUP_Public'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Public']
result_poi_points['LUP_Resi'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Resi']
result_poi_points['LUP_Edu'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Edu']
result_poi_points['LUP_Health'] = result_poi_points['count'][result_poi_points['major_clas'] == 'LUP_Health']

result_poi_points = result_poi_points.drop(columns=['major_clas', 'count'])
result_poi_points

Unnamed: 0,hex_id,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
0,891f250400bffff,1.0,,,,,,
1,891f250400bffff,,,1.0,,,,
2,891f250400bffff,,,,,2.0,,
3,891f2504013ffff,,,,,1.0,,
4,891f250401bffff,,,,,1.0,,
...,...,...,...,...,...,...,...,...
3348,891f25b9babffff,,,,,1.0,,
3349,891f25b9bdbffff,,,,1.0,,,
3350,891f25b9d17ffff,,,,1.0,,,
3351,891f25b9d2bffff,,,,,1.0,,


In [268]:
result_poi_points = result_poi_points.groupby(['hex_id']).sum()
result_poi_points

Unnamed: 0_level_0,LUP_Comm,LUP_Other,LUP_Recre,LUP_Public,LUP_Resi,LUP_Edu,LUP_Health
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
891f250400bffff,1.0,0.0,1.0,0.0,2.0,0.0,0.0
891f2504013ffff,0.0,0.0,0.0,0.0,1.0,0.0,0.0
891f250401bffff,0.0,0.0,0.0,0.0,1.0,0.0,0.0
891f2504023ffff,0.0,0.0,3.0,0.0,0.0,0.0,3.0
891f2504027ffff,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...
891f25b9babffff,1.0,0.0,0.0,0.0,1.0,0.0,0.0
891f25b9bdbffff,0.0,0.0,0.0,1.0,0.0,0.0,0.0
891f25b9d17ffff,0.0,0.0,0.0,1.0,0.0,0.0,0.0
891f25b9d2bffff,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [269]:
result_poi_points.sum()

LUP_Comm      3939.0
LUP_Other      178.0
LUP_Recre     4085.0
LUP_Public    2840.0
LUP_Resi       552.0
LUP_Edu        137.0
LUP_Health     263.0
dtype: float64

# Merge the POIs from building and land use

In [270]:
result_poi = result_poi_building.add(result_poi_land_use, fill_value=0)
result_poi = result_poi.add(result_poi_points, fill_value=0)

In [271]:
result_poi.sum()

LUP_Comm        5974.0
LUP_Other     105972.0
LUP_Recre       9364.0
LUP_Public      3401.0
LUP_Resi       29832.0
LUP_Edu         2189.0
LUP_Health       410.0
dtype: float64

calculate the TFIDF ratio of POIs

In [272]:
mix=result_poi.loc[:,['LUP_Resi','LUP_Comm','LUP_Edu','LUP_Recre','LUP_Public','LUP_Health','LUP_Other']]
weight = list(mix.sum(axis=0).sum()/mix.sum(axis=0))
c = np.log(weight)
# print(c)
mix.iloc[0:len(mix),0] = mix.iloc[0:len(mix),0]*c[0]
mix.iloc[0:len(mix),1] = mix.iloc[0:len(mix),1]*c[1]
mix.iloc[0:len(mix),2] = mix.iloc[0:len(mix),2]*c[2]
mix.iloc[0:len(mix),3] = mix.iloc[0:len(mix),3]*c[3]
mix.iloc[0:len(mix),4] = mix.iloc[0:len(mix),4]*c[4]
mix.iloc[0:len(mix),5] = mix.iloc[0:len(mix),5]*c[5]
mix.iloc[0:len(mix),6] = mix.iloc[0:len(mix),6]*c[6]
mix['all'] = mix.sum(axis=1)
mix.loc[:,'LUP_Resi':'LUP_Other'] = mix.loc[:,'LUP_Resi':'LUP_Other'].div(mix['all'], axis=0)+0.00001
mix = mix.astype(float)
mix.fillna(0, inplace=True)
mix.describe(include='all')
mix2 = mix
def func (x):
    y = x*np.log(x)
    return y
mix2 = mix2.loc[:,'LUP_Resi':'LUP_Other'].applymap(func)/(-np.log(5))
mix2['entropy'] = mix2.loc[:,'LUP_Resi':'LUP_Other'].sum(axis=1)
mix['entropy'] = mix2['entropy']
mix['Resi_lur'] = mix['LUP_Resi']
mix['Comm_lur'] = mix['LUP_Comm']
mix['Edu_lur'] = mix['LUP_Edu']
mix['Recre_lur'] = mix['LUP_Recre']
mix['Public_lur'] = mix['LUP_Public']
mix['Health_lur'] = mix['LUP_Health']
mix['Other_lur'] = mix['LUP_Other']
mix = mix.drop(['LUP_Resi', 'LUP_Comm', 'LUP_Edu', 'LUP_Recre', 'LUP_Public','LUP_Health', 'LUP_Other','all'], axis=1)
mix = mix.reset_index(drop=False)
mix

  mix2 = mix2.loc[:,'LUP_Resi':'LUP_Other'].applymap(func)/(-np.log(5))


Unnamed: 0,hex_id,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,Health_lur,Other_lur
0,891f2504003ffff,0.475234,0.231181,0.000010,0.000010,0.065407,0.00001,0.00001,0.703443
1,891f2504007ffff,0.669005,0.356722,0.000010,0.000010,0.403655,0.00001,0.00001,0.239653
2,891f250400bffff,0.815334,0.053578,0.052717,0.482249,0.227322,0.00001,0.00001,0.184183
3,891f250400fffff,0.625095,0.543700,0.000010,0.000010,0.230719,0.00001,0.00001,0.225610
4,891f2504013ffff,0.727980,0.482809,0.000010,0.068999,0.136591,0.00001,0.00001,0.311641
...,...,...,...,...,...,...,...,...,...
5456,891f25b9dabffff,0.000423,0.000010,0.000010,0.000010,0.000010,0.00001,0.00001,1.000010
5457,891f25b9db3ffff,0.000423,0.000010,0.000010,0.000010,0.000010,0.00001,0.00001,1.000010
5458,891f25b9db7ffff,0.000423,0.000010,0.000010,0.000010,0.000010,0.00001,0.00001,1.000010
5459,891f25b9dbbffff,0.000423,0.000010,0.000010,0.000010,0.000010,0.00001,0.00001,1.000010


In [273]:
# Merged LUP counts and LUR into main data
data = accessibility_data.reset_index(drop=True)
# data = data.merge(result, on='hex_id', how='left')
data = data.merge(mix, on='hex_id', how='left')
print(len(data))
data.head()

9772


Unnamed: 0,hex_id,accessibility,geometry,area,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,Health_lur,Other_lur
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",0.081623,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,1e-05,1e-05
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",0.081692,,,,,,,,
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.082003,0.303958,0.808345,1e-05,1e-05,1e-05,1e-05,1e-05,0.191675
3,891f2506a8bffff,38454,"POLYGON ((11.93287 57.66635, 11.93551 57.66729...",0.081708,0.822373,0.309163,0.304195,1e-05,1e-05,1e-05,0.276717,0.109965
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.081726,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,1e-05,1e-05


# Road Density

In [46]:
POI_road = gpd.read_file('POI/Target_area/POI_road.shp')

print('length of total POI_road = %d ' % len(POI_road) )
POI_road.head()

length of total POI_road = 107853 


Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,geometry
0,3678945,5122,residential,Öjersjövägen,,B,30,0,F,F,"LINESTRING (12.15126 57.70713, 12.15103 57.707..."
1,3846609,5131,motorway_link,Nya Öjersjövägen,,F,60,0,F,F,"LINESTRING (12.09217 57.67335, 12.09196 57.673..."
2,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,"LINESTRING (11.97419 57.86391, 11.97465 57.863..."
3,4040303,5111,motorway,Norgevägen,E 6,F,100,0,F,F,"LINESTRING (12.00448 57.84406, 12.00477 57.843..."
4,4040436,5131,motorway_link,,,F,80,0,F,F,"LINESTRING (12.00655 57.79813, 12.00625 57.797..."


In [47]:
intersected_lines = gpd.overlay(POI_road, accessibility_data, how='intersection')
print('length of total intersected_lines = %d ' % len(intersected_lines) )
intersected_lines.head()

length of total intersected_lines = 137467 


Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,hex_id,accessibility,area,geometry
0,3678945,5122,residential,Öjersjövägen,,B,30,0,F,F,891f25321cbffff,3862,0.081695,"LINESTRING (12.15126 57.70713, 12.15103 57.707..."
1,3846609,5131,motorway_link,Nya Öjersjövägen,,F,60,0,F,F,891f2533113ffff,32393,0.08175,"LINESTRING (12.09217 57.67335, 12.09196 57.673..."
2,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d6fffff,0,0.081267,"LINESTRING (11.98027 57.86099, 11.98089 57.86080)"
3,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d63ffff,0,0.081261,"LINESTRING (11.97625 57.86267, 11.97657 57.862..."
4,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d73ffff,0,0.081255,"LINESTRING (11.97419 57.86391, 11.97465 57.863..."


In [49]:
intersected_lines = intersected_lines.to_crs(3006)
intersected_lines['length'] = intersected_lines.geometry.length / (10**3) # in km
intersected_lines = intersected_lines.to_crs(4326)
intersected_lines['road_density'] = intersected_lines['length'] / intersected_lines['area'] # in km / km^2
intersected_lines.head()

Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,hex_id,accessibility,area,geometry,length,road_density
0,3678945,5122,residential,Öjersjövägen,,B,30,0,F,F,891f25321cbffff,3862,0.081695,"LINESTRING (12.15126 57.70713, 12.15103 57.707...",0.111932,1.370118
1,3846609,5131,motorway_link,Nya Öjersjövägen,,F,60,0,F,F,891f2533113ffff,32393,0.08175,"LINESTRING (12.09217 57.67335, 12.09196 57.673...",0.051781,0.633406
2,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d6fffff,0,0.081267,"LINESTRING (11.98027 57.86099, 11.98089 57.86080)",0.042638,0.524664
3,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d63ffff,0,0.081261,"LINESTRING (11.97625 57.86267, 11.97657 57.862...",0.303374,3.733332
4,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d73ffff,0,0.081255,"LINESTRING (11.97419 57.86391, 11.97465 57.863...",0.185343,2.281006


In [50]:
def merge_fclasses(fclass):
    if fclass in ['primary', 'motorway', 'trunk']:
        return 'Primary'
    elif fclass in ['secondary', 'primary_link', 'trunk_link', 'secondary_link', 'motorway_link']:
        return 'Secondary'
    elif fclass in ['tertiary', 'tertiary_link', 'residential', 'living_street', 'service']:
        return 'Tertiary'
    elif fclass in ['cycleway', 'bridleway', 'track']:
        return 'Cycleway'
    elif fclass in ['pedestrian', 'footway', 'path', 'steps', 'track_grade2', 'track_grade4', 'track_grade1', 
                    'track_grade3','unclassified','unknown', 'track_grade5']:  # add track_grade5
        return 'Pedestrian'
    else:
        return 'Other'   # May include nothing

intersected_lines['main_fclass'] = intersected_lines['fclass'].apply(merge_fclasses)
intersected_lines.head()

Unnamed: 0,osm_id,code,fclass,name,ref,oneway,maxspeed,layer,bridge,tunnel,hex_id,accessibility,area,geometry,length,road_density,main_fclass
0,3678945,5122,residential,Öjersjövägen,,B,30,0,F,F,891f25321cbffff,3862,0.081695,"LINESTRING (12.15126 57.70713, 12.15103 57.707...",0.111932,1.370118,Tertiary
1,3846609,5131,motorway_link,Nya Öjersjövägen,,F,60,0,F,F,891f2533113ffff,32393,0.08175,"LINESTRING (12.09217 57.67335, 12.09196 57.673...",0.051781,0.633406,Secondary
2,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d6fffff,0,0.081267,"LINESTRING (11.98027 57.86099, 11.98089 57.86080)",0.042638,0.524664,Primary
3,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d63ffff,0,0.081261,"LINESTRING (11.97625 57.86267, 11.97657 57.862...",0.303374,3.733332,Primary
4,4040302,5111,motorway,Norgevägen,E 6,F,80,0,F,F,891f25b8d73ffff,0,0.081255,"LINESTRING (11.97419 57.86391, 11.97465 57.863...",0.185343,2.281006,Primary


In [51]:
result_road = intersected_lines.groupby(['hex_id', 'main_fclass']).agg({'road_density': 'sum'})
result_road = result_road.reset_index(drop = False)
result_road.head()

Unnamed: 0,hex_id,main_fclass,road_density
0,891f2504003ffff,Cycleway,3.78828
1,891f2504003ffff,Pedestrian,14.119589
2,891f2504003ffff,Tertiary,12.179672
3,891f2504007ffff,Cycleway,0.022056
4,891f2504007ffff,Pedestrian,12.970331


In [52]:
result_road['Rd_Pri'] = result_road['road_density'][result_road['main_fclass'] == 'Primary']
result_road['Rd_Sec'] = result_road['road_density'][result_road['main_fclass'] == 'Secondary']
result_road['Rd_Ter'] = result_road['road_density'][result_road['main_fclass'] == 'Tertiary']
result_road['Rd_Cyc'] = result_road['road_density'][result_road['main_fclass'] == 'Cycleway']
result_road['Rd_Pedes'] = result_road['road_density'][result_road['main_fclass'] == 'Pedestrian']
result_road['Rd_Other'] = result_road['road_density'][result_road['main_fclass'] == 'Other']
result_road = result_road.drop(columns=['main_fclass', 'road_density'])
result_road = result_road.groupby(['hex_id']).sum().reset_index()
result_road

Unnamed: 0,hex_id,Rd_Pri,Rd_Sec,Rd_Ter,Rd_Cyc,Rd_Pedes,Rd_Other
0,891f2504003ffff,0.0,0.0,12.179672,3.788280,14.119589,0.0
1,891f2504007ffff,0.0,0.0,5.572806,0.022056,12.970331,0.0
2,891f250400bffff,0.0,0.0,7.951512,9.493987,24.027859,0.0
3,891f250400fffff,0.0,0.0,2.048990,0.152064,19.465291,0.0
4,891f2504013ffff,0.0,0.0,14.143177,2.936698,16.697399,0.0
...,...,...,...,...,...,...,...
7808,891f25b9db3ffff,0.0,0.0,0.000000,2.239844,3.041589,0.0
7809,891f25b9db7ffff,0.0,0.0,1.713244,0.000000,7.740703,0.0
7810,891f25b9dbbffff,0.0,0.0,0.328145,1.525539,0.000000,0.0
7811,891f25b9e2fffff,0.0,0.0,1.291939,0.000000,0.000000,0.0


In [274]:
data = data.merge(result_road, on='hex_id', how='left')
data.head()

Unnamed: 0,hex_id,accessibility,geometry,area,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,Health_lur,Other_lur,Rd_Pri,Rd_Sec,Rd_Ter,Rd_Cyc,Rd_Pedes,Rd_Other
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",0.081623,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,1e-05,1e-05,0.0,0.0,0.030864,0.0,9.195498,0.0
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",0.081692,,,,,,,,,,,,,,
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.082003,0.303958,0.808345,1e-05,1e-05,1e-05,1e-05,1e-05,0.191675,0.0,3.482623,8.078311,0.0,2.334086,0.0
3,891f2506a8bffff,38454,"POLYGON ((11.93287 57.66635, 11.93551 57.66729...",0.081708,0.822373,0.309163,0.304195,1e-05,1e-05,1e-05,0.276717,0.109965,6.23746,0.617976,22.194071,9.579723,14.658543,0.0
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.081726,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,1e-05,1e-05,0.0,0.0,3.009512,2.845747,6.350286,0.0


# Transit Infrastructure

In [275]:
POI_tran_infra = gpd.read_file('POI/sweden-OSMshp/gis_osm_traffic_free_1.shp') # whole sweden

def classify_tranInfra(fclass):
    if fclass in ['traffic_signals', 'motorway_junction', 'slipway','weir','marina','lock_gate','stop']:
        return 'T_Hinder'
    elif fclass in ['crossing', 'turning_circle', 'mini_roundabout', 'parking_underground','parking_bicycle']:
        return 'T_Enable'
    else:
        return 'T_Other'
    
POI_tran_infra['major_class'] = POI_tran_infra['fclass'].apply(classify_tranInfra)
POI_tran_infra = POI_tran_infra.drop(columns=['osm_id','code','name'])
POI_tran_infra.head()

Unnamed: 0,fclass,geometry,major_class
0,turning_circle,POINT (15.57997 58.42017),T_Enable
1,motorway_junction,POINT (15.53515 58.42810),T_Hinder
2,traffic_signals,POINT (15.62221 58.42224),T_Hinder
3,crossing,POINT (15.62760 58.41486),T_Enable
4,traffic_signals,POINT (15.63091 58.41061),T_Hinder


In [276]:
intersected_tran_infra = gpd.overlay(POI_tran_infra, accessibility_data, how='intersection')
print('length of total intersected POI_tran_infra = %d ' % len(intersected_tran_infra) )
intersected_tran_infra.head()

length of total intersected POI_tran_infra = 99308 


Unnamed: 0,fclass,major_class,hex_id,accessibility,area,geometry
0,motorway_junction,T_Hinder,891f25b8d6fffff,0,0.081267,POINT (11.98467 57.85966)
1,motorway_junction,T_Hinder,891f25aa387ffff,192,0.081421,POINT (12.00655 57.79813)
2,motorway_junction,T_Hinder,891f25a9e83ffff,45138,0.081604,POINT (11.99001 57.71982)
3,motorway_junction,T_Hinder,891f25a9e03ffff,35886,0.08162,POINT (11.99553 57.71256)
4,motorway_junction,T_Hinder,891f25a9e73ffff,47852,0.081631,POINT (11.99590 57.70739)


In [277]:
intersected_tran_infra['count']=1
result_tran_infra = intersected_tran_infra.groupby(['hex_id', 'major_class']).agg({'count': 'sum'})
result_tran_infra = result_tran_infra.reset_index(drop = False)
result_tran_infra.head()

Unnamed: 0,hex_id,major_class,count
0,891f2504003ffff,T_Other,47
1,891f2504007ffff,T_Other,31
2,891f250400bffff,T_Enable,2
3,891f250400bffff,T_Other,52
4,891f250400fffff,T_Other,31


In [278]:
result_tran_infra['T_Enable_POI'] = result_tran_infra['count'][result_tran_infra['major_class'] == 'T_Enable']
result_tran_infra['T_Hinder_POI'] = result_tran_infra['count'][result_tran_infra['major_class'] == 'T_Hinder']
result_tran_infra['T_Other_POI'] = result_tran_infra['count'][result_tran_infra['major_class'] == 'T_Other']

result_tran_infra = result_tran_infra.drop(columns=['major_class', 'count'])
result_tran_infra = result_tran_infra.groupby(['hex_id']).sum()
result_tran_infra

Unnamed: 0_level_0,T_Enable_POI,T_Hinder_POI,T_Other_POI
hex_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
891f2504003ffff,0.0,0.0,47.0
891f2504007ffff,0.0,0.0,31.0
891f250400bffff,2.0,0.0,52.0
891f250400fffff,0.0,0.0,31.0
891f2504013ffff,0.0,0.0,60.0
...,...,...,...
891f25b9d6fffff,0.0,0.0,7.0
891f25b9d73ffff,1.0,0.0,0.0
891f25b9d97ffff,1.0,0.0,0.0
891f25b9dbbffff,0.0,1.0,0.0


In [279]:
# TFIDF for transit
mix1=result_tran_infra.loc[:,['T_Enable_POI','T_Hinder_POI','T_Other_POI']]
weight = list(mix1.sum(axis=0).sum()/mix1.sum(axis=0))
c = np.log(weight)
# print(c)
mix1.iloc[0:len(mix1),0] = mix1.iloc[0:len(mix1),0]*c[0]
mix1.iloc[0:len(mix1),1] = mix1.iloc[0:len(mix1),1]*c[1]
mix1.iloc[0:len(mix1),2] = mix1.iloc[0:len(mix1),2]*c[2]

mix1['all'] = mix1.sum(axis=1)
mix1.loc[:,'T_Enable_POI':'T_Other_POI'] = mix1.loc[:,'T_Enable_POI':'T_Other_POI'].div(mix1['all'], axis=0)+0.00001
mix1 = mix1.astype(float)
mix1.fillna(0, inplace=True)
mix1.describe(include='all')
mix2 = mix1
def func (x):
    y = x*np.log(x)
    return y
mix2 = mix2.loc[:,'T_Enable_POI':'T_Other_POI'].applymap(func)/(-np.log(5))
mix2['Tentropy'] = mix2.loc[:,'LUP_Resi':'LUP_Other'].sum(axis=1)
mix1['Tentropy'] = mix2['Tentropy']
mix1['T_Enable_LUR'] = mix1['T_Enable_POI']
mix1['T_Hinder_LUR'] = mix1['T_Hinder_POI']
mix1['T_Other_LUR'] = mix1['T_Other_POI']

mix1 = mix1.drop(['T_Enable_POI','T_Hinder_POI','T_Other_POI','all'], axis=1)
mix1 = mix1.reset_index(drop=False)
mix1

  mix2 = mix2.loc[:,'T_Enable_POI':'T_Other_POI'].applymap(func)/(-np.log(5))


Unnamed: 0,hex_id,Tentropy,T_Enable_LUR,T_Hinder_LUR,T_Other_LUR
0,891f2504003ffff,0.0,0.000010,0.00001,1.000010
1,891f2504007ffff,0.0,0.000010,0.00001,1.000010
2,891f250400bffff,0.0,0.547116,0.00001,0.452904
3,891f250400fffff,0.0,0.000010,0.00001,1.000010
4,891f2504013ffff,0.0,0.000010,0.00001,1.000010
...,...,...,...,...,...
3521,891f25b9d6fffff,0.0,0.000010,0.00001,1.000010
3522,891f25b9d73ffff,0.0,1.000010,0.00001,0.000010
3523,891f25b9d97ffff,0.0,1.000010,0.00001,0.000010
3524,891f25b9dbbffff,0.0,0.000010,1.00001,0.000010


In [280]:
mix1 = mix1.drop(columns=['Tentropy'])
data = data.merge(mix1, on='hex_id', how='left')
data.head()

Unnamed: 0,hex_id,accessibility,geometry,area,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,...,Other_lur,Rd_Pri,Rd_Sec,Rd_Ter,Rd_Cyc,Rd_Pedes,Rd_Other,T_Enable_LUR,T_Hinder_LUR,T_Other_LUR
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",0.081623,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,...,1e-05,0.0,0.0,0.030864,0.0,9.195498,0.0,,,
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",0.081692,,,,,,,...,,,,,,,,,,
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.082003,0.303958,0.808345,1e-05,1e-05,1e-05,1e-05,...,0.191675,0.0,3.482623,8.078311,0.0,2.334086,0.0,,,
3,891f2506a8bffff,38454,"POLYGON ((11.93287 57.66635, 11.93551 57.66729...",0.081708,0.822373,0.309163,0.304195,1e-05,1e-05,1e-05,...,0.109965,6.23746,0.617976,22.194071,9.579723,14.658543,0.0,0.159733,0.275814,0.564483
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.081726,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,...,1e-05,0.0,0.0,3.009512,2.845747,6.350286,0.0,,,


# Distance to Transit (number of PT stops)

In [67]:
POI_transport = gpd.read_file('POI/sweden-OSMshp/gis_osm_transport_free_1.shp') # whole sweden
POI_transport.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,81048,5621,bus_stop,Anders Ljungstedts gymnasium,POINT (15.63776 58.41508)
1,81062,5621,bus_stop,Hagalundsvägen,POINT (15.65425 58.41803)
2,81112,5621,bus_stop,Majelden,POINT (15.63883 58.39723)
3,81138,5621,bus_stop,Middagsgatan,POINT (15.66019 58.37426)
4,81267,5621,bus_stop,Isberget,POINT (15.56639 58.38982)


In [68]:
intersected_transport = gpd.overlay(POI_transport, accessibility_data, how='intersection')
print('length of total intersected POI_transport = %d ' % len(intersected_transport) )
intersected_transport.head()

length of total intersected POI_transport = 2526 


Unnamed: 0,osm_id,code,fclass,name,hex_id,accessibility,area,geometry
0,490263,5621,bus_stop,Häradsvägen,891f25a8b67ffff,2755,0.081633,POINT (12.13390 57.73237)
1,490265,5621,bus_stop,Stenliden,891f25a8b6fffff,2879,0.081637,POINT (12.13332 57.73004)
2,513765,5621,bus_stop,Nils Henrikssons väg,891f25a8a37ffff,9526,0.081609,POINT (12.10405 57.73745)
3,7373654,5621,bus_stop,Torrekulla,891f2531673ffff,1498,0.081866,POINT (12.04059 57.61757)
4,19476504,5621,bus_stop,Fässberg,891f2504cafffff,20883,0.08176,POINT (11.97742 57.65012)


In [70]:
intersected_transport['fclass'].unique()

array(['bus_stop', 'tram_stop', 'ferry_terminal', 'taxi', 'bus_station',
       'railway_station', 'helipad'], dtype=object)

In [117]:
intersected_transport['count']=1
intersected_transport['station_density'] = intersected_transport['count'] / intersected_transport['area']

result_transport = intersected_transport.groupby(['hex_id', 'fclass']).agg({'station_density': 'sum'})
result_transport = result_transport.reset_index(drop = False)
print('length = %d' % len(result_transport))
result_transport.head()

length = 1308


Unnamed: 0,hex_id,fclass,station_density
0,891f250400bffff,bus_stop,24.465375
1,891f2504013ffff,bus_stop,12.234297
2,891f250401bffff,bus_stop,24.467181
3,891f2504023ffff,bus_stop,12.232099
4,891f2504037ffff,tram_stop,24.465611


In [118]:
result_transport = result_transport.groupby(['hex_id']).sum()
result_transport = result_transport.drop(columns=['fclass'])
print('length = %d' % len(result_transport))

result_transport

length = 1188


Unnamed: 0_level_0,station_density
hex_id,Unnamed: 1_level_1
891f250400bffff,24.465375
891f2504013ffff,12.234297
891f250401bffff,24.467181
891f2504023ffff,12.232099
891f2504037ffff,24.465611
...,...
891f25b9babffff,24.578200
891f25b9bbbffff,12.290010
891f25b9d23ffff,24.597293
891f25b9d2bffff,24.595864


In [281]:
data = data.merge(result_transport, on='hex_id', how='left')
data['station_density'] = data['station_density'].fillna(0)

data.head()

Unnamed: 0,hex_id,accessibility,geometry,area,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,...,Rd_Pri,Rd_Sec,Rd_Ter,Rd_Cyc,Rd_Pedes,Rd_Other,T_Enable_LUR,T_Hinder_LUR,T_Other_LUR,station_density
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",0.081623,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,...,0.0,0.0,0.030864,0.0,9.195498,0.0,,,,0.0
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",0.081692,,,,,,,...,,,,,,,,,,0.0
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.082003,0.303958,0.808345,1e-05,1e-05,1e-05,1e-05,...,0.0,3.482623,8.078311,0.0,2.334086,0.0,,,,0.0
3,891f2506a8bffff,38454,"POLYGON ((11.93287 57.66635, 11.93551 57.66729...",0.081708,0.822373,0.309163,0.304195,1e-05,1e-05,1e-05,...,6.23746,0.617976,22.194071,9.579723,14.658543,0.0,0.159733,0.275814,0.564483,0.0
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.081726,0.000423,1e-05,1e-05,1e-05,1.00001,1e-05,...,0.0,0.0,3.009512,2.845747,6.350286,0.0,,,,0.0


# Final data process

In [282]:
data = data.drop(columns=['Rd_Other']) # all-zero


In [122]:
nan_data = data[data.isna().any(axis=1)]
nan_data.head()

Unnamed: 0,hex_id,accessibility,geometry,entropy,Resi_lur,Comm_lur,Edu_lur,Recre_lur,Public_lur,Health_lur,...,Rd_Pri,Rd_Sec,Rd_Ter,Rd_Cyc,Rd_Pedes,T_Enable_LUR,T_Hinder_LUR,T_Other_LUR,num_station,station_density
0,891f25ad0dbffff,119,"POLYGON ((12.18021 57.74436, 12.18286 57.74529...",,,,,,,,...,0.0,0.0,0.030864,0.0,9.195498,,,,0.0,0.0
1,891f2507103ffff,0,"POLYGON ((11.82421 57.65409, 11.82685 57.65503...",,,,,,,,...,,,,,,,,,0.0,0.0
2,891f253aa8bffff,0,"POLYGON ((12.07923 57.56624, 12.08188 57.56718...",0.248793,0.862814,1e-05,1e-05,1e-05,1e-05,1e-05,...,0.0,3.482623,8.078311,0.0,2.334086,,,,0.0,0.0
4,891f2532257ffff,165,"POLYGON ((12.10123 57.68717, 12.10388 57.68811...",0.000423,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,...,0.0,0.0,3.009512,2.845747,6.350286,,,,0.0,0.0
6,891f2515947ffff,0,"POLYGON ((11.79460 57.69472, 11.79724 57.69566...",,,,,,,,...,0.0,0.0,4.97387,1.498416,4.818121,1e-05,1e-05,1.00001,0.0,0.0


In [123]:
print(data.columns)

Index(['hex_id', 'accessibility', 'geometry', 'entropy', 'Resi_lur',
       'Comm_lur', 'Edu_lur', 'Recre_lur', 'Public_lur', 'Health_lur',
       'Other_lur', 'Rd_Pri', 'Rd_Sec', 'Rd_Ter', 'Rd_Cyc', 'Rd_Pedes',
       'T_Enable_LUR', 'T_Hinder_LUR', 'T_Other_LUR', 'num_station',
       'station_density'],
      dtype='object')


In [283]:
data.to_csv('prepared_data.csv')