# Individual mobility and place visitation data preparation
1. Individual stops are stored under `dbs/stop2poi/`. Each file contains a set of individuals and their stay records.
2. Place-based statistics are stored under `dbs/visits_day_did/`. Each file covers one type of POI and contains the visit records of those POIs.

In [1]:
# Enable automatic re-import of edited modules before each cell runs.
%load_ext autoreload
%autoreload 2
# Switch to the project root so that the relative `dbs/...` paths used below resolve.
# NOTE(review): absolute, machine-specific Windows path -- adjust when running elsewhere.
%cd D:\nine-euro-ticket-de

D:\nine-euro-ticket-de


In [10]:
import os
import pandas as pd
import workers
import geopandas as gpd
import sqlalchemy

# Data location
# Database credentials are read from the project's key store (`workers.keys_manager`),
# so no secrets are hard-coded in the notebook.
db_keys = workers.keys_manager['database']
user = db_keys['user']
password = db_keys['password']
port = db_keys['port']
db_name = db_keys['name']

# Build the connection URL from its components instead of interpolating strings:
# a password or user name containing characters such as '@', ':', '/' or '%'
# would otherwise corrupt the URL. `URL.create` escapes each part as needed.
db_url = sqlalchemy.engine.URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host='localhost',
    port=port,
    database=db_name,
    query={'gssencmode': 'disable'},  # same connection option as the original URL
)
engine = sqlalchemy.create_engine(db_url)

## 1. Individual stops

In [11]:
# Folder with the individual stop files (named like `stops_poi_<batch>.parquet`)
data_folder = os.path.join('dbs/stop2poi/')

# List only the top-level parquet files. `os.listdir` avoids walking the whole
# directory tree and raises a clear FileNotFoundError if the folder is missing;
# the suffix filter keeps stray files (e.g. OS metadata) from breaking the
# batch-number parsing below.
stop_files = [
    x for x in os.listdir(data_folder)
    if x.endswith('.parquet') and os.path.isfile(os.path.join(data_folder, x))
]

# Map batch number -> file path; the batch number is the integer after the last
# underscore in the file name.
paths2stops = {int(x.split('_')[-1].split('.')[0]): os.path.join(data_folder, x)
               for x in stop_files}
# Directory listing order is OS-dependent, so sort by batch number to make
# `paths2stops_list[0]` reproducible across machines and runs.
paths2stops = dict(sorted(paths2stops.items()))
paths2stops_list = list(paths2stops.values())
paths2stops_list[0]

'dbs/stop2poi/stops_poi_0.parquet'

In [12]:
# Read the first stop file and print its first stay record, one field per line.
df = pd.read_parquet(path=paths2stops_list[0])
print(df.iloc[0, :])

device_aid     0001bdf7-7bf9-6a62-2c06-a917b712bba6
loc                                               5
latitude                                    52.4064
longitude                                   13.2598
size                                              6
batch                                             0
dur                                      342.333333
localtime                 2023-05-03 13:23:27+02:00
l_localtime               2023-05-03 19:05:47+02:00
date                                     2023-05-03
h_s                                              13
year                                           2023
weekday                                           2
week                                             18
seq                                               1
osm_id                                  978741857.0
class                                       amenity
subclass                                       cafe
theme                               Food & Beverage
label       

## 2. Visits at places

In [8]:
# Folder with the place-based visit statistics (one parquet file per POI type)
data_folder = os.path.join('dbs/visits_day_did/')

# NOTE(review): `paths2stops_list` is reused here for the visit files and
# overwrites the list of stop files built in section 1 -- re-run that cell
# before reading stop files again.
# Only top-level parquet files are listed (no full directory walk), the folder
# is referenced through `data_folder` rather than a repeated literal, and the
# result is sorted because directory listing order is OS-dependent.
paths2stops_list = sorted(
    os.path.join(data_folder, x)
    for x in os.listdir(data_folder)
    if x.endswith('.parquet') and os.path.isfile(os.path.join(data_folder, x))
)
paths2stops_list[0]

'dbs/visits_day_did/Accomodations.parquet'

In [9]:
# Read the first visit-statistics file and print its first record,
# one field per line.
df = pd.read_parquet(path=paths2stops_list[0])
print(df.iloc[0, :])

osm_id                                    4447196
date                                   2019-06-23
year                                         2019
month                                           6
weekday                                         6
theme                Outdoor & Recreational areas
label                               Accomodations
pt_station_num                                 18
precipitation                                 0.2
num_visits                                      1
num_visits_wt                            8.807692
num_unique_device                               1
dur_total                              182.666667
dur_total_wt                          1608.871795
dur_m                                  182.666667
d_h25                                   31.044609
d_h50                                   31.044609
d_h75                                   31.044609
d_h25_wt                                31.044609
d_h50_wt                                31.044609


## 3. POI data

In [11]:
# Pull the complete `poi` table from the database into a GeoDataFrame;
# the geometry is taken from the `geom` column (the default, spelled out here).
gdf = gpd.GeoDataFrame.from_postgis(
    sql="SELECT * FROM poi;",
    con=engine,
    geom_col='geom',
)
gdf.head(n=5)

Unnamed: 0,osm_id,class,subclass,name,theme,label,geom
0,324043489,historic,boundary_stone,25,Historic,Historic,POINT (13.83511 48.76237)
1,897267627,historic,boundary_stone,24,Historic,Historic,POINT (13.83534 48.76241)
2,323299682,historic,boundary_stone,22,Historic,Historic,POINT (13.83580 48.76255)
3,897267707,historic,boundary_stone,23,Historic,Historic,POINT (13.83551 48.76253)
4,323777930,historic,boundary_stone,21,Historic,Historic,POINT (13.83614 48.76274)


In [12]:
# Write the POIs to a shapefile without the DataFrame index.
# NOTE(review): absolute, machine-specific output path.
gdf.to_file(
    filename='D:/poi.shp',
    index=False,
)

## 4. Home locations

In [13]:
# Pull the complete `home` table from the database and preview the first rows.
df_h = pd.read_sql(
    sql="SELECT * FROM home;",
    con=engine,
)
df_h.head(n=5)

Unnamed: 0,device_aid,loc,latitude,longitude,count
0,0001bdf7-7bf9-6a62-2c06-a917b712bba6,1,51.508021,6.859183,3
1,0001f145-6b4d-4636-a108-a26abc35b220,1,48.706003,9.254181,18
2,00020451-44b2-446a-95d7-27a31353eb42,2,48.9997,12.0717,11
3,0004ce43-5bbd-4ff6-a36c-559870df365d,1,49.3167,7.05,3
4,00054747-ebe9-4dbe-8942-807328e26e3b,1,48.819655,9.27906,14


In [14]:
# Write the home table to a gzip-compressed CSV without the DataFrame index.
# NOTE(review): absolute, machine-specific output path; the table holds device
# IDs together with coordinates, so handle the exported file as sensitive data.
df_h.to_csv(
    path_or_buf='D:/home.csv.gz',
    index=False,
    compression='gzip',
)