# Home detection and corresponding DeSO zone

In [1]:
%load_ext autoreload
%autoreload 2
%cd D:\mad4abm

D:\mad4abm


In [2]:
# Load libs
import pandas as pd
import geopandas as gpd
import sqlalchemy
import numpy as np
from scipy import stats
from tqdm import tqdm
from lib import preprocess as preprocess
from shapely.geometry import MultiPoint

In [3]:
# Data location
user = preprocess.keys_manager['database']['user']
password = preprocess.keys_manager['database']['password']
port = preprocess.keys_manager['database']['port']
db_name = preprocess.keys_manager['database']['name']
engine = sqlalchemy.create_engine(f'postgresql://{user}:{password}@localhost:{port}/{db_name}')

## 1. Home detection test

In [16]:
uid = '0d3a07aa-e73c-4f98-b10e-cb8d3da77d79'
df_eg = pd.read_sql_query(sql="""SELECT * FROM stops_subset WHERE uid = '%s';"""%uid, con=engine)
df_eg.head()

Unnamed: 0,timestamp,uid,lat,lng,location_method,datetime,leaving_datetime,tzname,TimeLocal,leaving_TimeLocal,date,dur,month,cluster
0,1559353521,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000009,15.791768,cell,2019-06-01 02:00:01,2019-06-01 04:00:01,Europe/Stockholm,2019-06-01 04:00:01,2019-06-01 06:00:01,2019-06-01,7200.0,6,1
1,1559360671,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000009,15.791768,cell,2019-06-01 04:07:14,2019-06-01 05:56:28,Europe/Stockholm,2019-06-01 06:07:14,2019-06-01 07:56:28,2019-06-01,6554.0,6,1
2,1559361665,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000694,15.788607,cell,2019-06-01 05:56:28,2019-06-01 06:07:48,Europe/Stockholm,2019-06-01 07:56:28,2019-06-01 08:07:48,2019-06-01,680.0,6,1
3,1559379582,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,59.989805,15.83542,fused,2019-06-01 10:42:10,2019-06-01 11:05:10,Europe/Stockholm,2019-06-01 12:42:10,2019-06-01 13:05:10,2019-06-01,1380.0,6,2
4,1559380908,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,59.981548,15.803858,cell,2019-06-01 11:14:34,2019-06-01 11:25:15,Europe/Stockholm,2019-06-01 13:14:34,2019-06-01 13:25:15,2019-06-01,641.0,6,3


In [17]:
df_eg.head()

Unnamed: 0,timestamp,uid,lat,lng,location_method,datetime,leaving_datetime,tzname,TimeLocal,leaving_TimeLocal,date,dur,month,cluster
0,1559353521,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000009,15.791768,cell,2019-06-01 02:00:01,2019-06-01 04:00:01,Europe/Stockholm,2019-06-01 04:00:01,2019-06-01 06:00:01,2019-06-01,7200.0,6,1
1,1559360671,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000009,15.791768,cell,2019-06-01 04:07:14,2019-06-01 05:56:28,Europe/Stockholm,2019-06-01 06:07:14,2019-06-01 07:56:28,2019-06-01,6554.0,6,1
2,1559361665,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,60.000694,15.788607,cell,2019-06-01 05:56:28,2019-06-01 06:07:48,Europe/Stockholm,2019-06-01 07:56:28,2019-06-01 08:07:48,2019-06-01,680.0,6,1
3,1559379582,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,59.989805,15.83542,fused,2019-06-01 10:42:10,2019-06-01 11:05:10,Europe/Stockholm,2019-06-01 12:42:10,2019-06-01 13:05:10,2019-06-01,1380.0,6,2
4,1559380908,0d3a07aa-e73c-4f98-b10e-cb8d3da77d79,59.981548,15.803858,cell,2019-06-01 11:14:34,2019-06-01 11:25:15,Europe/Stockholm,2019-06-01 13:14:34,2019-06-01 13:25:15,2019-06-01,641.0,6,3


Holiday seasons: 6.23 - 8.10, and > 12.22

In [58]:
# Identify home location using individual stop sequence
def home_detection(data):
    def time_in_range(start, end, x):
        """Return true if x is in the range [start, end]"""
        if (x >= start) | (x < end):
            return True
        else:
            return False

    def point(row):
        start = 19
        end = 8
        h_s = row['TimeLocal'].hour
        h_e = row['leaving_TimeLocal'].hour
        pt = row['dur'] / 3600  # stay length in hour
        if row['weekday'] == 1:
            if ~time_in_range(start, end, h_s) & ~time_in_range(start, end, h_e):
                pt = 0
            if ~time_in_range(start, end, h_s) & time_in_range(start, end, h_e):
                pt = pt - (19 - h_s)
            if time_in_range(start, end, h_s) & ~time_in_range(start, end, h_e):
                pt = pt - (h_e - 8)
        return pt

    data_cat = data.reset_index(drop=True)
    data_cat.loc[:, 'point'] = data_cat.apply(lambda row: point(row), axis=1)
    try:
        if len(data_cat) > 1:
            home = pd.DataFrame(data_cat.groupby(['cluster'])['point'].sum())  # Share of stay in total
            home = home.reset_index()
            home = home.loc[home['point'].idxmax()].values
            coords = data.loc[data.cluster == home[0], ['lng', 'lat']].values
            centroid = (MultiPoint(coords).centroid.x, MultiPoint(coords).centroid.y) #x='longitude', y='latitude'
        else:
            home = [0, 0]
            centroid = (0, 0)
        return pd.Series({'cluster': home[0], 'length': home[1], 'lng': centroid[0], 'lat': centroid[1]})
    except:
        home = [0, 0, 0, 0]
        centroid = (0, 0)
        return pd.Series({'cluster': home[0], 'length': home[1], 'lng': centroid[0], 'lat': centroid[1]})

In [19]:
df_eg.loc[:, 'weekday'] = df_eg.loc[:, 'TimeLocal'].apply(lambda x: 1 if x.weekday() in (0, 1, 2, 3, 4) else 0)

In [40]:
home_detection(df_eg)

cluster     1.000000
length     12.981944
lng        60.000901
lat        15.791221
dtype: float64

## 2. Home detection for all

In [41]:
df = pd.read_sql_query(sql="""SELECT * FROM stops_subset;""", con=engine)
df.loc[:, 'weekday'] = df.loc[:, 'TimeLocal'].apply(lambda x: 1 if x.weekday() in (0, 1, 2, 3, 4) else 0)

In [59]:
tqdm.pandas()
df_home = df.groupby('uid').progress_apply(home_detection).reset_index()
df_home.loc[:, 'cluster'] = df_home.loc[:, 'cluster'].astype(int)
df_home.head()

100%|██████████| 212826/212826 [27:28<00:00, 129.08it/s]


Unnamed: 0,uid,cluster,length,lng,lat
0,00008608-f79e-414d-bf1c-25632d6bc059,11,3.048611,12.569336,56.17392
1,00009689-c524-4a99-95d8-a2397d87db62,1,4.551667,12.657073,56.098287
2,0000c837-ef82-4dfd-b2a5-00bdc8680b0b,1,0.0,18.008615,59.371578
3,0000cd68-c931-4e3c-96f6-7c5837f59b08,20,23.314167,16.580486,59.628055
4,0000f6ad-ffa4-4af2-9c2a-49d6dc86ec3a,2,10.205833,16.39,57.89


## 3. Get DeSO zones for detected home

In [47]:
gdf = gpd.GeoDataFrame.from_postgis("SELECT deso, geom FROM public.zones", con=engine).to_crs(4326)

In [67]:
gdf_home = preprocess.df2gdf_point(df_home, 'lng', 'lat', crs=4326, drop=True)
gdf_home = gpd.sjoin(gdf_home, gdf)
gdf_home.head()

Unnamed: 0,uid,cluster,length,geometry,index_right,deso
0,00008608-f79e-414d-bf1c-25632d6bc059,11,3.048611,POINT (12.56934 56.17392),3249,1284C1040
24883,1e3e08c2-26f7-4230-91b8-0dc723e9f616,3,3.760556,POINT (12.56227 56.16990),3249,1284C1040
94244,720501ee-fb8e-4eb0-a46a-1539f01454dc,6,16.829444,POINT (12.57611 56.17458),3249,1284C1040
121734,92eb49d2-2ce5-4ec0-8dd7-fcb14c93a652,9,2.588611,POINT (12.56974 56.17623),3249,1284C1040
151119,b5de8870-7cd1-4a78-9a07-460dc3cfc807,1,2.776944,POINT (12.56942 56.17419),3249,1284C1040


In [68]:
df_home_deso = pd.merge(df_home, gdf_home.loc[:, ['uid', 'deso']], on='uid', how='inner')
df_home_deso.head()

Unnamed: 0,uid,cluster,length,lng,lat,deso
0,00008608-f79e-414d-bf1c-25632d6bc059,11,3.048611,12.569336,56.17392,1284C1040
1,00009689-c524-4a99-95d8-a2397d87db62,1,4.551667,12.657073,56.098287,1283C1670
2,0000c837-ef82-4dfd-b2a5-00bdc8680b0b,1,0.0,18.008615,59.371578,0184C1300
3,0000cd68-c931-4e3c-96f6-7c5837f59b08,20,23.314167,16.580486,59.628055,1980C1570
4,0000f6ad-ffa4-4af2-9c2a-49d6dc86ec3a,2,10.205833,16.39,57.89,0883B2010


In [69]:
df_home_deso.loc[df_home_deso['length']>0, :].to_sql('home', engine, schema='public', index=False, method='multi', if_exists='replace', chunksize=10000)