In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopy.distance import geodesic

### Stop points

In [2]:
# NOTE: despite its name, DATA_DIR holds the path of the stop-point CSV file.
DATA_DIR = './data/pre_stop_points.csv'

# Read the stop points and expose `activity_class` under the shorter name
# `activity` (appended as a new column, then the original column is dropped).
stop_df = (
    pd.read_csv(DATA_DIR)
    .assign(activity=lambda frame: frame['activity_class'])
    .drop(columns=['activity_class'])
)
stop_df.head()

Unnamed: 0,uid,id,lat,lng,start_time,end_time,activity
0,9497,250460,37.554742,127.026641,2019-11-07 10:45:58,2019-11-07 16:00:49,0.0
1,9497,240029,37.564022,127.03558,2019-11-06 23:27:51,2019-11-07 10:45:58,3.0
2,9497,238990,37.560267,127.033065,2019-11-06 20:22:16,2019-11-06 23:25:41,3.0
3,9497,234786,37.561929,127.038133,2019-11-06 17:15:56,2019-11-06 18:13:46,3.0
4,9497,226233,37.561171,127.037129,2019-11-06 07:49:15,2019-11-06 08:29:32,3.0


###  Home location

In [3]:
# Load the per-user home-detection table; later cells read its
# `uid`, `lat_avg` and `lng_avg` columns.
home_df = pd.read_csv('./data/home_detection.csv')
# Preview the first rows.
home_df.head()

Unnamed: 0,uid,lat_avg,lng_avg,overn_cnt,total_dur,clus_idx
0,204,37.555587,127.035631,6 days 00:00:00.000000000,6 days 01:32:26.000000000,0
1,287,37.542607,127.046064,6 days 00:00:00.000000000,5 days 08:40:51.000000000,0
2,305,37.570986,127.036927,10 days 00:00:00.000000000,8 days 05:11:49.000000000,0
3,381,37.539104,127.056621,43 days 00:00:00.000000000,38 days 08:47:40.000000000,0
4,606,37.249107,127.011834,32 days 00:00:00.000000000,29 days 13:53:05.000000000,0


### Core activity distance
* core activity: **home** (or work)
* normalize: $\phi(d) = (1 + d^{2})^{-1}$, where $d$ is the geodesic distance (in km) between a stop point and the core activity location

In [4]:
def normalization(dist):
    """Map a distance to a proximity score phi(d) = (1 + d^2)^-1.

    The score is 1 at distance 0 and decays towards 0 as the distance grows.

    Parameters
    ----------
    dist : int, float, or NumPy scalar/array
        Distance value(s). Array input is handled element-wise.

    Returns
    -------
    float or numpy.ndarray
        The normalized value(s), always floating point.
    """
    # The 1.0 literal forces float arithmetic: NumPy integer scalars/arrays
    # raise ValueError when an integer is raised to a negative integer power.
    return (1.0 + dist ** 2) ** -1

In [5]:
def core_activity_distance(stop_df, home_df):
    """Compute the normalized distance from every stop point to its user's home.

    For each user in ``stop_df`` the geodesic distance (kilometers) between
    each of the user's stop points and the user's first row in ``home_df`` is
    computed and passed through ``normalization``.

    Parameters
    ----------
    stop_df : pandas.DataFrame
        Stop points; must provide the columns ``uid``, ``id``, ``lat``, ``lng``.
    home_df : pandas.DataFrame
        Home locations; must provide the columns ``uid``, ``lat_avg``,
        ``lng_avg``. If a user has several rows, the first one is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` and ``core_dist``, one row per stop point, grouped by
        user in order of first appearance. Stop points of users that have no
        row in ``home_df`` get ``NaN`` as ``core_dist``.
    """
    id_list = []
    core_dist_list = []

    for uid in stop_df.uid.unique():
        user_stop = stop_df[stop_df.uid == uid]
        user_home = home_df[home_df.uid == uid]

        if user_home.empty:
            # No home known for this user: keep the stops (so ids stay
            # complete) but mark the distance as missing instead of failing
            # with an IndexError on `.iloc[0]`.
            id_list.extend(user_stop['id'])
            core_dist_list.extend([float('nan')] * len(user_stop))
            continue

        # The home coordinate is the same for all stops of this user.
        home_point = (user_home.lat_avg.iloc[0], user_home.lng_avg.iloc[0])

        # Iterate the per-user slice positionally; this does not rely on the
        # index labels of `stop_df` being unique.
        for stop_id, lat, lng in zip(user_stop['id'], user_stop['lat'], user_stop['lng']):
            core_dist = geodesic((lat, lng), home_point).kilometers
            core_dist_list.append(normalization(core_dist))
            id_list.append(stop_id)

    core_dist_df = pd.DataFrame(
        {'id': id_list, 'core_dist': core_dist_list},
        columns=['id', 'core_dist'],
    )
    return core_dist_df

In [6]:
# Compute the normalized home distance for every stop point.
core_atv_dist_df = core_activity_distance(stop_df, home_df)
# Preview the first rows of the result (columns: id, core_dist).
core_atv_dist_df.head()

Unnamed: 0,id,core_dist
0,250460,0.998987
1,240029,0.370871
2,238990,0.587871
3,234786,0.376185
4,226233,0.423902


In [7]:
# Persist the result without the positional index; the target directory
# must already exist.
core_atv_dist_df.to_csv('./data/parameter_settings/core_dist.csv', index=False)