In [1]:
import pandas as pd
import datetime

### Data representation
$\begin{align}\{p^u_{i}|i=1,2,3,...,n ,\end{align}$
and
$\begin{align}u=1,2,3,...,U\}\end{align}$
- $\begin{align}p^u_{i}=(x_{i},y_{i},t_{i1},t_{i2})\end{align}$
- $\begin{align}x_{i},y_{i}:\end{align}$geographical coordinates
- $\begin{align}t_{i1},t_{i2}:\end{align}$start and end time

In [2]:
stop_points = pd.read_csv('./data/pre_stop_points.csv')
stop_points['activity'] = stop_points['activity_class']
stop_points = stop_points.drop(['activity_class'], axis=1)
stop_points['start_time'] = pd.to_datetime(stop_points['start_time'])
stop_points['end_time'] = pd.to_datetime(stop_points['end_time'])
stop_points.head()

Unnamed: 0,uid,id,lat,lng,start_time,end_time,activity
0,9497,250460,37.554742,127.026641,2019-11-07 10:45:58,2019-11-07 16:00:49,0.0
1,9497,240029,37.564022,127.03558,2019-11-06 23:27:51,2019-11-07 10:45:58,3.0
2,9497,238990,37.560267,127.033065,2019-11-06 20:22:16,2019-11-06 23:25:41,3.0
3,9497,234786,37.561929,127.038133,2019-11-06 17:15:56,2019-11-06 18:13:46,3.0
4,9497,226233,37.561171,127.037129,2019-11-06 07:49:15,2019-11-06 08:29:32,3.0


In [3]:
weekday_list = list()

for i in stop_points.index:
    boolean = 1 if stop_points.start_time[i].weekday() < 5 else 0
    weekday_list.append(boolean)

stop_points['weekday'] = weekday_list
stop_points.head()

Unnamed: 0,uid,id,lat,lng,start_time,end_time,activity,weekday
0,9497,250460,37.554742,127.026641,2019-11-07 10:45:58,2019-11-07 16:00:49,0.0,1
1,9497,240029,37.564022,127.03558,2019-11-06 23:27:51,2019-11-07 10:45:58,3.0,1
2,9497,238990,37.560267,127.033065,2019-11-06 20:22:16,2019-11-06 23:25:41,3.0,1
3,9497,234786,37.561929,127.038133,2019-11-06 17:15:56,2019-11-06 18:13:46,3.0,1
4,9497,226233,37.561171,127.037129,2019-11-06 07:49:15,2019-11-06 08:29:32,3.0,1


#### Set of time slots
- The time period $\begin{align}(t_{i1},t_{i2})\mapsto\end{align}$ a set $\begin{align}\mathcal{S}_{i}\end{align}$ of time slots

In [4]:
def create_time_slot(freq):
    time_slot = datetime.timedelta(hours=0, minutes=0)
    time_slot_set = [time_slot]
    
    while time_slot < datetime.timedelta(hours=24):
        time_slot += datetime.timedelta(minutes=freq)
        time_slot_set.append(time_slot)
    df = pd.DataFrame(time_slot_set, columns=['time_slot'])
    return df

In [5]:
def time_slot_mapping(time_slot, start_time, end_time):
    for i in time_slot.index:
        if time_slot.time_slot[i] > start_time:
            sot = i-1
            break
    
    if start_time > end_time:        
        for i in time_slot.index:
            if time_slot.time_slot[i] > end_time:
                ent = i+1
                break
        df = time_slot[sot:]
        df = df.append(time_slot[:ent]).reset_index(drop=True)
        return df
    else:
        for i in time_slot.index:        
            if time_slot.time_slot[i] > end_time:
                ent = i+1
                break

        df = time_slot[sot:ent].reset_index(drop=True)
        return df

In [6]:
def create_set_of_time_slot(point, time_slot):
    df = pd.DataFrame()
    
    for i in point.index:
        hours = point.start_time[i].time().hour
        minutes = point.start_time[i].time().minute
        start_time = datetime.timedelta(hours=hours, minutes=minutes)

        hours = point.end_time[i].time().hour
        minutes = point.end_time[i].time().minute
        end_time = datetime.timedelta(hours=hours, minutes=minutes)

        tmp = time_slot_mapping(time_slot, start_time, end_time)
        tmp['idx'] = i
        tmp['weekday'] = point.weekday[i]
        tmp['activity'] = point.activity[i]
        df = df.append(tmp)
    df = df.reset_index(drop=True)
    return df

In [7]:
time_slot = create_time_slot(60)
time_slot_set = create_set_of_time_slot(stop_points, time_slot)
time_slot_set.tail()

Unnamed: 0,time_slot,idx,weekday,activity
6209,17:00:00,905,1,2.0
6210,18:00:00,905,1,2.0
6211,14:00:00,906,1,2.0
6212,15:00:00,906,1,2.0
6213,16:00:00,906,1,2.0


In [8]:
def temporal_frequency(point, time_slot_set):
    temporal_frequency = list()
    
    for i in point.index:
        activity_frequency = [point.id[i]]
        stop_point = time_slot_set[time_slot_set.idx == i]
        strat_time = stop_point.time_slot.iloc[0]
        end_time = stop_point.time_slot.iloc[-1]
        weekday = stop_point.weekday.iloc[0]
        
        df = time_slot_set[(time_slot_set.time_slot >= strat_time) &
                              (time_slot_set.time_slot >= end_time) &
                              (time_slot_set.weekday == weekday)]
        
        for activity_type in range(4):
            activity_frequency.append(df[df.activity == activity_type].shape[0] / df.shape[0])
        temporal_frequency.append(activity_frequency)
    feature_vector = pd.DataFrame(temporal_frequency, columns=['id', 'temporal_1', 'temporal_2', 'temporal_3', 'temporal_4'])
    return feature_vector

In [9]:
temporal_feature = temporal_frequency(stop_points, time_slot_set)
temporal_feature.head()

Unnamed: 0,id,temporal_1,temporal_2,temporal_3,temporal_4
0,250460,0.473973,0.086301,0.086301,0.353425
1,240029,0.673522,0.028278,0.061697,0.236504
2,238990,0.68306,0.021858,0.065574,0.229508
3,234786,0.568951,0.071119,0.06418,0.29575
4,226233,0.339605,0.12787,0.160714,0.371811


#### Activity duration  
$\begin{align}T_{i}=(t_{i2}-t_{i1})\end{align}$

In [11]:
stop_points['duration'] = (stop_points.end_time - stop_points.start_time) / datetime.timedelta(days=1)
duration_df = stop_points[['id', 'duration']]
duration_df.head()

Unnamed: 0,id,duration
0,250460,0.218646
1,240029,0.470914
2,238990,0.127373
3,234786,0.040162
4,226233,0.027975


In [12]:
temporal_feature.to_csv(f'./data/parameter_settings/time_slot_{}.csv', index=False)
# duration_df.to_csv('./data/parameter_settings/duration.csv', index=False)