In [1]:
import os
import re
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time

from utills import Candidate, Platoon
from pattern_miner import Miner

def get_trajectory_id(text):
    """Extract the trajectory id from a ``client_<id>.csv`` file name.

    Parameters
    ----------
    text : str
        File name (or path) to search.

    Returns
    -------
    str
        The text found between ``client_`` and the ``.csv`` extension.

    Raises
    ------
    ValueError
        If ``text`` does not contain a ``client_<id>.csv`` pattern.
    """
    # The dot is escaped so only a literal ".csv" extension matches; left
    # unescaped it accepts any character (e.g. "client_12csv" -> id "1").
    m = re.search(r'client_(.+)\.csv', text)
    if m is None:
        raise ValueError('cannot extract trajectory id from ' + repr(text))
    return m.group(1)

# Folder that holds the raw per-client trajectory files and the merged cache.
TRAJ_FOLDER = 'paths'
columns = ['lat', 'long', 'datetime', 'trajectory_id']
FILE_NAME = os.path.join(TRAJ_FOLDER, 'processed.csv')

# Build the merged CSV only when it is missing; later runs load it directly.
if not os.path.exists(FILE_NAME):
    if not os.path.exists(TRAJ_FOLDER):
        raise ValueError(TRAJ_FOLDER + ' does not exist')
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the merged file reproducible between runs and machines.
    folder_files = sorted(os.listdir(TRAJ_FOLDER))
    if not folder_files:
        # Fail with a clear message instead of pandas' "No objects to concatenate".
        raise ValueError(TRAJ_FOLDER + ' contains no trajectory files')
    list_df = []
    for filename in folder_files:
        df = pd.read_csv(os.path.join(TRAJ_FOLDER, filename), names=columns)
        # Numeric timestamps are interpreted as seconds (unit='s').
        df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
        # The id comes from the file name; raises ValueError for unexpected names.
        df['trajectory_id'] = get_trajectory_id(filename)
        list_df.append(df)
    df = pd.concat(list_df, ignore_index=True)
    df.to_csv(FILE_NAME, index=False)

# Always reload from the cache so `df` has the same dtypes whether or not the
# cache was just rebuilt.
df = pd.read_csv(FILE_NAME, parse_dates=[columns.index('datetime')], dtype={'lat': np.float32, 'long': np.float32, 'trajectory_id': np.str_})
df.head()

Unnamed: 0,lat,long,datetime,trajectory_id
0,359.5,416.5,2017-12-21 13:01:42,4976
1,359.5,416.5,2017-12-21 13:01:49,4976
2,359.5,416.5,2017-12-21 13:01:54,4976
3,364.5,426.5,2017-12-21 13:01:59,4976
4,379.5,456.5,2017-12-21 13:02:06,4976


In [10]:
# Folder for cached intermediate results of the mining pipeline.
savefolder = 'results_small'
# exist_ok=True replaces the separate exists() check.
os.makedirs(savefolder, exist_ok=True)

sampling_interval = pd.Timedelta(minutes=1)
split_border = pd.Timedelta(days=1)
# Total time span covered by the data set.
max_time_interval = df['datetime'].max()-df['datetime'].min()
# The last argument is the number of whole sampling intervals in the data span.
# NOTE(review): the meaning of the positional arguments 2, 60, 5 is defined in
# utills.Platoon and is not visible here - confirm before tuning.
pl = Platoon(2, 60, 5, max_time_interval // sampling_interval)
miner = Miner(df, pl, sampling_interval)

time1 = time.time()
print('Start time: ' + time.ctime())
# The two commented lines below compute and save the staypoints heatmap; with
# them disabled the cached file is loaded, so the timing reflects loading only.
#miner.extract_staypoints_heatmap(10)
#miner.save_staypoints_heatmap(os.path.join(savefolder, 'staypoints_heatmap.npy'))
miner.load_staypoints_heatmap(os.path.join(savefolder, 'staypoints_heatmap.npy'))
time2 = time.time()
print('Staypoints extraction done. Time: ' + str(time2 - time1))
miner.unify_datetime(split_border)
time3 = time.time()
print('Data unification done. Time: ' + str(time3 - time2))
# Same caching scheme as above: uncomment to recompute the candidate stars.
#miner.compute_candidate_stars(5)
#miner.save_candidate_stars(os.path.join(savefolder, 'candidate_stars.json'))
miner.load_candidate_stars(os.path.join(savefolder, 'candidate_stars.json'))
time4 = time.time()
print('Candidate stars computing done. Time: ' + str(time4 - time3))
miner.compute_pattern_set()
time5 = time.time()
print('Pattern set computing done. Time: ' + str(time5 - time4))
miner.compute_connection_rate()
miner.save_connection_rate(os.path.join(savefolder, 'connection_rate.npz'))
# NOTE(review): loading right after saving looks like a round-trip check of the
# cache file - confirm it is intentional.
miner.load_connection_rate(os.path.join(savefolder, 'connection_rate.npz'))
time6 = time.time()
# This stage is the connection-rate computation; the label previously repeated
# the pattern-set message, making the two timings indistinguishable in the log.
print('Connection rate computing done. Time: ' + str(time6 - time5))
print('All time: ' + str(time6 - time1))
print('Finish time: ' + time.ctime())

Start time: Wed Apr 25 13:00:37 2018
Staypoints extraction done. Time: 0.0010094642639160156
Data unification done. Time: 199.55676913261414
Candidate stars computing done. Time: 0.04638791084289551
Pattern set computing done. Time: 0.529653787612915
Pattern set computing done. Time: 40.72226047515869
All time: 240.85608077049255
Finish time: Wed Apr 25 13:04:38 2018
