In [1]:
import pandas as pd
import numpy as np
import re
import glob
import geohash
import json
import collections as cl
import datetime
from tqdm import tqdm_notebook as tqdm

In [None]:
## Common helper functions

In [2]:
def timehash_encode(time):
    """Encode a timestamp string as a zero-padded 10-minute slot number.

    Reads the hour from characters 11-12 and the tens digit of the minute
    from character 14 (e.g. "2010-10-01 12:34:56" -> hour 12, digit 3), and
    returns ``hour * 6 + digit`` as a 4-character string.
    """
    hour = int(time[11:13])
    minute_tens = int(time[14])
    slot = hour * 6 + minute_tens
    return format(slot, "04d")

def timehash_encode_for_1minute(time):
    """Encode a timestamp string as a zero-padded minute-of-day number.

    Reads the hour from characters 11-12 and the minute from characters
    14-15 (e.g. "2010-10-01 12:34:56" -> 12 and 34), and returns
    ``hour * 60 + minute`` as a 4-character string.
    """
    hour = int(time[11:13])
    minute = int(time[14:16])
    minute_of_day = hour * 60 + minute
    return format(minute_of_day, "04d")

def encode(time, latitude, longtitude):
    """Build the combined key: 10-character geohash followed by the minute code.

    Args:
        time: Timestamp string accepted by ``timehash_encode_for_1minute``.
        latitude: Latitude passed to ``geohash.encode``.
        longtitude: Longitude passed to ``geohash.encode`` (parameter name
            kept as spelled so keyword callers keep working).

    Returns:
        The geohash (precision 10) concatenated with the 4-character
        minute-of-day code.
    """
    minute_code = timehash_encode_for_1minute(time)
    cell_code = geohash.encode(latitude, longtitude, 10)
    return "".join((cell_code, minute_code))

In [None]:
## From here on: loading the data

In [None]:
def transform_from_dir(dir_name, index, area):
    """Run ``extract_columns`` on every entry directly inside ``dir_name``.

    ``index`` and ``area`` are forwarded unchanged to ``extract_columns``.
    Entries are visited in whatever order ``glob.glob`` returns them.
    """
    for path in glob.glob("%s/*" % dir_name):
        extract_columns(path, index, area)
        
def extract_10_minutes_data(trajectory_data):
    """Keep only the rows whose ``time`` string ends with ``'0:00'``.

    For "HH:MM:SS" timestamps these are the rows whose minute ends in 0 and
    whose seconds are 00.
    """
    on_ten_minute_mark = trajectory_data['time'].str.endswith('0:00')
    return trajectory_data[on_ten_minute_mark]

def extract_columns(file_name, index, area):
    """Sample one raw CSV and append its (time, long, lat) rows to the area output file.

    Args:
        file_name: Path of a header-less CSV; columns 3, 4 and 5 are taken
            as time, longitude and latitude.
        index: Row-number remainder modulo 10; only rows whose position
            satisfies ``row_number % 10 == index`` are kept.
        area: Label inserted into the output file name.

    Side effects:
        Prints ``file_name`` and appends (no header, no index) to
        ``./data/output-1minute-<area>-index-<index>.csv``. The ``./data``
        directory must already exist.
    """
    print(file_name)
    time_tokyo = pd.read_csv(file_name, header=None)
    time_tokyo = time_tokyo.query('index %% 10 == %s' % index)
    time_tokyo = time_tokyo.iloc[:, 3:6]
    time_tokyo.columns = ["time", "long", "lat"]
    # drop_duplicates() returns a new frame; the result must be kept, otherwise
    # duplicated rows are written out unchanged.
    time_tokyo = time_tokyo.drop_duplicates()
    # Appending means duplicates are only removed within a single input file.
    time_tokyo.to_csv('./data/output-1minute-%s-index-%s.csv' % (area, str(index)), mode='a', index=False, header=False)

def encode_batch_data(trajectory_data_n, data_list):
    """Encode every row of a trajectory frame and append the keys to ``data_list``.

    The frame's columns are renamed in place to ``time``, ``long``, ``lat``
    (so the caller's frame is modified), then each row is passed through
    ``encode`` and the result appended to ``data_list``.

    Returns:
        Always ``True``.
    """
    trajectory_data_n.columns = ["time", "long", "lat"]
    for _, record in tqdm(trajectory_data_n.iterrows()):
        data_list.append(encode(record["time"], record["lat"], record["long"]))
    return True

In [None]:
# Build the sampled CSVs: for each remainder index 0-9, process the 24 numbered
# time-tokyo-N and time-kinki-N directories (N = 1..24).
# NOTE: this cell needs the 3-argument transform_from_dir defined above; a later
# cell redefines the same name with 4 parameters, so run order matters.
# NOTE(review): the input directories are hard-coded absolute local paths.
for index in range(10):
    for i in range(24):
        transform_from_dir("/Users/fumiyuki/Downloads/time-tokyo-%s" % str(i + 1), index, 'tokyo')
        transform_from_dir("/Users/fumiyuki/Downloads/time-kinki-%s" % str(i + 1), index, 'kinki')

In [None]:
# Encode the sampled rows of both areas into one flat list of keys.
data_list = []
# range(1) means only the index-0 sample file of each area is read.
for index in range(1):
    for area in ["tokyo", "kinki"]:
        trajectory_data = pd.read_csv('./data/output-1minute-%s-index-%s.csv' % (area, str(index)), header=None)
        # Remove rows duplicated across the appended input files.
        trajectory_data = trajectory_data.drop_duplicates()
        # Appends the encoded keys to data_list in place.
        encode_batch_data(trajectory_data, data_list)

In [None]:
import pickle

# with open('index-1.pickle', 'wb') as f:
#     pickle.dump(data_list, f, pickle.HIGHEST_PROTOCOL)

# with open('index-1.pickle', 'rb') as f:
#     data_list = pickle.load(f)

In [None]:
## From here on: writing the data to JSON

In [None]:
def encode_all_data(data_list, limit_num):
    """Write the first ``limit_num`` entries of ``data_list``, sorted, to a JSON file.

    The slice is taken before sorting, so the output holds the first
    ``limit_num`` entries in input order, sorted afterwards; the caller's
    list is not modified. The file is written to
    ``./data/central-geohash10-real-for-1minute-<limit_num>-<timestamp>.json``
    with the single key ``"data"``.
    """
    selected = data_list[:limit_num]
    selected.sort()
    payload = cl.OrderedDict([("data", selected)])
    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    filename = './data/central-geohash10-real-for-1minute-%s-%s.json' % (str(limit_num), stamp)
    with open(filename, 'w') as out_file:
        json.dump(payload, out_file, indent=None)
        
# def encode_all_data(output_file_name, limit_num):
#     trajectory_data = pd.read_csv(output_file_name, header=None)
#     trajectory_data.columns = ["time", "long", "lat"]
    
#     json_data = cl.OrderedDict()
#     data_list = []
#     count = 0
#     for index, row in tqdm(trajectory_data.iterrows()):
#         encoded_value = encode(row["time"], row["lat"], row["long"])
#         data_list.append(encoded_value)
#         count += 1
#         if count == limit_num:
#             break
#     data_list.sort()
#     json_data["data"] = data_list
#     now_timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
#     filename = './data/real-for-1minute-accum.json'
#     with open(filename, 'w') as f:
#         json.dump(json_data, f, indent=None)

In [None]:
# Write the encoded keys to JSON. The limit of 100000000 is presumably meant to
# be larger than the list, i.e. "no limit" -- TODO confirm.
# NOTE: relies on the list-based encode_all_data defined just above; a later
# cell redefines the same name with a different meaning.
encode_all_data(data_list, 100000000)

In [None]:
## Query data

In [77]:
def transform_from_dir(dir_name, id_data_list, current, batch_size):
    """Load one batch of per-ID CSV files and collect them as 1440-row chunks.

    Files directly inside ``dir_name`` are sorted by name and the slice
    ``[current:current + batch_size]`` is processed. For each file:

    * more than 1440 rows but not exactly 2880: the path is printed and the
      file is skipped;
    * fewer than 1440 rows: the frame is padded by repeating its leading
      rows; if that still does not reach 1440 rows (fewer than 720 rows to
      begin with) the file contributes nothing;
    * exactly 1440 or 2880 rows: split into one or two 1440-row chunks.

    Args:
        dir_name: Directory containing the CSV files.
        id_data_list: List the chunks are appended to (modified in place).
        current: Start offset into the sorted file list.
        batch_size: Number of files to process from ``current``.

    Returns:
        The same ``id_data_list`` object.
    """
    rows_per_chunk = 1440  # 24 * 60; presumably one row per minute of a day
    files = sorted(glob.glob("%s/*" % dir_name))[current:current + batch_size]
    for file in tqdm(files):
        id_data = extract_columns(file)
        n_rows = len(id_data)
        if n_rows > rows_per_chunk and n_rows != 2 * rows_per_chunk:
            # Neither one nor exactly two chunks' worth of rows: report and skip.
            print(file)
            continue
        missing = rows_per_chunk - n_rows
        if missing > 0:
            # Pad short files by repeating their first rows. pd.concat replaces
            # DataFrame.append, which was removed in pandas 2.0.
            id_data = pd.concat([id_data, id_data.iloc[:missing]])
        # A 2880-row file is deliberately NOT padded: the previous
        # append(iloc[:-1440]) re-added its first 1440 rows and therefore
        # emitted the first chunk twice.
        for i in range(len(id_data) // rows_per_chunk):
            id_data_list.append(id_data.iloc[i * rows_per_chunk:(i + 1) * rows_per_chunk])
    return id_data_list
        
def extract_10_minutes_data(trajectory_data):
    """Return the rows whose ``time`` value ends in ``'0:00'``.

    Same behaviour as the identically named function defined earlier in
    this notebook; this definition replaces it when the cell is run.
    """
    ends_on_ten_minutes = trajectory_data['time'].str.endswith('0:00')
    return trajectory_data[ends_on_ten_minutes]

def extract_columns(file_name):
    """Read one header-less CSV and return its de-duplicated (time, long, lat) rows.

    Columns 3, 4 and 5 are kept and renamed to ``time``, ``long`` and
    ``lat``; only the first row for each distinct ``time`` value is
    retained. The original row index is preserved.

    The row count is not validated here (an earlier multiple-of-1440 check
    is disabled); callers decide what to do with unexpected lengths.
    """
    selected = pd.read_csv(file_name, header=None).iloc[:, [3,4,5]]
    selected.columns = ["time", "long", "lat"]
    return selected.drop_duplicates(subset=["time"])

In [89]:
# Collect per-ID trajectories from kinki-id-1 and kinki-id-2 in batches.
id_data_list = []
batch_size = 250
current = 0
for i in range(2):
    # 40 batches of 250 files = the first 10,000 sorted files of each directory.
    for j in range(40):
#         transform_from_dir("/Users/fumiyuki/Downloads/tokyo-id-%s/data" % str(i + 2), id_data_list, current, batch_size)
        transform_from_dir("/Users/fumiyuki/Downloads/kinki-id-%s" % str(i + 1), id_data_list, current, batch_size)
        current += batch_size
    # Restart the file offset for the next directory.
    current = 0

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for file in tqdm(files):


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00000204.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00000592.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000593.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000594.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000595.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000596.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000597.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000598.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000599.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000600.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000601.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000602.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000603.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000604.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000605.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000606.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000607.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000608.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000609.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000610.csv
/Users/fumiyuki/Downloads/kinki-id-1/00000611.csv


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00001717.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00002092.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00002698.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002699.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002700.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002701.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002702.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002703.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002704.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002705.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002706.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002707.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002708.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002709.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002710.csv
/Users/fumiyuki/Downloads/kinki-id-1/00002711.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00003670.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003673.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003675.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003678.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003683.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003693.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003746.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003748.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003749.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003750.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003751.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003752.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003753.csv
/Users/fumiyuki/Downloads/kinki-id-1/00003754.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00004209.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00005003.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00005323.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005326.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005327.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005328.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00005734.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00005823.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005824.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005825.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005826.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005827.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005828.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005829.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005830.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005831.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005832.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005833.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005834.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005835.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005836.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005837.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005838.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005839.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005840.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005841.csv
/Users/fumiyuki/Downloads/kinki-id-1/00005842.csv


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00006130.csv
/Users/fumiyuki/Downloads/kinki-id-1/00006132.csv
/Users/fumiyuki/Downloads/kinki-id-1/00006134.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00007970.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00008309.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008325.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008332.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008334.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008335.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008336.csv
/Users/fumiyuki/Downloads/kinki-id-1/00008337.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00008791.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00009288.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-1/00009297.csv
/Users/fumiyuki/Downloads/kinki-id-1/00009429.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00100677.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00100924.csv
/Users/fumiyuki/Downloads/kinki-id-2/00100926.csv
/Users/fumiyuki/Downloads/kinki-id-2/00100978.csv
/Users/fumiyuki/Downloads/kinki-id-2/00100980.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00102491.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102492.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102493.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102494.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102495.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102496.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102497.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102498.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102499.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00102500.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102501.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102502.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102503.csv
/Users/fumiyuki/Downloads/kinki-id-2/00102504.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00103082.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00103260.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103286.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00103794.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103795.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103796.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103797.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103798.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103799.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103800.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103801.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103802.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103803.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103804.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103805.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103806.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103807.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103809.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103810.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103811.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103812.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103813.csv
/Users/fumiyuki/Downloads/kinki-id-2/00103814.csv


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00104398.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104401.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00104517.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104518.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104519.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104520.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104521.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104522.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104523.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104524.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104525.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104526.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104527.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104528.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104529.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104530.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104531.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104532.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104533.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104534.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104535.csv
/Users/fumiyuki/Downloads/kinki-id-2/00104536.csv


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00105076.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00105407.csv
/Users/fumiyuki/Downloads/kinki-id-2/00105409.csv
/Users/fumiyuki/Downloads/kinki-id-2/00105410.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00106239.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00106381.csv
/Users/fumiyuki/Downloads/kinki-id-2/00106403.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00106737.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00106835.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00107395.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00108908.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00109124.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

/Users/fumiyuki/Downloads/kinki-id-2/00109322.csv
/Users/fumiyuki/Downloads/kinki-id-2/00109334.csv



HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))




In [85]:
import pickle

# Cache the collected chunks so the slow loading cell does not have to be re-run.
with open('id_data_list.pickle', 'wb') as f:
    pickle.dump(id_data_list, f, pickle.HIGHEST_PROTOCOL)

# with open('id_data_list.pickle', 'rb') as f:
#     data_list = pickle.load(f)

In [86]:
def encode_all_data(data_list, client_limit):
    """Encode per-client trajectories and write them to a timestamped JSON file.

    Each element of ``data_list`` must be a frame of exactly 1440 rows with
    ``time``, ``lat`` and ``long`` columns (checked with ``assert``). Every
    row is encoded with ``encode``, the encoded keys are concatenated as
    bytes and stored hex-encoded under ``"geodata"``, together with the row
    count (``"query_size"``) and a running ``"query_id"`` starting at 0.
    Processing stops once ``client_limit`` clients have been encoded.

    Side effects:
        Prints the number of encoded clients and writes
        ``./data/geohash10-client-<client_limit>-real-for-1minute-<timestamp>.json``
        with the keys ``"data"`` and ``"client_size"``.
    """
    queries = []
    for _, id_data in tqdm(enumerate(data_list)):
        assert(len(id_data) == 1440)
        encoded_rows = [
            encode(row["time"], row["lat"], row["long"]).encode()
            for _, row in id_data.iterrows()
        ]
        # query_id is the position of this entry in the output list.
        queries.append({
            "geodata": b''.join(encoded_rows).hex(),
            "query_size": len(id_data),
            "query_id": len(queries),
        })
        if len(queries) == client_limit:
            break

    client_count = len(queries)
    json_data = cl.OrderedDict()
    json_data["data"] = queries
    json_data["client_size"] = client_count
    print("client size", client_count)

    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    filename = './data/geohash10-client-%s-real-for-1minute-%s.json' % (str(client_limit), stamp)
    with open(filename, 'w') as out_file:
        json.dump(json_data, out_file, indent=None)

In [94]:
# Encode up to 1000 clients, skipping the first 10000 collected chunks.
# NOTE: relies on the client-based encode_all_data defined in the cell above.
encode_all_data(id_data_list[10000:], 1000)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i, id_data in tqdm(enumerate(data_list)):


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


client size 1000


19985

In [None]:
# Tests

In [None]:
# Scratch check: load the 1-minute output, keep every 40th row, then de-duplicate.
trajectory_data = pd.read_csv('./data/output-for-1minute.csv', header=None)
trajectory_data = trajectory_data.query('index % 40 == 0')
trajectory_data = trajectory_data.drop_duplicates()

In [None]:
# Scratch check: geohash (precision 9) of the first row of one raw time-tokyo file.
# NOTE(review): the f-string has no placeholders; the path is a hard-coded local path.
time_tokyo = pd.read_csv(f"/Users/fumiyuki/Downloads/time-tokyo-1/08TKY_time_0000.csv", header=None)
time_tokyo = time_tokyo.iloc[:, 3:6]
time_tokyo.columns = ["time", "long", "lat"]
geohash.encode(time_tokyo["lat"][0], time_tokyo["long"][0] , 9)

In [72]:
# Scratch check: load a single per-ID file for manual inspection.
time_tokyo = pd.read_csv(f"/Users/fumiyuki/Downloads/tokyo-id-2/data/00020042.csv", header=None)

In [73]:
# Same column selection and per-timestamp de-duplication as the 1-argument extract_columns.
time_tokyo = time_tokyo.iloc[:, [3,4,5]]
time_tokyo.columns = ["time", "long", "lat"]
time_tokyo = time_tokyo.drop_duplicates(subset=["time"])

In [38]:
# Show up to 200 rows when displaying frames (affects the whole kernel session).
pd.set_option('display.max_rows', 200)

In [54]:
# Display the first row of the currently loaded frame.
time_tokyo.iloc[0]

time    2010-10-01 00:00:00
long                135.503
lat                  34.799
Name: 0, dtype: object

In [65]:
# Scratch check of the padding step with an empty slice: concatenating iloc[:0]
# leaves the rows unchanged. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
time_tokyo = pd.concat([time_tokyo, time_tokyo.iloc[:0]])