In [1]:
import pandas as pd
import numpy as np
import json
from collections import namedtuple
import pickle

In [2]:
df = pd.read_csv('../data/one_travel_chain.csv')
# b_m = df[df['date']<df['migrt']]
# a_m = df[df['date']>df['migrt']]
# b_m.to_csv("../data/one_travel_chain_before.csv",index=False)
# a_m.to_csv("../data/one_travel_chain_after.csv",index=False)

In [3]:
unique_grid_ids = pd.unique(df[['grid_id_o', 'grid_id_d']].values.ravel('K'))

# using factorize function to apply new unique id
new_codes, unique = pd.factorize(unique_grid_ids, sort=True)
grid_id_mapping = dict(zip(unique, new_codes))

df['grid_new_o'] = df['grid_id_o'].map(grid_id_mapping)
df['grid_new_d'] = df['grid_id_d'].map(grid_id_mapping)
df.head(2)

Unnamed: 0,who,date,seiqd,mode,lambda_o,phi_o,lambda_d,phi_d,migrt,weekend,...,LU_Special,LU_Transportation,LU_Wild,trip_routelen,stime,etime,grid_id_o,grid_id_d,grid_new_o,grid_new_d
0,36384703,20190102,1,1.0,114.03383,22.621549,114.064399,22.554032,20190706,False,...,0.0,0.0,0.002857,8885.289,2019/1/2 8:33,2019/1/2 8:41,27962.0,17265.0,2,329
1,36384703,20190102,2,1.0,114.064399,22.554032,114.041782,22.544709,20190706,False,...,0.0,0.0,0.02738,3314.763,2019/1/2 9:55,2019/1/2 9:58,17265.0,15470.0,329,325


In [4]:
id_fnid_mapping = dict(zip(new_codes,unique))
with open("../data/id_fnid_mapping.pkl", "wb") as f:
    pickle.dump(id_fnid_mapping, f)

In [5]:
def build_chain(group):
    chains = []
    current_chain = [group['grid_id_o'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['grid_id_d'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['grid_id_d'].iloc[i] != group['grid_id_o'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['grid_id_o'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['grid_id_d'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_id_chain(group):
    chains = []
    current_chain = [group['grid_new_o'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['grid_new_d'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['grid_new_d'].iloc[i] != group['grid_new_o'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['grid_new_o'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['grid_new_d'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_both_chains(group):
    travel_chain = build_chain(group)
    id_chain = build_id_chain(group)
    return pd.Series({'travel_chain': travel_chain, 'id_chain': id_chain})

In [6]:
# Group by 'date' and apply the chain-building function
grouped1 = df[df['date']<df['migrt']]
grouped2 = df[df['date']>df['migrt']]
grouped1 = grouped1.groupby('date').apply(build_both_chains).reset_index()
grouped2 = grouped2.groupby('date').apply(build_both_chains).reset_index()

In [7]:
# Define the namedtuple type
TravelData = namedtuple('TravelChain', ['date', 'travel_chain','id_chain'])

In [8]:
# Convert each row to a namedtuple
namedtuples_list1 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0]) for _, row in grouped1.iterrows()]
namedtuples_list2 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0]) for _, row in grouped2.iterrows()]
namedtuples_all = namedtuples_list1+namedtuples_list2

In [9]:
def int64_converter(obj):
    if isinstance(obj, np.int64):
        return int(obj)
    raise TypeError

dicts_list1 = [nt._asdict() for nt in namedtuples_list1]
with open('../data/before_migrt.json', 'w') as file:
    json.dump(dicts_list1, file, indent=4,default=int64_converter)

dicts_list2 = [nt._asdict() for nt in namedtuples_list2]
with open('../data/after_migrt.json', 'w') as file:
    json.dump(dicts_list2, file, indent=4,default=int64_converter)

dicts_list_all = [nt._asdict() for nt in namedtuples_all]
with open('../data/all_traj.json', 'w') as file:
    json.dump(dicts_list_all, file, indent=4,default=int64_converter)

In [10]:
with open('../data/all_traj.json', 'r') as file:
    loaded_dicts_all = json.load(file)
loaded_namedtuples_all = [TravelData(**d) for d in loaded_dicts_all]

In [11]:
loaded_namedtuples_all

[TravelChain(date=20190102, travel_chain=[27962.0, 17265.0, 15470.0, 27962.0], id_chain=[2, 329, 325, 2]),
 TravelChain(date=20190103, travel_chain=[27962.0, 17265.0, 15470.0, 27962.0], id_chain=[2, 329, 325, 2]),
 TravelChain(date=20190104, travel_chain=[27962.0, 15470.0, 27962.0, 22184.0, 19680.0], id_chain=[2, 325, 2, 154, 194]),
 TravelChain(date=20190105, travel_chain=[19680.0, 21884.0, 27962.0, 19769.0, 17994.0, 27962.0], id_chain=[194, 98, 2, 0, 221, 2]),
 TravelChain(date=20190107, travel_chain=[27962.0, 15470.0, 27962.0, 21502.0, 27962.0], id_chain=[2, 325, 2, 121, 2]),
 TravelChain(date=20190108, travel_chain=[27962.0, 15470.0, 27962.0, 21502.0], id_chain=[2, 325, 2, 121]),
 TravelChain(date=20190109, travel_chain=[21502.0, 27962.0, 17265.0, 27962.0], id_chain=[121, 2, 329, 2]),
 TravelChain(date=20190110, travel_chain=[27962.0, 15470.0, 21544.0, 27962.0], id_chain=[2, 325, 47, 2]),
 TravelChain(date=20190111, travel_chain=[27962.0, 15470.0, 27962.0], id_chain=[2, 325, 2]),
 