In [1]:
import pandas as pd
import numpy as np
import json
from collections import namedtuple
import pickle

In [2]:
df = pd.read_csv('../data/one_travel_chain.csv')

unique_grid_ids = pd.unique(df[['grid_id_o', 'grid_id_d']].values.ravel('K'))

new_codes, unique = pd.factorize(unique_grid_ids, sort=True)

grid_id_mapping = dict(zip(unique, new_codes))

df['grid_new_o'] = df['grid_id_o'].map(grid_id_mapping)
df['grid_new_d'] = df['grid_id_d'].map(grid_id_mapping)
df.head()

Unnamed: 0,who,date,seiqd,mode,lambda_o,phi_o,lambda_d,phi_d,migrt,weekend,...,LU_Residence,LU_Science_&_Education,LU_Special,LU_Transportation,LU_Wild,trip_routelen,grid_id_o,grid_id_d,grid_new_o,grid_new_d
0,36384703,20190102,1,1,114.03383,22.621549,114.064399,22.554032,20190706,False,...,1.9663,0.77702,0.0,0.0,0.002857,8885.289,27962.0,17265,12,319
1,36384703,20190102,2,1,114.064399,22.554032,114.041782,22.544709,20190706,False,...,0.874633,0.087018,0.0,0.0,0.02738,3314.763,17265.0,15470,319,285
2,36384703,20190102,3,1,114.041782,22.544709,114.03383,22.621549,20190706,False,...,0.643236,0.326028,0.0,0.241541,0.237051,11424.598,15470.0,27962,285,12
3,36384703,20190103,1,1,114.03383,22.621549,114.064399,22.554032,20190706,False,...,1.9663,0.77702,0.0,0.0,0.002857,8885.289,27962.0,17265,12,319
4,36384703,20190103,2,1,114.064399,22.554032,114.041782,22.544709,20190706,False,...,0.874633,0.087018,0.0,0.0,0.02738,3314.763,17265.0,15470,319,285


In [3]:
id_fnid_mapping = dict(zip(new_codes,unique))
with open("../data/id_fnid_mapping.pkl", "wb") as f:
    pickle.dump(id_fnid_mapping, f)

In [24]:
def build_chain(group):
    chains = []
    current_chain = [group['grid_id_o'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['grid_id_d'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['grid_id_d'].iloc[i] != group['grid_id_o'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['grid_id_o'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['grid_id_d'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_id_chain(group):
    chains = []
    current_chain = [group['grid_new_o'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['grid_new_d'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['grid_new_d'].iloc[i] != group['grid_new_o'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['grid_new_o'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['grid_new_d'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def buildCost(group):
    cost = []
    current_cost = [group['trip_routelen'].iloc[0]]

    for i in range(len(group)-1):
        current_cost.append(group['trip_routelen'].iloc[i])
        if group['grid_new_d'].iloc[i] != group['grid_new_o'].iloc[i + 1]:
            cost.append(0)
            current_cost = [group['trip_routelen'].iloc[i + 1]]

    current_cost.append(0)
    cost.append(current_cost)
    return cost

def build_both_chains(group):
    travel_chain = build_chain(group)
    id_chain = build_id_chain(group)
    cost = buildCost(group)
    return pd.Series({'travel_chain': travel_chain, 'id_chain': id_chain, 'cost': cost})

In [25]:
# Group by 'date' and apply the chain-building function
grouped1 = df[df['date']<df['migrt']]
grouped2 = df[df['date']>df['migrt']]
grouped1 = grouped1.groupby('date').apply(build_both_chains).reset_index()
grouped2 = grouped2.groupby('date').apply(build_both_chains).reset_index()

In [27]:
# Define the namedtuple type
TravelData = namedtuple('TravelChain', ['date', 'travel_chain','id_chain','cost'])

In [28]:
# Convert each row to a namedtuple
namedtuples_list1 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0],row.cost[0]) for _, row in grouped1.iterrows()]
namedtuples_list2 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0],row.cost[0]) for _, row in grouped2.iterrows()]
namedtuples_all = namedtuples_list1+namedtuples_list2

In [29]:
def int64_converter(obj):
    if isinstance(obj, np.int64):
        return int(obj)
    raise TypeError

dicts_list1 = [nt._asdict() for nt in namedtuples_list1]
with open('../data/before_migrt.json', 'w') as file:
    json.dump(dicts_list1, file, indent=4,default=int64_converter)

dicts_list2 = [nt._asdict() for nt in namedtuples_list2]
with open('../data/after_migrt.json', 'w') as file:
    json.dump(dicts_list2, file, indent=4,default=int64_converter)

dicts_list_all = [nt._asdict() for nt in namedtuples_all]
with open('../data/all_traj.json', 'w') as file:
    json.dump(dicts_list_all, file, indent=4,default=int64_converter)

In [30]:
with open('../data/all_traj.json', 'r') as file:
    loaded_dicts_all = json.load(file)
loaded_namedtuples_all = [TravelData(**d) for d in loaded_dicts_all]

In [31]:
loaded_namedtuples_all

[TravelChain(date=20190102, travel_chain=[27962.0, 17265, 15470, 27962], id_chain=[12, 319, 285, 12], cost=[8885.289, 8885.289, 3314.7629999999995, 0]),
 TravelChain(date=20190103, travel_chain=[27962.0, 17265, 15470, 27962], id_chain=[12, 319, 285, 12], cost=[8885.289, 8885.289, 3314.7629999999995, 0]),
 TravelChain(date=20190104, travel_chain=[27962.0, 15470, 27962, 22184, 19680], id_chain=[12, 285, 12, 95, 173], cost=[11424.597999999996, 11424.597999999996, 11424.598000000002, 20163.194000000003, 0]),
 TravelChain(date=20190105, travel_chain=[19680.0, 21884, 27962, 19769, 17994, 27962], id_chain=[173, 160, 12, 225, 188, 12], cost=[18589.026, 18589.026, 8889.861999999997, 10341.902999999998, 3775.105, 0]),
 TravelChain(date=20190107, travel_chain=[27962.0, 15470, 27962, 21502, 27962], id_chain=[12, 285, 12, 126, 12], cost=[11424.597999999996, 11424.597999999996, 11424.598000000002, 11056.067000000005, 0]),
 TravelChain(date=20190108, travel_chain=[27962.0, 15470, 27962, 21502], id_ch