In [1]:
import pandas as pd
import numpy as np
import json
from collections import namedtuple
import pickle

In [2]:
df = pd.read_csv('../data/one_travel_chain.csv')

In [3]:
df['origin'] = df.apply(lambda row: (row['lambda_o'], row['phi_o']), axis=1)
df['destination'] = df.apply(lambda row: (row['lambda_d'], row['phi_d']), axis=1)

unique_coords_ids = pd.unique(df[['origin', 'destination']].values.ravel('K'))

# using factorize function to apply new unique id
new_codes, unique = pd.factorize(unique_coords_ids, sort=True)
coords_id_mapping = dict(zip(unique, new_codes))

df['origin_id'] = df['origin'].map(coords_id_mapping)
df['destination_id'] = df['destination'].map(coords_id_mapping)

In [4]:
id_coords_mapping = dict(zip(new_codes,unique))
with open("../data/id_coords_mapping.pkl", "wb") as f:
    pickle.dump(id_coords_mapping, f)

In [5]:
def build_chain(group):
    chains = []
    current_chain = [group['origin'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['destination'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['destination'].iloc[i] != group['origin'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['origin'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['destination'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_id_chain(group):
    chains = []
    current_chain = [group['origin_id'].iloc[0]]
    
    for i in range(len(group) - 1):
        current_chain.append(group['destination_id'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['destination_id'].iloc[i] != group['origin_id'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['origin_id'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['destination_id'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_fnid_chain(group):
    chains = []
    current_chain = [group['grid_id_o'].iloc[0]]

    for i in range(len(group) - 1):
        current_chain.append(group['grid_id_d'].iloc[i])
        # Check if the next 'o' is different from the current 'd'
        if group['grid_id_d'].iloc[i] != group['grid_id_o'].iloc[i + 1]:
            chains.append(current_chain)
            current_chain = [group['grid_id_o'].iloc[i + 1]]
    
    # Append the last destination and the final chain
    current_chain.append(group['grid_id_d'].iloc[-1])
    chains.append(current_chain)
    
    return chains

def build_both_chains(group):
    travel_chain = build_chain(group)
    id_chain = build_id_chain(group)
    fnid_chain = build_fnid_chain(group)
    return pd.Series({'travel_chain': travel_chain, 'id_chain': id_chain, 'fnid_chain':fnid_chain})

In [6]:
# Group by 'date' and apply the chain-building function
grouped1 = df[df['date']<df['migrt']]
grouped2 = df[df['date']>df['migrt']]
grouped1 = grouped1.groupby('date').apply(build_both_chains).reset_index()
grouped2 = grouped2.groupby('date').apply(build_both_chains).reset_index()

In [7]:
# Define the namedtuple type
TravelData = namedtuple('TravelChain', ['date', 'travel_chain','id_chain','fnid_chain'])

In [8]:
# Convert each row to a namedtuple
namedtuples_list1 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0], row.fnid_chain[0]) for _, row in grouped1.iterrows()]
namedtuples_list2 = [TravelData(row.date, row.travel_chain[0], row.id_chain[0], row.fnid_chain[0]) for _, row in grouped2.iterrows()]
namedtuples_all = namedtuples_list1+namedtuples_list2

In [9]:
def int64_converter(obj):
    if isinstance(obj, np.int64):
        return int(obj)
    raise TypeError

dicts_list1 = [nt._asdict() for nt in namedtuples_list1]
with open('../data/before_migrt.json', 'w') as file:
    json.dump(dicts_list1, file, indent=4,default=int64_converter)

dicts_list2 = [nt._asdict() for nt in namedtuples_list2]
with open('../data/after_migrt.json', 'w') as file:
    json.dump(dicts_list2, file, indent=4,default=int64_converter)

dicts_list_all = [nt._asdict() for nt in namedtuples_all]
with open('../data/all_traj.json', 'w') as file:
    json.dump(dicts_list_all, file, indent=4,default=int64_converter)

In [10]:
with open('../data/all_traj.json', 'r') as file:
    loaded_dicts_all = json.load(file)
loaded_namedtuples_all = [TravelData(**d) for d in loaded_dicts_all]

In [11]:
loaded_namedtuples_all

[TravelChain(date=20190102, travel_chain=[[114.0338301, 22.62154859], [114.0643988, 22.55403194], [114.041782, 22.54470908], [114.0338301, 22.62154859]], id_chain=[30, 352, 162, 30], fnid_chain=[27962.0, 17265.0, 15470.0, 27962.0]),
 TravelChain(date=20190103, travel_chain=[[114.0338301, 22.62154859], [114.0643988, 22.55403194], [114.041782, 22.54470908], [114.0338301, 22.62154859]], id_chain=[30, 352, 162, 30], fnid_chain=[27962.0, 17265.0, 15470.0, 27962.0]),
 TravelChain(date=20190104, travel_chain=[[114.0338301, 22.62154859], [114.041782, 22.54470908], [114.0338301, 22.62154859], [113.8719434, 22.58630867], [113.8594877, 22.57118435]], id_chain=[30, 162, 30, 161, 241], fnid_chain=[27962.0, 15470.0, 27962.0, 22184.0, 19680.0]),
 TravelChain(date=20190105, travel_chain=[[113.8594877, 22.57118435], [114.0107444, 22.58362918], [114.0338301, 22.62154859], [114.0779134, 22.56979328], [114.102444, 22.55857648], [114.0338301, 22.62154859]], id_chain=[241, 107, 30, 225, 87, 30], fnid_chain=