In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler
import warnings 
from tqdm import tqdm
import os
warnings.filterwarnings('ignore')

In [2]:
# Read the grid layer from GeoJSON. Later cells use both its attribute
# columns (`grid_be[selectedColumns]`) and its geometry (`grid_be.geometry`).
grid_be = gpd.read_file('./data_pe/grids_with_BE.geojson')

In [3]:
# Columns kept from the grid layer: the grid id (`fnid`), the two
# home-distance columns (`pre_` / `post_`), and the `LU_*` columns.
# The column ORDER matters: later cells slice each row positionally
# (`row[1:15]`) to build the model's state vector.
selectedColumns = ['fnid', 'pre_home_distance', 'post_home_distance', 'LU_Business',
                   'LU_City_Road', 'LU_Consumption', 'LU_Culture', 'LU_Industry',
                   'LU_Medical', 'LU_Park_&_Scenery', 'LU_Public', 'LU_Residence',
                   'LU_Science_&_Education', 'LU_Special', 'LU_Transportation', 'LU_Wild']

# Take an explicit copy: columns are added to `be_data` afterwards, and writing
# into a bare column selection of `grid_be` is a chained assignment
# (SettingWithCopyWarning, which is silenced in this notebook by
# `warnings.filterwarnings('ignore')`).
be_data = grid_be[selectedColumns].copy()

In [4]:
# Centroid of every grid geometry; its x / y become plain columns of `be_data`.
grid_coords = grid_be.geometry.centroid

# `assign` returns a new frame (values are aligned on the index, exactly like
# `be_data['x'] = ...`) instead of writing in place into a frame that was
# derived from `grid_be`, so no chained-assignment write can occur and the
# cell stays idempotent on re-run.
be_data = be_data.assign(x=grid_coords.x, y=grid_coords.y)

In [5]:
# Split `be_data` into two frames with an identical layout. Each one keeps a
# single home-distance column, renamed to the common name `home_distance`,
# and drops the other.
after_be_data = (
    be_data
    .drop(columns=['pre_home_distance'])
    .rename(columns={'post_home_distance': 'home_distance'})
)
before_be_data = (
    be_data
    .drop(columns=['post_home_distance'])
    .rename(columns={'pre_home_distance': 'home_distance'})
)

# Load model

In [6]:
import SCBIRL_Global_PE.SCBIRLTransformer as SIRLT
import SCBIRL_Global_PE.utils as SIRLU
import SCBIRL_Global_PE.migrationProcess as SIRLP

# Locations of the two trajectory files handed to the loader.
data_dir = './data/'
before_migration_path = os.path.join(data_dir, 'before_migrt.json')
full_trajectory_path = os.path.join(data_dir, 'all_traj.json')

# The loader returns the training inputs/targets together with the state and
# action dimensions; all of them are forwarded to the model constructor.
# NOTE(review): the module-level name `pe_code` bound here is reused by later
# cells for per-grid encodings, which overwrites this value.
(inputs,
 targets_action,
 pe_code,
 action_dim,
 state_dim) = SIRLU.loadTrajChain(before_migration_path, full_trajectory_path)

model = SIRLT.avril(inputs, targets_action, pe_code, state_dim, action_dim, state_only=True)

In [7]:
def getComputeFunction(model, attribute_type):
    """Build a two-argument scorer ``f(state, grid_code)`` bound to ``model``.

    Parameters
    ----------
    model : object
        Must provide ``QValue(state, grid_code)`` (used for ``'value'``) and
        ``reward(state, grid_code)`` (used for ``'reward'``).
    attribute_type : str
        ``'value'`` or ``'reward'``.

    Returns
    -------
    callable
        For ``'value'``: the maximum over ``model.QValue(state, grid_code)``.
        For ``'reward'``: element ``[0][0][0]`` of ``model.reward(state, grid_code)``.

    Raises
    ------
    ValueError
        If ``attribute_type`` is neither ``'value'`` nor ``'reward'``.
    """
    # Reject unknown kinds up front so the happy path below stays flat.
    if attribute_type not in ('value', 'reward'):
        raise ValueError("attribute_type should be either 'value' or 'reward'.")

    def compute_value(state, grid_code):
        return np.max(model.QValue(state, grid_code))

    def compute_reward(state, grid_code):
        return model.reward(state, grid_code)[0][0][0]

    return compute_value if attribute_type == 'value' else compute_reward
    
def globalPE(coords, dimension):
    """Encode a 2-D coordinate as a vector of ``dimension`` sinusoid sums.

    Component ``k`` (1-based) is ``sin(x * w_k) + sin(y * w_k)`` with
    ``w_k = 100 ** (k / dimension)``, where ``x = coords[0]`` and
    ``y = coords[1]``.

    Parameters
    ----------
    coords : sequence
        Indexable with at least two numeric entries (x first, then y).
    dimension : int
        Number of components to produce.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``dimension``.
    """
    x, y = coords[0], coords[1]
    # One multiplier per component, growing geometrically up to 100.
    multipliers = (100 ** (k / dimension) for k in range(1, dimension + 1))
    return np.array([np.sin(x * w) + np.sin(y * w) for w in multipliers])

# Calculate reward for all grids

In [None]:
# Restore the saved parameters, then obtain the 'reward' scorer for the model.
model.loadParams('./model/params_transformer_pe.pickle')
rewardFunction = getComputeFunction(model, 'reward')


def calculate_reward(row):
    """Return the model reward for one grid row as a Python float.

    The state is the positional slice ``row[1:15]`` (for the frames built
    above: ``home_distance`` followed by the ``LU_*`` columns), and the
    positional encoding is computed from the row's ``x`` / ``y`` with the same
    length as the state. Both are given three leading singleton axes before
    being passed to ``rewardFunction``.
    """
    features = row[1:15]
    encoding = globalPE((row.x, row.y), len(features))

    # (n,) -> (1, 1, 1, n) for both inputs; the state goes first, encoding second.
    model_inputs = []
    for values in (features, encoding):
        for _ in range(3):
            values = np.expand_dims(values, axis=0)
        model_inputs.append(values)
    return float(rewardFunction(*model_inputs))


before_be_data['reward'] = before_be_data.apply(calculate_reward, axis=1)
# NOTE(review): this CSV is written with the index column, while the
# per-checkpoint CSVs in the next cell use index=False - confirm which is intended.
before_be_data.to_csv('./data_pe/all_grid_before.csv')

In [8]:
# Score every grid once per saved checkpoint and write one CSV per checkpoint.
model_dir = './data_pe/after_migrt/model_random/'
save_dir = './data_pe/after_migrt/all_grid/'
# exist_ok replaces the separate existence check (no check-then-create race).
os.makedirs(save_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so every run processes
# the checkpoints in the same order.
pickle_files = sorted(file for file in os.listdir(model_dir) if file.endswith('.pickle'))


def _add_leading_axes(values):
    """Give `values` three leading singleton axes: (n,) -> (1, 1, 1, n)."""
    return np.expand_dims(np.expand_dims(np.expand_dims(values, axis=0), axis=0), axis=0)


# The model inputs depend only on the grid table, not on the checkpoint, so
# they are built once here instead of once per checkpoint. Distinct names are
# used so the module-level `pe_code` returned by the trajectory loader is not
# overwritten.
grid_inputs = []
for _, row in after_be_data.iterrows():
    grid_state = row[1:15]  # positional slice: home_distance + the LU_* columns
    grid_pe = globalPE((row.x, row.y), len(grid_state))
    grid_inputs.append((_add_leading_axes(grid_state), _add_leading_axes(grid_pe)))

for pickle_file in tqdm(pickle_files):
    file_path = os.path.join(model_dir, pickle_file)
    model.loadParams(file_path)
    # Same scorer as the other cells use, obtained after the parameters are loaded.
    rewardFunction = getComputeFunction(model, 'reward')

    rewards = [float(rewardFunction(grid_state, grid_pe)) for grid_state, grid_pe in grid_inputs]
    temp_after_be_data = after_be_data.copy()
    temp_after_be_data['reward'] = rewards

    # Swap only the file extension; str.replace would rewrite every
    # '.pickle' occurrence inside the name.
    csv_file_name = os.path.splitext(pickle_file)[0] + '.csv'
    csv_file_path = os.path.join(save_dir, csv_file_name)
    temp_after_be_data.to_csv(csv_file_path, index=False)

  0%|          | 0/9 [00:00<?, ?it/s]

load params from ./data_pe/after_migrt/model_random/20190901.pickle!


 11%|█         | 1/9 [06:33<52:30, 393.80s/it]

load params from ./data_pe/after_migrt/model_random/20190915.pickle!


 22%|██▏       | 2/9 [13:17<46:39, 399.90s/it]

load params from ./data_pe/after_migrt/model_random/20191001.pickle!


 33%|███▎      | 3/9 [19:50<39:40, 396.72s/it]

load params from ./data_pe/after_migrt/model_random/20191015.pickle!


 44%|████▍     | 4/9 [26:47<33:43, 404.70s/it]

load params from ./data_pe/after_migrt/model_random/20191101.pickle!


 56%|█████▌    | 5/9 [33:45<27:17, 409.38s/it]

load params from ./data_pe/after_migrt/model_random/20191115.pickle!


 67%|██████▋   | 6/9 [40:28<20:22, 407.34s/it]

load params from ./data_pe/after_migrt/model_random/20191201.pickle!


 78%|███████▊  | 7/9 [47:11<13:31, 405.63s/it]

load params from ./data_pe/after_migrt/model_random/20191214.pickle!


 89%|████████▉ | 8/9 [53:57<06:45, 405.82s/it]

load params from ./data_pe/after_migrt/model_random/20191231.pickle!


100%|██████████| 9/9 [1:00:24<00:00, 402.73s/it]


## Parallel version: compute rewards for all grids, one task per checkpoint

In [None]:
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
import os
import pandas as pd

def process_file(pickle_file, model_dir, save_dir, after_be_data, globalPE, getComputeFunction):
    """Score every row of ``after_be_data`` with one checkpoint and save a CSV.

    Parameters
    ----------
    pickle_file : str
        File name of the checkpoint inside ``model_dir``.
    model_dir, save_dir : str
        Directory the checkpoint is read from / the CSV is written to.
    after_be_data : pandas.DataFrame
        Grid table; it is copied, never modified.
    globalPE, getComputeFunction : callable
        The encoding and scorer factories, passed in explicitly.

    The output file is named like ``pickle_file`` with ``'.pickle'`` replaced
    by ``'.csv'`` and is written without the index.

    NOTE(review): this function still reads the module-level names ``model``
    and ``np``; it only works in a worker process if those globals are
    available there - confirm for the process start method in use.
    """
    model.loadParams(os.path.join(model_dir, pickle_file))
    rewardFunction = getComputeFunction(model, 'reward')

    def _add_leading_axes(values):
        # (n,) -> (1, 1, 1, n)
        return np.expand_dims(np.expand_dims(np.expand_dims(values, axis=0), axis=0), axis=0)

    def calculate_reward(row):
        features = row[1:15]
        encoding = globalPE((row.x, row.y), len(features))
        return float(rewardFunction(_add_leading_axes(features), _add_leading_axes(encoding)))

    scored = after_be_data.copy()
    scored['reward'] = scored.apply(calculate_reward, axis=1)

    # Write the scored table next to the other results in save_dir.
    out_name = pickle_file.replace('.pickle', '.csv')
    scored.to_csv(os.path.join(save_dir, out_name), index=False)

# `as_completed` must be imported by name: this cell only imports
# ProcessPoolExecutor from concurrent.futures, so the bare module path
# `concurrent.futures.as_completed` would raise NameError.
from concurrent.futures import as_completed

# Directories
# NOTE(review): the sequential cell above reads checkpoints from
# './data_pe/after_migrt/model_random/' - confirm which directory is intended.
model_dir = './data_pe/after_migrt/model/'
save_dir = './data_pe/after_migrt/all_grid/'

# Make sure the save directory exists (exist_ok avoids a check-then-create race)
os.makedirs(save_dir, exist_ok=True)

# List of pickle files, sorted because os.listdir order is arbitrary
pickle_files = sorted(file for file in os.listdir(model_dir) if file.endswith('.pickle'))

# Number of workers in the pool
num_workers = os.cpu_count()

# Processing files in parallel: one task per checkpoint file
with ProcessPoolExecutor(max_workers=num_workers) as executor:
    # Submit tasks
    futures = [executor.submit(process_file, pickle_file, model_dir, save_dir, after_be_data, globalPE, getComputeFunction)
               for pickle_file in pickle_files]

    # Progress bar for the futures
    for future in tqdm(as_completed(futures), total=len(futures)):
        # result() re-raises any exception from the worker; without it a
        # failed task would pass silently and its CSV would just be missing.
        future.result()