# Accessibility results

In [1]:
%load_ext autoreload
%autoreload 2
%cd D:\netmob25

D:\netmob25


In [2]:
import pandas as pd
from tqdm import tqdm
from geopy.distance import geodesic

## 1. Load activity data

In [3]:
df_activity = pd.read_parquet("dbs/data_p/stays_extraction_all.parquet")
# Extract end coordinates and hour
df_activity = df_activity[['end_lon', 'end_lat', 'end_time', 'day_type', 'dow', 'ID', 'purpose_d', 'weight_day', 'trip_id']].copy()
df_activity = df_activity.rename(columns={'end_lon': 'lon', 'end_lat': 'lat', 'end_time': 'time'})
df_activity['hour'] = pd.to_datetime(df_activity['time']).dt.hour
print(f"Number of types: {df_activity['purpose_d'].unique()} in {len(df_activity)} activities.")
df_activity.head()

Number of types: ['OTHER' 'RETURN_HOME' 'ACCOM' 'PURCHASE' 'WORK' 'LEISURE' 'BUSINESS'
 'HEALTH' 'STUDIES'] in 69706 activities.


Unnamed: 0,lon,lat,time,day_type,dow,ID,purpose_d,weight_day,trip_id,hour
0,2.14452,48.866024,2023-03-15 16:54:59,Normal,wednesday,10_2978,OTHER,235.905683,2023-03-15_1,16
1,2.141281,48.866651,2023-03-15 17:10:59,Normal,wednesday,10_2978,RETURN_HOME,235.905683,2023-03-15_2,17
2,2.126685,48.819422,2023-03-15 17:51:00,Normal,wednesday,10_2978,ACCOM,235.905683,2023-03-15_3,17
3,2.139098,48.865031,2023-03-15 18:26:00,Normal,wednesday,10_2978,RETURN_HOME,235.905683,2023-03-15_4,18
4,2.14452,48.866024,2023-03-16 08:17:00,Normal,thursday,10_2978,OTHER,218.345882,2023-03-16_1,8


## 2. Merge accessibility data

In [4]:
tp_path = "dbs/accessibility/"
df_activity_list = []
for hour in tqdm(range(24), desc="Merging accessibility data"):
    for time_threshold in [15, 30]:
        for amenity in ['sl', 'en', 'hs', 'ed']:
            for fn in ['pt', 'car']:
                df_origins = pd.read_csv(f"{tp_path}/data/origins_{hour}.csv")
                file_path = f"{tp_path}/access_{time_threshold}_{amenity}_{fn}_{hour}.csv"
                df = pd.read_csv(file_path)
                df_origins = pd.merge(df_origins, df[['id', 'accessibility']], on='id')
                for var in ['time_threshold', 'amenity', 'fn', 'hour']:
                    df_origins[var] = locals()[var]
                temp = pd.merge(df_activity, df_origins, on=['hour', 'lon', 'lat'], how='left')
                df_activity_list.append(temp.dropna())
df_activity = pd.concat(df_activity_list, ignore_index=True)

Merging accessibility data: 100%|██████████| 24/24 [00:18<00:00,  1.28it/s]


### 2.1 Clean up and save

In [5]:
df_activity.drop(columns=['id'], inplace=True)
df_activity.columns = ['lon', 'lat', 'time', 'day_type', 'dow', 'ID', 'purpose', 
                       'weight_day', 'trip_id', 'hour', 'accessibility', 'time_threshold',
                       'amenity', 'mode']
# Make time_threshold etc. categorical
df_activity['time_threshold'] = df_activity['time_threshold'].map({15: '15 min', 30: '30 min'}).astype('category')
df_activity['amenity'] = df_activity['amenity'].map({'sl': 'Social & Leisure', 'en': 'Essential needs',
                                                     'hs': 'Health services', 'ed': 'Education'}).astype('category')
df_activity['mode'] = df_activity['mode'].map({'car': 'Car', 'pt': 'Public transit'}).astype('category')
df_activity['purpose'] = df_activity['purpose'].map({'RETURN_HOME': 'Home', 
                                                     'OTHER': 'Other', 
                                                     'LEISURE': 'Leisure', 
                                                     'WORK': 'Work', 
                                                     'ACCOM': 'Accomodation', 
                                                     'BUSINESS': 'Business',
                                                     'PURCHASE': 'Purchase', 
                                                     'HEALTH': 'Health', 
                                                     'STUDIES': 'Studies'}).astype('category')
df_activity.head()

Unnamed: 0,lon,lat,time,day_type,dow,ID,purpose,weight_day,trip_id,hour,accessibility,time_threshold,amenity,mode
0,2.408676,48.863514,2023-03-18 00:25:00,Normal,friday,10_2980,Home,232.409,2023-03-17_6,0,111.0,15 min,Social & Leisure,Public transit
1,2.328877,48.609386,2023-03-19 00:30:00,Normal,saturday,10_2998,Other,290.365069,2023-03-18_8,0,7.0,15 min,Social & Leisure,Public transit
2,2.456914,48.84484,2023-03-17 00:36:00,Normal,thursday,10_3004,Home,177.526369,2023-03-16_4,0,7.0,15 min,Social & Leisure,Public transit
3,2.229863,48.900512,2023-03-18 00:20:59,Normal,friday,10_3011,Home,657.095216,2023-03-17_12,0,32.0,15 min,Social & Leisure,Public transit
4,2.232411,48.899118,2023-03-18 00:40:59,Normal,friday,10_3011,Other,657.095216,2023-03-17_13,0,33.0,15 min,Social & Leisure,Public transit


## 3. Distance to home

In [6]:
# Add home coordinates from origins as well
df_home = pd.read_parquet("dbs/data_p/stays_extraction_all.parquet")
# Extract end coordinates and hour
df_home = df_home[['start_lon', 'start_lat', 'ID', 'purpose_o']].copy()
df_home = df_home.rename(columns={'start_lon': 'home_lon', 'start_lat': 'home_lat', 'purpose_o': 'purpose'})
df_home = df_home[df_home['purpose'] == 'DEPART_HOME'].drop_duplicates(subset=['ID']).drop(columns='purpose')
df_home.head()

Unnamed: 0,home_lon,home_lat,ID
0,2.139098,48.865031,10_2978
18,2.407062,48.86383,10_2980
35,2.384387,48.688494,10_2981
50,2.496571,48.613275,10_2982
83,2.023759,48.80352,10_2984


In [7]:
# Step 1: Extract home locations for each ID
home_locations = df_activity[df_activity['purpose'] == 'Home'].groupby('ID')[['lat', 'lon']].first().reset_index()
home_locations.columns = ['ID', 'home_lat', 'home_lon']
home_locations = pd.concat([home_locations, df_home], ignore_index=True).drop_duplicates(subset=['ID'])

# Step 2: Merge home location back into the original dataframe
df_activity = df_activity.merge(home_locations, on='ID', how='left')

# Step 3: Define a function to compute geodesic distance
def compute_distance(row):
    return geodesic((row['lat'], row['lon']), (row['home_lat'], row['home_lon'])).meters

# Step 4: Apply the distance calculation
tqdm.pandas()
df_activity['d2h'] = df_activity.progress_apply(compute_distance, axis=1)
df_activity.drop(columns=['home_lat', 'home_lon'], inplace=True)
df_activity.head()

100%|██████████| 1083424/1083424 [03:35<00:00, 5017.77it/s]


Unnamed: 0,lon,lat,time,day_type,dow,ID,purpose,weight_day,trip_id,hour,accessibility,time_threshold,amenity,mode,d2h
0,2.408676,48.863514,2023-03-18 00:25:00,Normal,friday,10_2980,Home,232.409,2023-03-17_6,0,111.0,15 min,Social & Leisure,Public transit,0.0
1,2.328877,48.609386,2023-03-19 00:30:00,Normal,saturday,10_2998,Other,290.365069,2023-03-18_8,0,7.0,15 min,Social & Leisure,Public transit,9375.612274
2,2.456914,48.84484,2023-03-17 00:36:00,Normal,thursday,10_3004,Home,177.526369,2023-03-16_4,0,7.0,15 min,Social & Leisure,Public transit,0.0
3,2.229863,48.900512,2023-03-18 00:20:59,Normal,friday,10_3011,Home,657.095216,2023-03-17_12,0,32.0,15 min,Social & Leisure,Public transit,0.0
4,2.232411,48.899118,2023-03-18 00:40:59,Normal,friday,10_3011,Other,657.095216,2023-03-17_13,0,33.0,15 min,Social & Leisure,Public transit,242.77821


In [8]:
df_activity.loc[:, 'd2h'] /= 1000  # Convert meters to kilometers

## 4. Accessibility gap

In [9]:
# Step 1: Extract home accessibility for each ID
home_access = df_activity[df_activity['purpose'] == 'Home'].groupby(['ID', 'time_threshold', 'amenity', 'mode'])[['accessibility']].median().reset_index()
home_access.rename(columns={'accessibility': 'home_accessibility'}, inplace=True)

# Step 2: Merge home location back into the original dataframe
df_activity = df_activity.merge(home_access, on=['ID', 'time_threshold', 'amenity', 'mode'], how='left')

# Step 3: Compute the accessibility gap
df_activity['gap_access'] = df_activity['accessibility'] - df_activity['home_accessibility']

  home_access = df_activity[df_activity['purpose'] == 'Home'].groupby(['ID', 'time_threshold', 'amenity', 'mode'])[['accessibility']].median().reset_index()


In [10]:
df_activity.to_parquet("dbs/activity_access.parquet", index=False)