In [37]:
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import json

# The cells below call osrm_get_route and get_timestamp, which are not defined
# in this notebook and presumably come from this star import.
try:
    from routing_module import *
except ImportError:
    # Only a failed import should trigger the fallback; a bare `except` would
    # also hide real errors raised while routing_module itself executes.
    # NOTE(review): machine-specific absolute path; consider making it configurable.
    import sys
    sys.path.append('/home/happy956/')
    from routing_module import *

In [38]:
# Passenger sample plus one timetable per travel direction. Each timetable is
# tagged with a numeric `direction` code (1 for the kimpo file, 2 for the
# yongchon file) that is later matched against the passengers' `direction`.
random_passenger = pd.read_csv('data/passenger/subway_random_passenger.csv')

kimpo_direction_schedule = (
    pd.read_csv('data/subway/kimpo_schedule.csv', encoding='cp949')
    .assign(direction=1)
)
yongchon_direction_schedule = (
    pd.read_csv('data/subway/yongchon_schedule.csv', encoding='cp949')
    .assign(direction=2)
)

## 1. subway passenger

### 1-1. preprocessing time columns in subway schedule

In [39]:
def date_preprocessing(df):
    """Parse a schedule's time columns and unwrap times that cross midnight.

    The '출발시간' (departure) and '도착시간' (arrival) columns are parsed with
    ``pd.to_datetime``. The first row is used as the reference for the start of
    the service day:

    * a row whose departure is earlier than the first row's departure has both
      its departure and its arrival moved forward by one day;
    * otherwise, a row whose arrival is earlier than the first row's arrival
      has only its arrival moved forward by one day.

    Parameters
    ----------
    df : pandas.DataFrame
        Schedule containing the columns '출발시간' and '도착시간'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns and the two time columns
        converted and adjusted. The input frame is left untouched. An empty
        input yields an empty (parsed) frame.
    """
    schedule = df.copy()
    schedule['출발시간'] = pd.to_datetime(schedule['출발시간'])
    schedule['도착시간'] = pd.to_datetime(schedule['도착시간'])

    if schedule.empty:
        return schedule

    # Positional access: the reference row is the first row even when the
    # index does not start at label 0.
    first_departure = schedule['출발시간'].iloc[0]
    first_arrival = schedule['도착시간'].iloc[0]
    one_day = pd.Timedelta(days=1)

    # Both masks are computed from the unshifted times, before any adjustment.
    departs_next_day = schedule['출발시간'] < first_departure
    arrives_next_day = departs_next_day | (schedule['도착시간'] < first_arrival)

    schedule['출발시간'] = schedule['출발시간'].mask(departs_next_day, schedule['출발시간'] + one_day)
    schedule['도착시간'] = schedule['도착시간'].mask(arrives_next_day, schedule['도착시간'] + one_day)
    return schedule

In [40]:
# Parse the time columns of both directional timetables and unwrap the rows
# that fall after midnight.
yongchon_direction_schedule, kimpo_direction_schedule = map(
    date_preprocessing,
    (yongchon_direction_schedule, kimpo_direction_schedule),
)

In [41]:
# Stack both directions into one timetable with a fresh 0..n-1 index.
subway_schedule = pd.concat(
    [yongchon_direction_schedule, kimpo_direction_schedule],
    axis=0,
    ignore_index=True,
)

In [42]:
# Reference instant for the "minutes since midnight" values used below:
# 00:00:00 on the calendar day of the timetable's first row.
start_day = subway_schedule.at[0, '출발시간']
init_dt_lst = [*start_day.timetuple()[:3], 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 8, 0, 0)

### 1-2. passenger trip & timestamp

In [43]:
# Simulate every subway passenger's trip: walk to the boarding station, wait
# for the next train in the passenger's direction, ride it to the destination
# station, then walk to the final destination. Only the two walking legs are
# stored in `total_data`; `evaluation` accumulates totals over all passengers.
total_data = []
evaluation = {
    'waiting_time': 0,
    'walking_time': 0,
    'moving_time': 0,
}

# Minutes elapsed since init_dt for every departure/arrival. These values do
# not depend on the passenger, so they are computed once instead of once per
# loop iteration.
schedule_min = subway_schedule.copy()
schedule_min['start_time_min'] = schedule_min['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
schedule_min['end_time_min'] = schedule_min['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)

for idx, row in tqdm(random_passenger.iterrows(), total=len(random_passenger)):
    data = row.to_dict()

    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    # Any station name containing '사우' is normalised to exactly '사우' before
    # it is matched against the schedule.
    start_station = '사우' if '사우' in start_station else start_station
    end_station = '사우' if '사우' in end_station else end_station
    direction = data['direction']

    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']

    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

    # NOTE(review): the last timestamp is compared with schedule minutes, so it
    # is presumably the station-arrival time in minutes since init_dt — confirm
    # against get_timestamp.
    station_arrival_min = timestamps[-1]

    ### boarding: first departure from the start station at/after arrival ###
    departures = schedule_min.loc[
        (schedule_min['출발정류장'] == start_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['start_time_min'] >= station_arrival_min)
    ].sort_values('start_time_min')
    if departures.empty:
        raise IndexError(
            f'passenger {idx}: no departure from {start_station!r} '
            f'(direction {direction}) at or after minute {station_arrival_min}'
        )
    start_filtered_bus_schedule = departures[:1].to_dict('records')[0]
    subway_id = start_filtered_bus_schedule['철도_id']
    boarding_min = start_filtered_bus_schedule['start_time_min']
    evaluation['waiting_time'] += boarding_min - station_arrival_min

    ### alighting: same train's first arrival at the end station ###
    # Anchored on the boarding time (not the station-arrival time) so an
    # arrival of the same train id that precedes boarding can never be chosen.
    arrivals = schedule_min.loc[
        (schedule_min['도착정류장'] == end_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['철도_id'] == subway_id)
        & (schedule_min['end_time_min'] >= boarding_min)
    ].sort_values('end_time_min')
    if arrivals.empty:
        raise IndexError(
            f'passenger {idx}: train {subway_id} has no arrival at {end_station!r} '
            f'(direction {direction}) at or after minute {boarding_min}'
        )
    end_filtered_bus_schedule = arrivals[:1].to_dict('records')[0]

    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']

    # NOTE(review): measured from station arrival, so this span also contains
    # the waiting time counted above — confirm that this is intended.
    evaluation['moving_time'] += start_time - station_arrival_min

    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 9268/9268 [00:56<00:00, 164.18it/s]


In [44]:
# Mean per passenger for each phase. The divisor is the actual number of
# passengers rather than a hard-coded 9268, so the averages stay correct when
# the input CSV changes.
tuple(
    evaluation[key] / len(random_passenger)
    for key in ('waiting_time', 'walking_time', 'moving_time')
)

(2.0027352179542373, 2.945855632283145, 11.989140051791134)

In [45]:
# Persist the walking legs of every subway passenger as JSON.
with open('data/passenger/subway_passenger_trip.json', 'w') as trip_file:
    trip_file.write(json.dumps(total_data))

## 2. bus passenger - 버스 증차 전

### 2-1. preprocessing time columns in bus schedule

In [16]:
# Passengers and timetable for the scenario before extra buses were added.
# The timetable's `type` column is exposed as `direction`, and every row is
# tagged with route '70'.
bus_random_passenger = pd.read_csv('data/passenger/prev_bus_random_passenger.csv')
bus_schedule = (
    pd.read_csv('data/prev_bus/prev_bus_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70')
)

In [17]:
# Sanity check: (rows, columns) of the passenger table just loaded.
bus_random_passenger.shape

(664, 13)

In [19]:
# Parse the two time columns and move rows that are earlier than the first
# row onto the next day (see date_preprocessing).
bus_schedule = date_preprocessing(bus_schedule)

In [20]:
# Reference instant for the "minutes since midnight" values used below:
# 00:00:00 on the calendar day of the timetable's first row.
start_day = bus_schedule.at[0, '출발시간']
init_dt_lst = [*start_day.timetuple()[:3], 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 8, 0, 0)

### 2-2. passenger trip & timestamp

In [21]:
# Simulate every bus passenger's trip (scenario before extra buses): walk to
# the boarding stop, wait for the next bus of the passenger's route and
# direction, ride it to the destination stop, then walk to the destination.
# Only the two walking legs are stored in `total_data`; `evaluation`
# accumulates totals over all passengers.
total_data = []
evaluation = {
    'waiting_time': 0,
    'walking_time': 0,
    'moving_time': 0,
}

# Minutes elapsed since init_dt for every departure/arrival. These values do
# not depend on the passenger, so they are computed once instead of once per
# loop iteration.
schedule_min = bus_schedule.copy()
schedule_min['start_time_min'] = schedule_min['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
schedule_min['end_time_min'] = schedule_min['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()

    # The schedule stores the route as a string, so compare like with like.
    bus_type = str(data['bus_type'])
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']

    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']

    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

    # NOTE(review): the last timestamp is compared with schedule minutes, so it
    # is presumably the stop-arrival time in minutes since init_dt — confirm
    # against get_timestamp.
    station_arrival_min = timestamps[-1]

    ### boarding: first departure from the start stop at/after arrival ###
    departures = schedule_min.loc[
        (schedule_min['출발정류장'] == start_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['start_time_min'] >= station_arrival_min)
    ].sort_values('start_time_min')
    if departures.empty:
        # Replaces the former print/display debugging block with an error that
        # carries the same context.
        raise IndexError(
            f'passenger {idx}: no {bus_type} departure from {start_station!r} '
            f'to {end_station!r} (direction {direction}) at or after minute {station_arrival_min}'
        )
    start_filtered_bus_schedule = departures[:1].to_dict('records')[0]
    bus_id = start_filtered_bus_schedule['버스_id']
    boarding_min = start_filtered_bus_schedule['start_time_min']
    evaluation['waiting_time'] += boarding_min - station_arrival_min

    ### alighting: same bus's first arrival at the end stop ###
    # Anchored on the boarding time (not the stop-arrival time) so an arrival
    # of the same bus id that precedes boarding can never be chosen.
    arrivals = schedule_min.loc[
        (schedule_min['도착정류장'] == end_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['버스_id'] == bus_id)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['end_time_min'] >= boarding_min)
    ].sort_values('end_time_min')
    if arrivals.empty:
        raise IndexError(
            f'passenger {idx}: bus {bus_id} ({bus_type}) has no arrival at {end_station!r} '
            f'(direction {direction}) at or after minute {boarding_min}'
        )
    end_filtered_bus_schedule = arrivals[:1].to_dict('records')[0]

    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']

    # NOTE(review): measured from stop arrival, so this span also contains the
    # waiting time counted above — confirm that this is intended.
    evaluation['moving_time'] += start_time - station_arrival_min

    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 664/664 [00:04<00:00, 163.93it/s]


In [23]:
# Mean per passenger for each phase. The divisor is the actual number of
# passengers rather than a hard-coded 664, so the averages stay correct when
# the input CSV changes.
print(*(
    evaluation[key] / len(bus_random_passenger)
    for key in ('waiting_time', 'walking_time', 'moving_time')
))

5.782856425702803 3.799728915662638 16.289231927710834


In [24]:
# Persist the walking legs of every bus passenger (pre-expansion scenario) as JSON.
with open('data/passenger/prev_bus_passenger_trip.json', 'w') as trip_file:
    trip_file.write(json.dumps(total_data))

## 3. bus passenger - 버스 증차 후

### 3-1. preprocessing time columns in bus schedule

In [46]:
# Passengers plus one timetable per route. For each timetable the `type`
# column is exposed as `direction` and every row is tagged with its route name.
bus_random_passenger = pd.read_csv('data/passenger/bus_random_passenger.csv')

bus70_schedule = (
    pd.read_csv('data/bus/70_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70')
)
bus70A_schedule = (
    pd.read_csv('data/bus/70A_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70A')
)
bus70B_schedule = (
    pd.read_csv('data/bus/70B_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70B')
)

In [47]:
# Sanity check: (rows, columns) of each route's timetable.
tuple(schedule.shape for schedule in (bus70_schedule, bus70A_schedule, bus70B_schedule))

((270, 11), (64, 11), (64, 11))

In [48]:
# Parse the time columns of each route's timetable and unwrap the rows that
# fall after midnight.
bus70_schedule, bus70A_schedule, bus70B_schedule = map(
    date_preprocessing,
    (bus70_schedule, bus70A_schedule, bus70B_schedule),
)

In [49]:
# Stack the three routes into one timetable with a fresh 0..n-1 index.
bus_schedule = pd.concat(
    [bus70_schedule, bus70A_schedule, bus70B_schedule],
    axis=0,
    ignore_index=True,
)

In [50]:
# Reference instant for the "minutes since midnight" values used below:
# 00:00:00 on the calendar day of the timetable's first row.
start_day = bus_schedule.at[0, '출발시간']
init_dt_lst = [*start_day.timetuple()[:3], 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 9, 0, 0)

### 3-2. passenger trip & timestamp

In [51]:
# Simulate every bus passenger's trip (scenario after extra buses): walk to
# the boarding stop, wait for the next bus of the passenger's route and
# direction, ride it to the destination stop, then walk to the destination.
# Only the two walking legs are stored in `total_data`; `evaluation`
# accumulates totals separately for each route.
total_data = []
evaluation = {
    route: {'waiting_time': 0, 'walking_time': 0, 'moving_time': 0}
    for route in ('70', '70A', '70B')
}

# Minutes elapsed since init_dt for every departure/arrival. These values do
# not depend on the passenger, so they are computed once instead of once per
# loop iteration.
schedule_min = bus_schedule.copy()
schedule_min['start_time_min'] = schedule_min['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
schedule_min['end_time_min'] = schedule_min['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()

    # The schedule and the `evaluation` keys store the route as a string, so
    # coerce here as the pre-expansion loop does.
    bus_type = str(data['bus_type'])
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']

    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']

    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

    # NOTE(review): the last timestamp is compared with schedule minutes, so it
    # is presumably the stop-arrival time in minutes since init_dt — confirm
    # against get_timestamp.
    station_arrival_min = timestamps[-1]

    ### boarding: first departure from the start stop at/after arrival ###
    departures = schedule_min.loc[
        (schedule_min['출발정류장'] == start_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['start_time_min'] >= station_arrival_min)
    ].sort_values('start_time_min')
    if departures.empty:
        raise IndexError(
            f'passenger {idx}: no {bus_type} departure from {start_station!r} '
            f'(direction {direction}) at or after minute {station_arrival_min}'
        )
    start_filtered_bus_schedule = departures[:1].to_dict('records')[0]
    bus_id = start_filtered_bus_schedule['버스_id']
    boarding_min = start_filtered_bus_schedule['start_time_min']
    evaluation[bus_type]['waiting_time'] += boarding_min - station_arrival_min

    ### alighting: same bus's first arrival at the end stop ###
    # Anchored on the boarding time (not the stop-arrival time) so an arrival
    # of the same bus id that precedes boarding can never be chosen.
    arrivals = schedule_min.loc[
        (schedule_min['도착정류장'] == end_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['버스_id'] == bus_id)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['end_time_min'] >= boarding_min)
    ].sort_values('end_time_min')
    if arrivals.empty:
        raise IndexError(
            f'passenger {idx}: bus {bus_id} ({bus_type}) has no arrival at {end_station!r} '
            f'(direction {direction}) at or after minute {boarding_min}'
        )
    end_filtered_bus_schedule = arrivals[:1].to_dict('records')[0]

    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']

    # NOTE(review): measured from stop arrival, so this span also contains the
    # waiting time counted above — confirm that this is intended.
    evaluation[bus_type]['moving_time'] += start_time - station_arrival_min

    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 655/655 [00:04<00:00, 160.65it/s]


In [52]:
# One line per route: waiting, walking and moving totals divided by the number
# of passengers (taken from the data instead of a hard-coded 655).
# NOTE(review): the divisor is the count of ALL passengers, not of the riders
# of each route, so each line is that route's share of the overall mean rather
# than a per-route average — confirm this is the intended metric.
for route in ('70', '70A', '70B'):
    print(*(
        evaluation[route][key] / len(bus_random_passenger)
        for key in ('waiting_time', 'walking_time', 'moving_time')
    ))

2.7875419847328238 1.3651297709923664 7.047745547073791
1.4059999999999997 1.1629770992366408 5.2124376590330845
1.519562340966918 1.1721374045801525 6.025134860050893


In [35]:
# Persist the walking legs of every bus passenger (post-expansion scenario) as JSON.
with open('data/passenger/bus_passenger_trip.json', 'w') as trip_file:
    trip_file.write(json.dumps(total_data))

## 4. bus passenger - 버스 전용차로

### 4-1. preprocessing time columns in bus schedule

In [53]:
# Same passengers as the previous scenario, but with the `free_*` timetables.
# For each timetable the `type` column is exposed as `direction` and every row
# is tagged with its route name.
bus_random_passenger = pd.read_csv('data/passenger/bus_random_passenger.csv')

bus70_schedule = (
    pd.read_csv('data/bus/free_70_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70')
)
bus70A_schedule = (
    pd.read_csv('data/bus/free_70A_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70A')
)
bus70B_schedule = (
    pd.read_csv('data/bus/free_70B_schedule.csv')
    .rename(columns={'type': 'direction'})
    .assign(bus_type='70B')
)

In [54]:
# Sanity check: (rows, columns) of each route's timetable.
tuple(schedule.shape for schedule in (bus70_schedule, bus70A_schedule, bus70B_schedule))

((270, 11), (64, 11), (64, 11))

In [55]:
# Parse the time columns of each route's timetable and unwrap the rows that
# fall after midnight.
bus70_schedule, bus70A_schedule, bus70B_schedule = map(
    date_preprocessing,
    (bus70_schedule, bus70A_schedule, bus70B_schedule),
)

In [56]:
# Stack the three routes into one timetable with a fresh 0..n-1 index.
bus_schedule = pd.concat(
    [bus70_schedule, bus70A_schedule, bus70B_schedule],
    axis=0,
    ignore_index=True,
)

In [57]:
# Reference instant for the "minutes since midnight" values used below:
# 00:00:00 on the calendar day of the timetable's first row.
start_day = bus_schedule.at[0, '출발시간']
init_dt_lst = [*start_day.timetuple()[:3], 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 9, 0, 0)

### 4-2. passenger trip & timestamp

In [58]:
# Simulate every bus passenger's trip using the `free_*` timetables: walk to
# the boarding stop, wait for the next bus of the passenger's route and
# direction, ride it to the destination stop, then walk to the destination.
# Only the two walking legs are stored in `total_data`; `evaluation`
# accumulates totals separately for each route.
total_data = []
evaluation = {
    route: {'waiting_time': 0, 'walking_time': 0, 'moving_time': 0}
    for route in ('70', '70A', '70B')
}

# Minutes elapsed since init_dt for every departure/arrival. These values do
# not depend on the passenger, so they are computed once instead of once per
# loop iteration.
schedule_min = bus_schedule.copy()
schedule_min['start_time_min'] = schedule_min['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
schedule_min['end_time_min'] = schedule_min['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()

    # The schedule and the `evaluation` keys store the route as a string, so
    # coerce here as the pre-expansion loop does.
    bus_type = str(data['bus_type'])
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']

    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']

    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

    # NOTE(review): the last timestamp is compared with schedule minutes, so it
    # is presumably the stop-arrival time in minutes since init_dt — confirm
    # against get_timestamp.
    station_arrival_min = timestamps[-1]

    ### boarding: first departure from the start stop at/after arrival ###
    departures = schedule_min.loc[
        (schedule_min['출발정류장'] == start_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['start_time_min'] >= station_arrival_min)
    ].sort_values('start_time_min')
    if departures.empty:
        raise IndexError(
            f'passenger {idx}: no {bus_type} departure from {start_station!r} '
            f'(direction {direction}) at or after minute {station_arrival_min}'
        )
    start_filtered_bus_schedule = departures[:1].to_dict('records')[0]
    bus_id = start_filtered_bus_schedule['버스_id']
    boarding_min = start_filtered_bus_schedule['start_time_min']
    evaluation[bus_type]['waiting_time'] += boarding_min - station_arrival_min

    ### alighting: same bus's first arrival at the end stop ###
    # Anchored on the boarding time (not the stop-arrival time) so an arrival
    # of the same bus id that precedes boarding can never be chosen.
    arrivals = schedule_min.loc[
        (schedule_min['도착정류장'] == end_station)
        & (schedule_min['direction'] == direction)
        & (schedule_min['버스_id'] == bus_id)
        & (schedule_min['bus_type'] == bus_type)
        & (schedule_min['end_time_min'] >= boarding_min)
    ].sort_values('end_time_min')
    if arrivals.empty:
        raise IndexError(
            f'passenger {idx}: bus {bus_id} ({bus_type}) has no arrival at {end_station!r} '
            f'(direction {direction}) at or after minute {boarding_min}'
        )
    end_filtered_bus_schedule = arrivals[:1].to_dict('records')[0]

    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']

    # NOTE(review): measured from stop arrival, so this span also contains the
    # waiting time counted above — confirm that this is intended.
    evaluation[bus_type]['moving_time'] += start_time - station_arrival_min

    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration

    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 655/655 [00:03<00:00, 166.15it/s]


In [59]:
# One line per route: waiting, walking and moving totals divided by the number
# of passengers (taken from the data instead of a hard-coded 655).
# NOTE(review): the divisor is the count of ALL passengers, not of the riders
# of each route, so each line is that route's share of the overall mean rather
# than a per-route average — confirm this is the intended metric.
for route in ('70', '70A', '70B'):
    print(*(
        evaluation[route][key] / len(bus_random_passenger)
        for key in ('waiting_time', 'walking_time', 'moving_time')
    ))

2.6098829516539452 1.3651297709923664 6.151944020356234
1.4491297709923678 1.1629770992366408 5.483201017811708
1.582641221374044 1.1721374045801525 11.4330737913486


In [None]:
# Save this scenario's trips under their own file name. The previous path,
# 'data/passenger/bus_passenger_trip.json', is the same file the section-3
# dump writes, so running this cell silently replaced that scenario's results.
# NOTE(review): the new name mirrors the `free_` prefix of this section's
# schedule CSVs; adjust it if downstream consumers expect a different name.
with open('data/passenger/free_bus_passenger_trip.json', 'w') as f:
    json.dump(total_data, f)