In [1]:
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import json

try:
    from routing_module import *
except:
    import sys
    sys.path.append('/home/happy956/')
    from routing_module import *

In [2]:
random_passenger = pd.read_csv('data/passenger/decrease_subway_random_passenger.csv')
kimpo_direction_schedule = pd.read_csv('data/subway/kimpo_schedule.csv', encoding='cp949')
yongchon_direction_schedule = pd.read_csv('data/subway/yongchon_schedule.csv', encoding='cp949')

kimpo_direction_schedule['direction'] = 1
yongchon_direction_schedule['direction'] = 2

## 1. subway passenger

### 1-1. preprocessing time columns in subway schedule

In [3]:
def date_preprocessing(df):
    df['출발시간'] = pd.to_datetime(df['출발시간'])
    df['도착시간'] = pd.to_datetime(df['도착시간'])
    
    start_init_time = df.loc[0, '출발시간']
    end_init_time = df.loc[0, '도착시간']
    
    data = []
    for _, row in df.iterrows():
        if row['출발시간'] < start_init_time:
            row['출발시간'] += pd.to_timedelta('1 day')
            row['도착시간'] += pd.to_timedelta('1 day')
        
        elif row['도착시간'] < end_init_time:
            row['도착시간'] += pd.to_timedelta('1 day')
        
        data.append(row)
        
    df = pd.DataFrame(data)
    return df

In [4]:
yongchon_direction_schedule = date_preprocessing(yongchon_direction_schedule)
kimpo_direction_schedule = date_preprocessing(kimpo_direction_schedule)

In [5]:
subway_schedule = pd.concat([yongchon_direction_schedule, kimpo_direction_schedule], axis=0)
subway_schedule = subway_schedule.reset_index(drop=True)

In [6]:
start_day = subway_schedule.loc[0, '출발시간']
init_dt_lst = [start_day.year, start_day.month, start_day.day, 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 9, 0, 0)

### 1-2. passenger trip & timestamp

In [7]:
total_data = []
evaluation = {
    'waiting_time': 0,
    'walking_time': 0,
    'moving_time': 0,
}

for idx, row in tqdm(random_passenger.iterrows(), total=len(random_passenger)):
    data = row.to_dict()
    
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    start_station = '사우' if '사우' in start_station else start_station
    end_station = '사우' if '사우' in end_station else end_station
    direction = data['direction']
    
    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']
    
    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })
    
    ### start bus ###
    ## start bus schedule filter
    cond1 = subway_schedule['출발정류장'] == start_station
    cond2 = subway_schedule['direction'] == direction
    filtered_bus_schedule = subway_schedule.loc[cond1 & cond2].copy()
    filtered_bus_schedule['start_time_min'] = filtered_bus_schedule['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('start_time_min')
    
    start_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['start_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    subway_id = start_filtered_bus_schedule['철도_id']
    evaluation['waiting_time'] += start_filtered_bus_schedule['start_time_min'] - timestamps[-1]
    
    ### end bus ###
    ## end bus schedule filter
    cond1 = subway_schedule['도착정류장'] == end_station
    cond2 = subway_schedule['direction'] == direction
    cond3 = subway_schedule['철도_id'] == subway_id
    filtered_bus_schedule = subway_schedule.loc[cond1 & cond2 & cond3].copy()
    filtered_bus_schedule['end_time_min'] = filtered_bus_schedule['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('end_time_min')
    
    end_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['end_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    
    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']
    
    evaluation['moving_time'] += start_time - timestamps[-1]
    
    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 7958/7958 [00:49<00:00, 160.63it/s]


In [8]:
evaluation['walking_time'] / 7958, evaluation['waiting_time'] / 7958, evaluation['moving_time'] / 7958

(2.892939180698703, 1.9881314400603132, 11.979083940688652)

In [45]:
with open('data/passenger/subway_passenger_trip.json', 'w') as f:
    json.dump(total_data, f)

## 2. bus passenger - 버스 증차 전

### 2-1. preprocessing time columns in bus schedule

In [9]:
bus_random_passenger = pd.read_csv('data/passenger/prev_bus_random_passenger.csv')
bus_schedule = pd.read_csv('data/prev_bus/prev_bus_schedule.csv')
bus_schedule = bus_schedule.rename(columns={'type': 'direction'})
bus_schedule['bus_type'] = '70'

In [10]:
bus_random_passenger.shape

(664, 13)

In [11]:
bus_schedule = date_preprocessing(bus_schedule)

In [12]:
start_day = bus_schedule.loc[0, '출발시간']
init_dt_lst = [start_day.year, start_day.month, start_day.day, 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 9, 0, 0)

### 2-2. passenger trip & timestamp

In [13]:
total_data = []
evaluation = {
    'waiting_time': 0,
    'walking_time': 0,
    'moving_time': 0,
}

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()
    
    bus_type = str(data['bus_type'])
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']
    
    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']
    
    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })
    
    ### start bus ###
    ## start bus schedule filter
    cond1 = bus_schedule['출발정류장'] == start_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3].copy()
    filtered_bus_schedule['start_time_min'] = filtered_bus_schedule['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('start_time_min')
    
    try:
        start_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['start_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
        bus_id = start_filtered_bus_schedule['버스_id']
        evaluation['waiting_time'] += start_filtered_bus_schedule['start_time_min'] - timestamps[-1]
    except:
        print(timestamps[-1])
        print(start_station, end_station, direction)
        display(filtered_bus_schedule)
        raise
        
    ### end bus ###
    ## end bus schedule filter
    cond1 = bus_schedule['도착정류장'] == end_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['버스_id'] == bus_id
    cond4 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3 & cond4].copy()
    filtered_bus_schedule['end_time_min'] = filtered_bus_schedule['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('end_time_min')
    
    end_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['end_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    
    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']
    
    evaluation['moving_time'] += start_time - timestamps[-1]
    
    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 664/664 [00:04<00:00, 163.53it/s]


In [14]:
evaluation['walking_time'] / 664, evaluation['waiting_time'] / 664,  evaluation['moving_time'] / 664

(3.799728915662638, 5.782856425702803, 16.289231927710834)

In [24]:
with open('data/passenger/prev_bus_passenger_trip.json', 'w') as f:
    json.dump(total_data, f)

## 3. bus passenger - 버스 증차 후

### 3-1. preprocessing time columns in bus schedule

In [5]:
bus_random_passenger = pd.read_csv('data/passenger/decrease_bus_random_passenger.csv')
bus70_schedule = pd.read_csv('data/bus/70_schedule.csv')
bus70A_schedule = pd.read_csv('data/bus/70A_schedule.csv')
bus70B_schedule = pd.read_csv('data/bus/70B_schedule.csv')

bus70_schedule = bus70_schedule.rename(columns={'type': 'direction'})
bus70A_schedule = bus70A_schedule.rename(columns={'type': 'direction'})
bus70B_schedule = bus70B_schedule.rename(columns={'type': 'direction'})
 
bus70_schedule['bus_type'] = '70'
bus70A_schedule['bus_type'] = '70A'
bus70B_schedule['bus_type'] = '70B'

In [6]:
bus70_schedule.shape, bus70A_schedule.shape, bus70B_schedule.shape

((270, 11), (64, 11), (64, 11))

In [7]:
bus70_schedule = date_preprocessing(bus70_schedule)
bus70A_schedule = date_preprocessing(bus70A_schedule)
bus70B_schedule = date_preprocessing(bus70B_schedule)

In [8]:
bus_schedule = pd.concat([bus70_schedule, bus70A_schedule, bus70B_schedule], axis=0)
bus_schedule = bus_schedule.reset_index(drop=True)

In [9]:
start_day = bus_schedule.loc[0, '출발시간']
init_dt_lst = [start_day.year, start_day.month, start_day.day, 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 10, 0, 0)

### 3-2. passenger trip & timestamp

In [10]:
total_data = []
evaluation = {
    '70': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    },
    '70A': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    },
    '70B': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    }
}

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()
    
    bus_type = data['bus_type']
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']
    
    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']
    
    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })
    
    ### start bus ###
    ## start bus schedule filter
    cond1 = bus_schedule['출발정류장'] == start_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3].copy()
    filtered_bus_schedule['start_time_min'] = filtered_bus_schedule['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('start_time_min')
    
    start_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['start_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    bus_id = start_filtered_bus_schedule['버스_id']
    evaluation[bus_type]['waiting_time'] += start_filtered_bus_schedule['start_time_min'] - timestamps[-1]
    
    ### end bus ###
    ## end bus schedule filter
    cond1 = bus_schedule['도착정류장'] == end_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['버스_id'] == bus_id
    cond4 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3 & cond4].copy()
    filtered_bus_schedule['end_time_min'] = filtered_bus_schedule['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('end_time_min')
    
    end_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['end_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    
    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']
    
    evaluation[bus_type]['moving_time'] += start_time - timestamps[-1]
    
    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 1973/1973 [00:12<00:00, 163.25it/s]


In [11]:
print(evaluation['70']['walking_time'] / 1973, evaluation['70']['waiting_time'] / 1973, evaluation['70']['moving_time'] / 1973)
print(evaluation['70A']['walking_time'] / 1973, evaluation['70A']['waiting_time'] / 1973, evaluation['70A']['moving_time'] / 1973)
print(evaluation['70B']['walking_time'] / 1973, evaluation['70B']['waiting_time'] / 1973, evaluation['70B']['moving_time'] / 1973)

1.270035478966039 2.6891434363912787 7.7784912992059425
1.1749163710086152 1.4060246663287759 6.066229092752162
1.0706994424733898 1.5764047981077864 7.438070620037168


In [35]:
with open('data/passenger/bus_passenger_trip.json', 'w') as f:
    json.dump(total_data, f)

### 4. bus passenger - 버스 전용차로

### 4-1. preprocessing time columns in bus schedule

In [12]:
bus_random_passenger = pd.read_csv('data/passenger/decrease_bus_random_passenger.csv')
bus70_schedule = pd.read_csv('data/bus/free_70_schedule.csv')
bus70A_schedule = pd.read_csv('data/bus/free_70A_schedule.csv')
bus70B_schedule = pd.read_csv('data/bus/free_70B_schedule.csv')

bus70_schedule = bus70_schedule.rename(columns={'type': 'direction'})
bus70A_schedule = bus70A_schedule.rename(columns={'type': 'direction'})
bus70B_schedule = bus70B_schedule.rename(columns={'type': 'direction'})
 
bus70_schedule['bus_type'] = '70'
bus70A_schedule['bus_type'] = '70A'
bus70B_schedule['bus_type'] = '70B'

In [13]:
bus70_schedule.shape, bus70A_schedule.shape, bus70B_schedule.shape

((270, 11), (64, 11), (64, 11))

In [14]:
bus70_schedule = date_preprocessing(bus70_schedule)
bus70A_schedule = date_preprocessing(bus70A_schedule)
bus70B_schedule = date_preprocessing(bus70B_schedule)

In [15]:
bus_schedule = pd.concat([bus70_schedule, bus70A_schedule, bus70B_schedule], axis=0)
bus_schedule = bus_schedule.reset_index(drop=True)

In [16]:
start_day = bus_schedule.loc[0, '출발시간']
init_dt_lst = [start_day.year, start_day.month, start_day.day, 0, 0, 0]
init_dt = datetime(*init_dt_lst)
init_dt

datetime.datetime(2023, 6, 10, 0, 0)

### 4-2. passenger trip & timestamp

In [17]:
total_data = []
evaluation = {
    '70': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    },
    '70A': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    },
    '70B': {
        'waiting_time': 0,
        'walking_time': 0,
        'moving_time': 0,
    }
}

for idx, row in tqdm(bus_random_passenger.iterrows(), total=len(bus_random_passenger)):
    data = row.to_dict()
    
    bus_type = data['bus_type']
    start_time = data['start_time']
    start_station, end_station = data['start_station'], data['end_station']
    direction = data['direction']
    
    ### walking to start station ###
    start_lat, start_lon = data['start_lat'], data['start_lon']
    start_station_lat, start_station_lon = data['start_station_lat'], data['start_station_lon']
    
    routes, duration, distance = osrm_get_route((start_lon, start_lat, start_station_lon, start_station_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })
    
    ### start bus ###
    ## start bus schedule filter
    cond1 = bus_schedule['출발정류장'] == start_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3].copy()
    filtered_bus_schedule['start_time_min'] = filtered_bus_schedule['출발시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('start_time_min')
    
    start_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['start_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    bus_id = start_filtered_bus_schedule['버스_id']
    evaluation[bus_type]['waiting_time'] += start_filtered_bus_schedule['start_time_min'] - timestamps[-1]
    
    ### end bus ###
    ## end bus schedule filter
    cond1 = bus_schedule['도착정류장'] == end_station
    cond2 = bus_schedule['direction'] == direction
    cond3 = bus_schedule['버스_id'] == bus_id
    cond4 = bus_schedule['bus_type'] == bus_type
    filtered_bus_schedule = bus_schedule.loc[cond1 & cond2 & cond3 & cond4].copy()
    filtered_bus_schedule['end_time_min'] = filtered_bus_schedule['도착시간'].map(lambda x: (x - init_dt).total_seconds() / 60)
    filtered_bus_schedule = filtered_bus_schedule.sort_values('end_time_min')
    
    end_filtered_bus_schedule = filtered_bus_schedule[filtered_bus_schedule['end_time_min'] >= timestamps[-1]][:1].to_dict('records')[0]
    
    ### walking to end destination ###
    start_time = end_filtered_bus_schedule['end_time_min']
    end_station_lat, end_station_lon = data['end_station_lat'], data['end_station_lon']
    end_lat, end_lon = data['end_lat'], data['end_lon']
    
    evaluation[bus_type]['moving_time'] += start_time - timestamps[-1]
    
    routes, duration, distance = osrm_get_route((end_station_lon, end_station_lat, end_lon, end_lat), 'foot')
    timestamps = get_timestamp(routes, start_time, duration)
    evaluation[bus_type]['walking_time'] += duration
    
    total_data.append({
        'id': idx,
        'trip': list(map(lambda x: list(x[::-1]), routes)),
        'timestamp': timestamps
    })

100%|██████████| 1973/1973 [00:11<00:00, 164.88it/s]


In [18]:
print(evaluation['70']['walking_time'] / 1973, evaluation['70']['waiting_time'] / 1973, evaluation['70']['moving_time'] / 1973)
print(evaluation['70A']['walking_time'] / 1973, evaluation['70A']['waiting_time'] / 1973, evaluation['70A']['moving_time'] / 1973)
print(evaluation['70B']['walking_time'] / 1973, evaluation['70B']['waiting_time'] / 1973, evaluation['70B']['moving_time'] / 1973)

1.270035478966039 2.5541290758574093 6.283618854536246
1.1749163710086152 1.391672579827674 5.349156952187871
1.0706994424733898 1.4784237202230095 5.914071633721907


In [None]:
with open('data/passenger/bus_passenger_trip.json', 'w') as f:
    json.dump(total_data, f)