#### 1. 데이터 로드

In [3]:
import pandas as pd

# Load the three input tables from CSV files under ../dataset/.
# speed_data: speed readings; later cells look up a 'datetime' column and one
# column per sensor, named by the sensor id as a string.
speed_data = pd.read_csv('../dataset/predicted_speed.csv')
# collision_data: one row per collision; later cells read its 'Date Occurred',
# 'Time Occurred', 'latitude' and 'longitude' columns.
collision_data = pd.read_csv('../dataset/collision.csv')
# sensor_location_data: sensor positions; later cells read 'sensor_id',
# 'latitude' and 'longitude'.
sensor_location_data = pd.read_csv('../dataset/graph_sensor_locations.csv')

#### 2. 데이터 전처리

2-1. 시간 형식 통일

In [4]:
# Give both tables a common 'datetime' column of parsed timestamps so they can
# be compared directly in the analysis cells.
# speed_data: parsed from its 'Date Occurred' column with no explicit format.
speed_data['datetime'] = pd.to_datetime(speed_data['Date Occurred'])
# collision_data: date and time live in separate columns, so they are joined
# with a single space and parsed with an explicit format.
# NOTE(review): the '+' concatenation assumes both columns hold strings — confirm.
collision_data['datetime'] = pd.to_datetime(
    collision_data['Date Occurred'] + ' ' + collision_data['Time Occurred'], format='%Y-%m-%d %H:%M'
)

2-2. 센서 ID 매핑

In [5]:
from geopy.distance import geodesic

def find_nearest_sensor(collision_lat, collision_lon, sensor_data):
    """Find the sensor closest to a collision site.

    Args:
        collision_lat: Latitude of the collision.
        collision_lon: Longitude of the collision.
        sensor_data: DataFrame with 'sensor_id', 'latitude' and 'longitude'
            columns, one row per sensor.

    Returns:
        A ``(sensor_id, distance)`` pair: the 'sensor_id' of the row with the
        smallest geodesic distance to the collision, and that distance in
        meters.
    """
    origin = (collision_lat, collision_lon)

    def _meters_from_origin(sensor_row):
        # Geodesic distance between the collision and one sensor, in meters.
        sensor_point = (sensor_row['latitude'], sensor_row['longitude'])
        return geodesic(origin, sensor_point).meters

    # One distance per sensor row, indexed like sensor_data.
    gaps = sensor_data.apply(_meters_from_origin, axis=1)

    closest_label = gaps.idxmin()
    closest_row = sensor_data.loc[closest_label]
    return closest_row['sensor_id'], gaps.min()

# Attach the closest sensor to every collision: find_nearest_sensor runs once
# per collision row, and result_type='expand' spreads its returned
# (sensor_id, distance) pair across the two new columns.
# The distance is in meters (geodesic(...).meters inside find_nearest_sensor).
collision_data[['nearest_sensor_id', 'distance_to_sensor']] = collision_data.apply(
    lambda row: find_nearest_sensor(row['latitude'], row['longitude'], sensor_location_data),
    axis=1, result_type='expand'
)

#### 3. 데이터 분석

3-1. 사고 전후 평균 속도 및 표준 편차 계산

In [6]:
def get_speed_changes(collision_row, speed_data):
    """Summarise one sensor's speeds in the two hours centred on a collision.

    Args:
        collision_row: Row-like object providing 'nearest_sensor_id' (a value
            convertible with ``int()``, or missing) and 'datetime' (the
            collision timestamp).
        speed_data: DataFrame with a 'datetime' column and one speed column
            per sensor, each named by the integer sensor id rendered as a
            string.

    Returns:
        ``(mean, std)`` of the non-null readings whose timestamp lies within
        one hour before or after the collision, both bounds inclusive.
        ``(None, None)`` when the sensor id is missing, the sensor has no
        column in ``speed_data``, or the window holds no readings.
    """
    raw_sensor_id = collision_row['nearest_sensor_id']
    # int(NaN) raises ValueError; a collision without a mapped sensor is
    # reported as "no data" instead of aborting the whole apply().
    if pd.isna(raw_sensor_id):
        return None, None

    # Going through int() drops any float formatting: 101.0 -> '101'.
    sensor_col = str(int(raw_sensor_id))
    if sensor_col not in speed_data.columns:
        return None, None

    accident_time = collision_row['datetime']
    one_hour = pd.Timedelta(hours=1)
    timestamps = speed_data['datetime']
    in_window = (
        (timestamps >= accident_time - one_hour)
        & (timestamps <= accident_time + one_hour)
    )

    # Row filtering keeps every column, so the sensor column needs no re-check.
    speeds = speed_data.loc[in_window, sensor_col].dropna()
    if speeds.empty:
        return None, None
    return speeds.mean(), speeds.std()

# Run get_speed_changes once per collision row; result_type='expand' spreads
# the returned (mean, std) pair across the two new columns. Rows for which the
# function returns (None, None) get no value in either column.
collision_data[['mean_speed', 'speed_std_dev']] = collision_data.apply(
    lambda row: get_speed_changes(row, speed_data), axis=1, result_type='expand'
)

3-2. 사고 전후 속도 변화 계산

In [7]:
def calculate_speed_change(collision_row, speed_data):
    """Compare a sensor's mean speed in the hour before and after a collision.

    Args:
        collision_row: Row-like object providing 'nearest_sensor_id' (a value
            convertible with ``int()``, or missing) and 'datetime' (the
            collision timestamp).
        speed_data: DataFrame with a 'datetime' column and one speed column
            per sensor, each named by the integer sensor id rendered as a
            string.

    Returns:
        ``(pre_mean, post_mean, post_mean - pre_mean)``. The "pre" window is
        ``[t - 1h, t)`` and the "post" window is ``(t, t + 1h]``, so a reading
        stamped exactly at the collision time belongs to neither.
        ``(None, None, None)`` when the sensor id is missing, the sensor has
        no column in ``speed_data``, or either window has no non-null reading.
    """
    no_result = (None, None, None)

    raw_sensor_id = collision_row['nearest_sensor_id']
    # int(NaN) raises ValueError; a collision without a mapped sensor is
    # reported as "no data" instead of aborting the whole apply().
    if pd.isna(raw_sensor_id):
        return no_result

    # Going through int() drops any float formatting: 101.0 -> '101'.
    sensor_col = str(int(raw_sensor_id))
    if sensor_col not in speed_data.columns:
        return no_result

    accident_time = collision_row['datetime']
    one_hour = pd.Timedelta(hours=1)
    timestamps = speed_data['datetime']

    is_before = (timestamps >= accident_time - one_hour) & (timestamps < accident_time)
    is_after = (timestamps > accident_time) & (timestamps <= accident_time + one_hour)

    # Row filtering keeps every column, so the sensor column needs no re-check.
    pre_speed = speed_data.loc[is_before, sensor_col].dropna()
    post_speed = speed_data.loc[is_after, sensor_col].dropna()
    if pre_speed.empty or post_speed.empty:
        return no_result

    pre_mean = pre_speed.mean()
    post_mean = post_speed.mean()
    return pre_mean, post_mean, post_mean - pre_mean

# Run calculate_speed_change once per collision row; result_type='expand'
# spreads the returned (pre mean, post mean, difference) triple across the
# three new columns.
collision_data[['pre_speed_mean', 'post_speed_mean', 'speed_change']] = collision_data.apply(
    lambda row: calculate_speed_change(row, speed_data), axis=1, result_type='expand'
)

#### 4. 분석 결과 요약

In [8]:
import numpy as np

def calculate_speed_change(collision_row, speed_data):
    """Return a sensor's mean speed before and after a collision, and the delta.

    NOTE: this re-declares the function of the same name from the previous
    cell and shadows it once this cell runs.

    Args:
        collision_row: Row-like object providing 'nearest_sensor_id' (a value
            convertible with ``int()``, or missing) and 'datetime' (the
            collision timestamp).
        speed_data: DataFrame with a 'datetime' column and one speed column
            per sensor, each named by the integer sensor id rendered as a
            string.

    Returns:
        ``(pre_mean, post_mean, post_mean - pre_mean)``, where "pre" covers
        ``[t - 1h, t)`` and "post" covers ``(t, t + 1h]``; a reading stamped
        exactly at the collision time is in neither window.
        ``(None, None, None)`` when the sensor id is missing, the sensor has
        no column in ``speed_data``, or either window has no non-null reading.
    """
    sensor_value = collision_row['nearest_sensor_id']
    # Guard first: int(NaN) raises ValueError and would abort the apply().
    if pd.isna(sensor_value):
        return None, None, None

    # Going through int() drops any float formatting: 101.0 -> '101'.
    sensor_col = str(int(sensor_value))
    if sensor_col not in speed_data.columns:
        return None, None, None

    accident_time = collision_row['datetime']
    window_start = accident_time - pd.Timedelta(hours=1)
    window_end = accident_time + pd.Timedelta(hours=1)
    stamps = speed_data['datetime']

    # Filtering rows keeps all columns, so sensor_col is still present here.
    pre_speed = speed_data.loc[
        (stamps >= window_start) & (stamps < accident_time), sensor_col
    ].dropna()
    post_speed = speed_data.loc[
        (stamps > accident_time) & (stamps <= window_end), sensor_col
    ].dropna()

    if len(pre_speed) == 0 or len(post_speed) == 0:
        return None, None, None

    pre_mean = pre_speed.mean()
    post_mean = post_speed.mean()
    return pre_mean, post_mean, post_mean - pre_mean

# Recompute the three speed-change columns with the function defined in this
# cell. This assigns the same column names as the apply() in the previous cell.
collision_data[['pre_speed_mean', 'post_speed_mean', 'speed_change']] = collision_data.apply(
    lambda row: calculate_speed_change(row, speed_data), axis=1, result_type='expand'
)

# Descriptive statistics for the three columns. The result is only stored; the
# visible code does not print or save it.
speed_change_summary = collision_data[['pre_speed_mean', 'post_speed_mean', 'speed_change']].describe()

# Export only the three speed-change columns, without the index.
# NOTE(review): the file carries no collision identifier, timestamp or sensor
# id, so rows can only be matched back by position — confirm this is intended.
output_file_path = '../dataset/collision_predicted_speed_data.csv'
collision_data[['pre_speed_mean', 'post_speed_mean', 'speed_change']].to_csv(output_file_path, index=False)