In [1]:
import os
PATH = 'C:/Users/user/DataPreprocessing/ModelingDataset/data'
os.chdir(PATH)

In [2]:
import pandas as pd
import geopandas as gpd
from shapely.ops import nearest_points

### 1. geopandas 라이브러리 활용, ship 현재위치에서 가장 가까운 관측소 찾기

In [10]:
df_shiplog = pd.read_csv('./shiplog.csv',sep=',', encoding = 'cp949', index_col = 0) # ais정보로 바꾸기
df_obs = pd.read_csv('./obs.csv', encoding = 'cp949', sep=',')

In [11]:
df_shiplog

Unnamed: 0,insertTime,mmsi,speed,shipLat,shipLon
0,2022-11-30 16:33:05,371369000,17.1,37.264305,126.245270
1,2022-11-30 16:33:05,440022920,0.0,37.452236,126.610077
2,2022-11-30 16:33:05,235086166,9.7,37.004253,126.162865
3,2022-11-30 16:33:05,440022920,0.0,37.452225,126.610085
4,2022-11-30 16:33:05,441103000,5.3,37.327690,126.510292
...,...,...,...,...,...
293989,2022-12-30 00:50:02,538009716,11.8,37.445282,126.577316
293990,2022-12-30 00:50:04,538010048,2.8,37.339527,126.634224
293991,2022-12-30 00:50:05,441174000,11.7,37.188816,126.381104
293992,2022-12-30 00:50:07,538010048,2.8,37.339565,126.415787


In [12]:
df_obs

Unnamed: 0,obsId,obsName,obsLat,obsLon
0,DT_0065,덕적도,37.226333,126.156556
1,DT_0008,안산,37.192222,126.647222
2,DT_0043,영흥도,37.238611,126.428611
3,DT_0001,인천,37.451944,126.592222
4,DT_0052,인천송도,37.338056,126.586111
5,DT_0050,태안,36.913056,126.238889
6,DT_0002,평택,36.966944,126.822778


In [13]:
# 위치 데이터를 geopandas로 바꾸기
shiplog = gpd.GeoDataFrame(df_shiplog, geometry=gpd.points_from_xy(df_shiplog['shipLat'], df_shiplog['shipLon']))
obs = gpd.GeoDataFrame(df_obs, geometry=gpd.points_from_xy(df_obs['obsLat'], df_obs['obsLon']))

In [15]:
# geopandas로 바꾼 데이터 위도,경도를 geometry point 로 변환
# 두 getodataFrame의 epsg를 설정해준다
shiplog.set_crs(epsg = 4326, inplace = True)
obs.set_crs(epsg = 4326, inplace = True)

Unnamed: 0,obsId,obsName,obsLat,obsLon,geometry
0,DT_0065,덕적도,37.226333,126.156556,POINT (37.22633 126.15656)
1,DT_0008,안산,37.192222,126.647222,POINT (37.19222 126.64722)
2,DT_0043,영흥도,37.238611,126.428611,POINT (37.23861 126.42861)
3,DT_0001,인천,37.451944,126.592222,POINT (37.45194 126.59222)
4,DT_0052,인천송도,37.338056,126.586111,POINT (37.33806 126.58611)
5,DT_0050,태안,36.913056,126.238889,POINT (36.91306 126.23889)
6,DT_0002,평택,36.966944,126.822778,POINT (36.96694 126.82278)


In [16]:
# 가장 가까운 좌표 찾는 함수
def get_nearest_values(row, other_gdf, point_column='geometry', value_column="geometry"):
    """Find the nearest point and return the corresponding value from specified value column."""
    
    # Create an union of the other GeoDataFrame's geometries:
    other_points = other_gdf["geometry"].unary_union
    
    # Find the nearest points
    nearest_geoms = nearest_points(row[point_column], other_points)
    
    # Get corresponding values from the other df
    nearest_data = other_gdf.loc[other_gdf["geometry"] == nearest_geoms[1]]
    
    nearest_value = nearest_data[value_column].values[0]
    
    return nearest_value

# 멀티포인트: shapely의 nearest_points처럼 사용하기 위해서
# unary_union: geometry 데이터의 합집합
unary_union = obs.unary_union

In [18]:
# get_nearest_values 함수를 적용시켜 가장 가까운 관측소 좌표 출력하기
# ais["가장가까운관측소"] = ais.apply(get_nearest_values, other_gdf=obs, point_column="geometry", axis=1)

# 가장 가까운 관측소의 이름으로 바꿔 출력하기
shiplog["nearestObs"] = shiplog.apply(get_nearest_values, other_gdf=obs, point_column="geometry", value_column="obsName", axis=1)

In [19]:
shiplog

Unnamed: 0,insertTime,mmsi,speed,shipLat,shipLon,geometry,nearestObs
0,2022-11-30 16:33:05,371369000,17.1,37.264305,126.245270,POINT (37.26431 126.24527),덕적도
1,2022-11-30 16:33:05,440022920,0.0,37.452236,126.610077,POINT (37.45224 126.61008),인천
2,2022-11-30 16:33:05,235086166,9.7,37.004253,126.162865,POINT (37.00425 126.16286),태안
3,2022-11-30 16:33:05,440022920,0.0,37.452225,126.610085,POINT (37.45222 126.61008),인천
4,2022-11-30 16:33:05,441103000,5.3,37.327690,126.510292,POINT (37.32769 126.51029),인천송도
...,...,...,...,...,...,...,...
293989,2022-12-30 00:50:02,538009716,11.8,37.445282,126.577316,POINT (37.44528 126.57732),인천
293990,2022-12-30 00:50:04,538010048,2.8,37.339527,126.634224,POINT (37.33953 126.63422),인천송도
293991,2022-12-30 00:50:05,441174000,11.7,37.188816,126.381104,POINT (37.18882 126.38110),영흥도
293992,2022-12-30 00:50:07,538010048,2.8,37.339565,126.415787,POINT (37.33957 126.41579),영흥도


In [21]:
# 추출 데이터 csv에 저장
shiplog.to_csv('./shiplog_obs.csv', encoding = 'cp949', sep=",")