## 0. 라이브러리 불러오기

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

## 1. 데이터 불러오기

In [2]:
# Load the drifter training table from disk.
df = pd.read_csv('data/data_drifter.csv')

# Inputs: current position plus the 'uo'/'vo' and 'u10'/'v10' columns.
# Targets: the position columns for the following step.
X = df[['lons', 'lats', 'uo', 'vo', 'u10', 'v10']]
y = df[['next_lons', 'next_lats']]

# Hold out 20 % of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#############################
# 2. Build the model
# Each input column is fed to the LSTM as one timestep carrying one feature,
# hence input_shape=(n_columns, 1).
model = Sequential([
    LSTM(50, activation='relu', input_shape=(X_train.shape[1], 1)),
    Dense(2),
])
model.compile(optimizer='adam', loss='mse')

# Print the layer/parameter overview.
model.summary()

#############################
# 3. Train and evaluate
# Keras expects 3-D input [samples, timesteps, features]; append a trailing
# feature axis of length 1 to the 2-D tables.
X_train = X_train.values[:, :, np.newaxis]
X_test = X_test.values[:, :, np.newaxis]

# Train; the held-out split is also used to report validation loss per epoch.
model.fit(
    X_train,
    y_train,
    epochs=200,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Final MSE on the held-out split.
loss = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50)                10400     
                                                                 
 dense (Dense)               (None, 2)                 102       
                                                                 
Total params: 10502 (41.02 KB)
Trainable params: 10502 (41.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/

In [3]:
# # 과적합 방지하는 코드 추가

# # 1. 데이터 불러오기
# # 데이터 읽기
# df = pd.read_csv('data/data_drifter.csv')

# # 데이터 분할
# X = df[['time', 'lons', 'lats', 'uo', 'vo', 'u10', 'v10', 'next_time']]
# y = df[['next_lons', 'next_lats']]

# # train, test 분할
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # 2. 모델 구성하기
# model = Sequential()
# model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], 1), dropout=0.2))  # Dropout 추가
# model.add(Dense(2))
# model.compile(optimizer='adam', loss='mse')

# # 모델 구조 확인
# model.summary()

# #############################
# # 3. 학습 및 평가하기
# # reshape input to be 3D [samples, timesteps, features]
# X_train = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
# X_test = X_test.values.reshape((X_test.shape[0], X_test.shape[1], 1))

# # Early Stopping 콜백 정의
# early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)  # 10 epoch 동안 개선이 없으면 중단

# # 학습 (Early Stopping 추가)
# model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stopping])

# # 평가
# loss = model.evaluate(X_test, y_test, verbose=0)
# print(f"Test Loss: {loss:.4f}")

# 학습된 모델로 경로 예측하기

## 0. 라이브러리 불러오기

In [4]:
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime, timedelta

## 1. 데이터 불러오기
### 1-1. 초기값 가져오기

In [5]:
# Load the 2016 drifter records; the loop below reads the 'Year', 'Month',
# 'Day', 'Hour', 'Minute', 'Longitude' and 'Latitude' columns of each row.
drifter = pd.read_csv("data/drifter_2016.csv")

# 시간 변환 함수
def convert_to_nearest_6hour(year, month, day, hour, minute):
    """Snap a timestamp to the nearest 6-hour mark counted from 2016-01-01 00:00.

    Args:
        year, month, day, hour, minute: Calendar fields of the timestamp.
            Each one is passed through ``int()``, so float values (such as
            those yielded by ``DataFrame.iterrows``) are accepted.

    Returns:
        tuple: ``(nearest_6hour, result_time)``. ``nearest_6hour`` is an
        ``int`` number of hours since 2016-01-01 00:00 and always a multiple
        of 6; ``result_time`` is that same instant as a ``numpy.datetime64``.

    Raises:
        ValueError: If the fields do not form a valid date/time.
    """
    dt = datetime(int(year), int(month), int(day), int(hour), int(minute))
    base_dt = datetime(2016, 1, 1, 0, 0)

    # Work in whole minutes so the rounding is exact integer arithmetic.
    minutes_since_base = (dt - base_dt) // timedelta(minutes=1)

    # Round half up: adding half a slot (180 min) before floor-dividing by the
    # slot length (360 min) sends every exact tie (hh:00 at 3, 9, 15, 21 h) to
    # the later slot. The built-in round() rounds ties to the even multiple,
    # which snapped 03:00 down but 09:00 up.
    nearest_6hour = (minutes_since_base + 180) // 360 * 6

    result_time = np.datetime64(base_dt + timedelta(hours=nearest_6hour))

    return nearest_6hour, result_time

# Accumulates one record per drifter starting point.
results = []

# Open both NetCDF files through context managers so they are closed even if
# a lookup or time conversion raises part-way through the loop.
with xr.open_dataset('interpolated_sea_16.nc', engine='netcdf4') as dataset_sea, \
        xr.open_dataset('interpolated_wind_16.nc', engine='netcdf4') as dataset_wind:
    # Process every starting point.
    for _, row in drifter.iterrows():
        time, result_time = convert_to_nearest_6hour(
            row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute']
        )
        next_time = time + 6

        # The same nearest-neighbour selector is used for all four fields.
        nearest = dict(
            time=result_time,
            longitude=row['Longitude'],
            latitude=row['Latitude'],
            method='nearest',
        )

        # 'uo' / 'vo' come from the sea dataset.
        uo_value = dataset_sea['uo'].sel(**nearest).item()
        vo_value = dataset_sea['vo'].sel(**nearest).item()

        # 'u10' / 'v10' come from the wind dataset.
        u10_value = dataset_wind['u10'].sel(**nearest).item()
        v10_value = dataset_wind['v10'].sel(**nearest).item()

        # Store the record for this starting point.
        results.append({
            'time': time,
            'lons': row['Longitude'],
            'lats': row['Latitude'],
            'uo': uo_value,
            'vo': vo_value,
            'u10': u10_value,
            'v10': v10_value,
            'next_time': next_time
        })

# Convert the collected records into a DataFrame.
result_df = pd.DataFrame(results)

In [6]:
# Display the assembled initial-condition table as the cell's output.
result_df

Unnamed: 0,time,lons,lats,uo,vo,u10,v10,next_time
0,1740,127.071833,32.507367,-0.042594,0.16361,-3.698855,-5.244993,1746
1,1794,126.615817,33.759317,0.111659,0.156357,-1.98905,-1.670222,1800
2,1884,129.281933,34.949633,-0.039718,0.288225,-6.011956,-6.245036,1890
3,1884,129.2124,34.986667,-0.04038,0.272089,-4.94286,-5.878523,1890
4,4722,127.074467,32.516267,0.049979,0.304935,-2.538096,6.621501,4728
5,4788,129.297233,34.92175,0.203083,0.389139,1.072325,2.255833,4794
6,4824,129.398283,37.552583,0.025877,0.015028,-3.240538,1.653777,4830


## 2. 모델로 예측하기

In [7]:
# Roll the trained model forward from every starting point.
# 100 days * 4 steps per day (one prediction every 6 hours).
predict_duration = 400

# DataFrame that holds the prediction results (rebuilt per starting point).
predictions_df = pd.DataFrame(columns=['time', 'lons', 'lats', 'uo', 'vo', 'u10', 'v10', 'next_time', 'next_lons', 'next_lats'])

# Reference instant for the hour offsets stored in 'time'; it never changes,
# so build it once instead of on every prediction step.
base_dt = datetime(2016, 1, 1, 0, 0)

# Open both NetCDF files through context managers so they are closed even if
# model.predict or a lookup raises in the middle of a rollout.
with xr.open_dataset('interpolated_sea_16.nc', engine='netcdf4') as dataset_sea, \
        xr.open_dataset('interpolated_wind_16.nc', engine='netcdf4') as dataset_wind:

    # One rollout per starting point.
    for index, row in result_df.iterrows():
        print(f"Predicting for index {index}...")

        # Load the initial state for this starting point.
        current_time = row['time']
        current_lon = row['lons']
        current_lat = row['lats']
        uo_value = row['uo']
        vo_value = row['vo']
        u10_value = row['u10']
        v10_value = row['v10']
        next_time = row['next_time']
        print(f"inital value load for index {index}...")
        print(f"result : {current_time}, {current_lon}, {current_lat}, {uo_value}, {vo_value}, {u10_value}, {v10_value}, {next_time}")
        predictions_list = []

        # Predict step by step for the whole horizon.
        for step in range(predict_duration):
            print(f"predicting for {step}", end="")
            # Feed the current state to the model to get the next position.
            # The model expects [samples, timesteps, features] = (1, 6, 1).
            input_data = np.array([[current_lon, current_lat, uo_value, vo_value, u10_value, v10_value]])
            input_data = input_data.reshape((input_data.shape[0], input_data.shape[1], 1))
            prediction = model.predict(input_data)

            # Unpack the predicted position.
            predicted_lon = prediction[0][0]
            predicted_lat = prediction[0][1]

            print(f"result : {current_time}, {current_lon}, {current_lat}, {uo_value}, {vo_value}, {u10_value}, {v10_value}, {next_time}, {predicted_lon}, {predicted_lat}", end="")

            # Stop this rollout as soon as any value is NaN; the NaN step
            # itself is not recorded.
            values_to_check = [current_time, current_lon, current_lat, uo_value, vo_value, u10_value, v10_value, next_time, predicted_lon, predicted_lat]
            if any(np.isnan(value) for value in values_to_check):
                print("Encountered a NaN value. Stopping the prediction loop.")
                break

            predictions_list.append({
                'time': current_time,
                'lons': current_lon,
                'lats': current_lat,
                'uo': uo_value,
                'vo': vo_value,
                'u10': u10_value,
                'v10': v10_value,
                'next_time': next_time,
                'next_lons': predicted_lon,
                'next_lats': predicted_lat
            })

            # Advance position and time for the next iteration.
            current_lon = predicted_lon
            current_lat = predicted_lat
            current_time += 6  # move 6 hours forward
            next_time += 6  # move 6 hours forward

            # Convert the hour offset back to a timestamp for the lookup.
            search_time = np.datetime64(base_dt + timedelta(hours=current_time))

            # Fetch uo, vo, u10, v10 at the new position and time; the same
            # nearest-neighbour selector is used for all four fields.
            nearest = dict(
                time=search_time,
                longitude=current_lon,
                latitude=current_lat,
                method='nearest',
            )
            uo_value = dataset_sea['uo'].sel(**nearest).item()
            vo_value = dataset_sea['vo'].sel(**nearest).item()
            u10_value = dataset_wind['u10'].sel(**nearest).item()
            v10_value = dataset_wind['v10'].sel(**nearest).item()

        # Convert this rollout to a DataFrame and write it to its own CSV.
        predictions_df = pd.DataFrame(predictions_list)
        predictions_df.to_csv(f"predictions_{index}.csv", index=False)
        print(f"Predictions for index {index} completed.")

Predicting for index 0...
inital value load for index 0...
result : 1740.0, 127.07183333333332, 32.50736666666667, -0.042593646328896284, 0.16361024975776672, -3.69885516166687, -5.244993209838867, 1746.0
result : 2064.0, 130.76556396484375, 34.232383728027344, nan, nan, 5.684497356414795, 3.0162715911865234, 2070.0, nan, nanEncountered a NaN value. Stopping the prediction loop.
Predictions for index 0 completed.
Predicting for index 1...
inital value load for index 1...
result : 1794.0, 126.61581666666666, 33.75931666666666, 0.11165913939476013, 0.15635666251182556, -1.9890496730804443, -1.6702220439910889, 1800.0
result : 1926.0, 129.3450927734375, 34.348392486572266, nan, nan, -3.60064435005188, -4.589128494262695, 1932.0, nan, nanEncountered a NaN value. Stopping the prediction loop.
Predictions for index 1 completed.
Predicting for index 2...
inital value load for index 2...
result : 1884.0, 129.28193333333334, 34.94963333333333, -0.03971831128001213, 0.28822505474090576, -6.01195