## 0. 라이브러리 불러오기

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## 1. 데이터 불러오기

In [2]:
# Load the drifter training table from CSV into a DataFrame
df = pd.read_csv('data/data_drifter.csv')

In [4]:
# Pick the model inputs (X) and the prediction targets (y) out of df.
X = df.loc[:, [
    'time', 'lons', 'lats',
    'uo', 'vo',
    'u10', 'v10',
    'next_time',
]]
y = df.loc[:, ['next_lons', 'next_lats']]

In [8]:
# Min-max normalisation: one scaler for the inputs, a separate one for the
# targets, each fitted first and then applied to the same data.
# NOTE(review): both scalers are fitted here on the complete X / y.
scaler_x = MinMaxScaler().fit(X)
scaler_y = MinMaxScaler().fit(y)
X_scaled = scaler_x.transform(X)
y_scaled = scaler_y.transform(y)

In [9]:
# Train/test split.
# Split the raw X / y first and fit the scalers on the training rows only.
# Fitting MinMaxScaler on the full dataset before splitting lets the test
# rows' min/max shape the training features (data leakage). With the same
# random_state and row count, the rows chosen for each side are unchanged.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Refit the scalers so that later transform / inverse_transform calls use
# training-set statistics only.
scaler_x = MinMaxScaler().fit(X_train_raw)
scaler_y = MinMaxScaler().fit(y_train_raw)

# transform() returns 2D NumPy arrays; test values may fall slightly outside
# [0, 1] because the range was learned from the training rows.
X_train = scaler_x.transform(X_train_raw)
X_test = scaler_x.transform(X_test_raw)
y_train = scaler_y.transform(y_train_raw)
y_test = scaler_y.transform(y_test_raw)

## 2. 모델 구성하기

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Build the network in a single constructor call: an LSTM layer with 50 units
# that reads inputs shaped (X_train.shape[1], 1), followed by a 2-unit Dense
# output layer.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(X_train.shape[1], 1)),
    Dense(2),
])

# Mean-squared-error loss, Adam optimizer.
model.compile(loss='mse', optimizer='adam')

# Print the layer / parameter summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50)                10400     
                                                                 
 dense (Dense)               (None, 2)                 102       
                                                                 
Total params: 10502 (41.02 KB)
Trainable params: 10502 (41.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## 3. 학습 및 평가하기

In [11]:
# Give the inputs a trailing axis of length 1 so they are 3D:
# [samples, timesteps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Training: 200 epochs, reporting loss on (X_test, y_test) after each epoch
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    verbose=1,
)

# Evaluation: final loss on the test arrays
loss = model.evaluate(x=X_test, y=y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Training with early stopping to curb overfitting

from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 10 epochs and roll the model back
# to the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

# reshape input to be 3D [samples, timesteps, features]
# (a no-op if the arrays already have the trailing axis of length 1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train. Early stopping selects the best epoch from val_loss, so the
# validation data must not be the test set: otherwise the reported test loss
# is measured on data that was used to pick the weights. validation_split
# holds out the last 20% of the training arrays instead.
# NOTE(review): this continues training the existing `model` object; rebuild
# the model first if a fresh run is intended.
model.fit(X_train, y_train, epochs=200, validation_split=0.2, verbose=1, callbacks=[early_stop])

# Evaluate on the untouched test set
loss = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")


# 학습된 모델로 경로 예측하기

## 0. 라이브러리 불러오기

In [13]:
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime, timedelta

## 1. 데이터 불러오기
### 1-1. 초기값 가져오기

In [14]:
# Load the 2016 drifter records from CSV into a DataFrame
drifter = pd.read_csv("data/drifter_2016.csv")

In [31]:
# Time conversion helper
def convert_to_nearest_6hour(year, month, day, hour, minute):
    """Snap a timestamp to the nearest 6-hour mark counted from 2016-01-01 00:00.

    Exact midpoints (e.g. 03:00, 09:00, 15:00) always round up to the later
    6-hour mark.

    Args:
        year: Calendar year; converted with ``int()``.
        month: Calendar month; converted with ``int()``.
        day: Day of month; converted with ``int()``.
        hour: Hour of day; converted with ``int()``.
        minute: Minute of hour; converted with ``int()``.

    Returns:
        tuple: ``(nearest_6hour, result_time)``. ``nearest_6hour`` is an
        ``int`` number of hours since 2016-01-01 00:00 and a multiple of 6.
        ``result_time`` is the same instant as a ``numpy.datetime64``.

    Raises:
        ValueError: If the fields do not form a valid date/time (raised by
            ``datetime``).
    """
    # Inputs may arrive as floats (e.g. from a DataFrame row), so cast first.
    dt = datetime(int(year), int(month), int(day), int(hour), int(minute))
    base_dt = datetime(2016, 1, 1, 0, 0)
    delta = dt - base_dt

    # Whole minutes since the base; exact because dt carries no seconds.
    minutes_since_base = delta.days * 24 * 60 + delta.seconds // 60

    # Round half up using integer arithmetic (180 min = half of a 6 h step).
    # Built-in round() rounds ties to the nearest even integer, which would
    # send 03:00 down to 0 but 09:00 up to 12.
    nearest_6hour = (minutes_since_base + 180) // 360 * 6

    result_time = np.datetime64(base_dt + timedelta(hours=nearest_6hour))

    return nearest_6hour, result_time

In [36]:
# List that collects one feature dict per drifter start point
results = []

In [37]:
# Open each NetCDF dataset once, before the loop. Reopening them for every
# row repeats loop-invariant I/O and leaves a new unclosed handle behind on
# each iteration. They are deliberately left open because the prediction
# cell further down reads `dataset_sea` / `dataset_wind` again.
dataset_sea = xr.open_dataset('interpolated_sea_16.nc', engine='netcdf4')
dataset_wind = xr.open_dataset('interpolated_wind_16.nc', engine='netcdf4')

# Build one feature record per drifter start point
for index, row in drifter.iterrows():
    # `time` is hours since 2016-01-01 00:00 snapped to a 6-hour mark;
    # `result_time` is the same instant as a datetime64 for dataset lookups.
    time, result_time = convert_to_nearest_6hour(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute'])
    next_time = time + 6

    # Same nearest-neighbour lookup for all four variables
    nearest_point = dict(
        time=result_time,
        longitude=row['Longitude'],
        latitude=row['Latitude'],
        method='nearest',
    )

    # Read uo, vo from the sea dataset
    uo_value = dataset_sea['uo'].sel(**nearest_point).item()
    vo_value = dataset_sea['vo'].sel(**nearest_point).item()

    # Read u10, v10 from the wind dataset
    u10_value = dataset_wind['u10'].sel(**nearest_point).item()
    v10_value = dataset_wind['v10'].sel(**nearest_point).item()

    # Store the record
    results.append({
        'time': time,
        'lons': row['Longitude'],
        'lats': row['Latitude'],
        'uo': uo_value,
        'vo': vo_value,
        'u10': u10_value,
        'v10': v10_value,
        'next_time': next_time
    })

In [38]:
# Convert the list of per-start-point dicts into a DataFrame
result_df = pd.DataFrame(results)

In [40]:
# Display the start-point feature table
result_df

Unnamed: 0,time,lons,lats,uo,vo,u10,v10,next_time
0,1740,127.071833,32.507367,-0.042594,0.16361,-3.698855,-5.244993,1746
1,1794,126.615817,33.759317,0.111659,0.156357,-1.98905,-1.670222,1800
2,1884,129.281933,34.949633,-0.039718,0.288225,-6.011956,-6.245036,1890
3,1884,129.2124,34.986667,-0.04038,0.272089,-4.94286,-5.878523,1890
4,4722,127.074467,32.516267,0.049979,0.304935,-2.538096,6.621501,4728
5,4788,129.297233,34.92175,0.203083,0.389139,1.072325,2.255833,4794
6,4824,129.398283,37.552583,0.025877,0.015028,-3.240538,1.653777,4830


## 2. 모델로 예측하기

In [None]:
# Roll the model forward for 100 days from every start point
predict_duration = 400  # 100 days * 4 (one prediction per 6-hour step)
step_hours = 6

# Epoch for the integer 'time' values: hours since 2016-01-01 00:00, the same
# base that convert_to_nearest_6hour uses.
base_time = datetime(2016, 1, 1, 0, 0)

# Column order the model inputs were built with (must match X / scaler_x)
feature_columns = ['time', 'lons', 'lats', 'uo', 'vo', 'u10', 'v10', 'next_time']
prediction_columns = ['start_lons', 'start_lats', 'predicted_lons', 'predicted_lats', 'time']

# Collect rows in a list and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the frame on each call.
prediction_rows = []

for index, row in result_df.iterrows():
    current_lon = float(row['lons'])
    current_lat = float(row['lats'])
    # iterrows() may upcast the integer hour count to float; restore it.
    current_time = int(row['time'])

    for _ in range(predict_duration):
        # The start-point cell selects on the time coordinate with a
        # datetime64, so convert the hour count instead of passing it raw.
        nearest_point = dict(
            time=np.datetime64(base_time + timedelta(hours=current_time)),
            longitude=current_lon,
            latitude=current_lat,
            method='nearest',
        )

        # Look up uo, vo, u10, v10 at the current position and time.
        uo_value = dataset_sea['uo'].sel(**nearest_point).item()
        vo_value = dataset_sea['vo'].sel(**nearest_point).item()
        u10_value = dataset_wind['u10'].sel(**nearest_point).item()
        v10_value = dataset_wind['v10'].sel(**nearest_point).item()

        # The model was trained on all eight features after min-max scaling,
        # so build the full feature row and scale it with the fitted scaler.
        features = pd.DataFrame(
            [[current_time, current_lon, current_lat,
              uo_value, vo_value, u10_value, v10_value,
              current_time + step_hours]],
            columns=feature_columns,
        )
        input_data = scaler_x.transform(features)
        input_data = input_data.reshape((input_data.shape[0], input_data.shape[1], 1))

        # The network outputs scaled targets; map them back to lon/lat.
        prediction = scaler_y.inverse_transform(model.predict(input_data, verbose=0))

        predicted_lon = float(prediction[0][0])
        predicted_lat = float(prediction[0][1])

        prediction_rows.append({
            'start_lons': current_lon,
            'start_lats': current_lat,
            'predicted_lons': predicted_lon,
            'predicted_lats': predicted_lat,
            'time': current_time
        })

        # Advance position and time for the next step
        current_lon = predicted_lon
        current_lat = predicted_lat
        current_time += step_hours  # move 6 hours ahead

# DataFrame holding every predicted step
predictions_df = pd.DataFrame(prediction_rows, columns=prediction_columns)

print(predictions_df)
