In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Load the dataset from CSV.
df = pd.read_csv('data/data_drifter.csv')

# Select the model inputs (X) and the columns to be predicted (y).
feature_columns = ['time', 'lons', 'lats', 'uo', 'vo', 'u10', 'v10', 'next_time']
target_columns = ['next_lons', 'next_lats']
X = df[feature_columns]
y = df[target_columns]

# K-fold cross-validated training of a single-layer linear model that maps the
# input features in X to the two target columns in y.
#
# Normalisation is done inside each fold: the scalers are fitted on the
# training rows only and then applied to the held-out rows, so the held-out
# data never influences the min/max used for scaling.
X_values = X.to_numpy()
y_values = y.to_numpy()

# 5-fold cross-validation with shuffling. random_state=None means the fold
# assignment differs from run to run.
kf = KFold(n_splits=5, shuffle=True, random_state=None)

# Held-out loss (MAE in scaled target units) of every fold.
fold_losses = []

for fold, (train_index, test_index) in enumerate(kf.split(X_values), start=1):
    # Fit the scalers on the training fold, reuse them for the held-out fold.
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train = scaler_x.fit_transform(X_values[train_index])
    y_train = scaler_y.fit_transform(y_values[train_index])
    X_test = scaler_x.transform(X_values[test_index])
    y_test = scaler_y.transform(y_values[test_index])

    # A new model is built for every fold so no weights carry over.
    model = Sequential()
    model.add(Dense(2, activation='linear', input_shape=(X_train.shape[1],)))
    model.compile(optimizer='adam', loss='mae')

    # Print the model structure.
    model.summary()

    # Stop training once val_loss has not improved for 10 epochs.
    # NOTE(review): the held-out fold is used both as the early-stopping
    # validation set and for the final evaluation below, so the reported
    # loss is optimistically biased. Consider carving a separate validation
    # split out of the training fold.
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

    # Train and record the per-epoch loss (MAE, as configured in compile()).
    history = model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stop])

    # Plot the per-epoch training and validation MAE.
    train_mae = history.history['loss']
    val_mae = history.history['val_loss']

    plt.figure(figsize=(10, 6))
    plt.plot(train_mae, label='Train MAE')
    plt.plot(val_mae, label='Validation MAE')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.title(f'Fold {fold} - Training and Validation MAE')
    plt.legend()
    plt.show()

    # Evaluate on the held-out fold.
    test_loss = model.evaluate(X_test, y_test, verbose=1)
    fold_losses.append(test_loss)
    print(f"Fold {fold} Test Loss: {test_loss:.4f}")

    # Predict the held-out fold with the weights the model holds after fit().
    y_pred = model.predict(X_test)

    # Undo the target normalisation to get back to the original scale.
    y_pred_restored = scaler_y.inverse_transform(y_pred)
    y_test_restored = scaler_y.inverse_transform(y_test)

    # Scatter plot of actual vs. predicted targets. Column 0 is 'next_lons'
    # and column 1 is 'next_lats' (the column order of y), so column 0 goes
    # on the x axis to match the axis labels.
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test_restored[:, 0], y_test_restored[:, 1], label='Actual', c='blue', marker='o')
    plt.scatter(y_pred_restored[:, 0], y_pred_restored[:, 1], label='Predicted', c='red', marker='x')
    plt.xlabel('lons')
    plt.ylabel('lats')
    plt.title(f'Fold {fold} - Actual vs. Predicted')
    plt.legend()
    plt.show()

# Summarise the cross-validation result across all folds.
print(f"Mean Test Loss over {len(fold_losses)} folds: {np.mean(fold_losses):.4f} (std {np.std(fold_losses):.4f})")


Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 2)                 18        
                                                                 
Total params: 18 (72.00 Byte)
Trainable params: 18 (72.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200