In [None]:
import os
import zipfile
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# ========== 1. Extract full_data.zip ==========
# Source archive and the directory its members are unpacked into.
zip_path = '/content/full_data.zip'
full_data_folder = '/content/data/full/'

# exist_ok=True keeps re-running this cell from failing when the
# directory is already there.
os.makedirs(full_data_folder, exist_ok=True)

# Reading is ZipFile's default mode; the context manager closes the
# archive handle even if extraction raises.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=full_data_folder)

print(f"full_data.zip 已解壓縮到 {full_data_folder}")

# ========== 2. Sliding-window parameters ==========
# Window sizes are counted in rows; the "hours" reading assumes one row
# per hour in the input CSVs -- TODO confirm against the data.
input_len = 6    # rows of history in each input window (past 6 hours)
predict_len = 3  # rows to predict after each input window (next 3 hours)

# Columns fed to the model, in the order they are selected from each CSV.
feature_columns = [
    'StnPres',
    'Temperature',
    'RH',
    'WS',
    'Precp',
    'Precp_3h',
    'Precp_6h',
]

# Column whose future values form the prediction target. It is also one
# of the input features listed above.
target_column = 'Precp'

# ========== 3. Build sliding-window samples from every *_full.csv ==========
# X_list collects (input_len, n_features) input windows; Y_list collects the
# matching (predict_len,) target windows. Both are filled in the same order.
X_list = []
Y_list = []

# Rows consumed by one sample: the input window plus the horizon after it.
window_len = input_len + predict_len

# os.listdir() returns entries in arbitrary order. Sorting makes the sample
# order -- and therefore the seeded train/test split performed later --
# reproducible across machines and filesystems.
for file_name in sorted(os.listdir(full_data_folder)):
    if not file_name.endswith('_full.csv'):
        continue

    file_path = os.path.join(full_data_folder, file_name)
    print(f"\n正在處理：{file_path}")

    # Only loading and scaling can realistically fail (unreadable file,
    # missing column, empty frame). A bad file is reported and skipped so
    # the remaining files are still processed.
    try:
        df = pd.read_csv(file_path)

        # Windows must follow time order.
        df = df.sort_values('time').reset_index(drop=True)

        # Min-max scale the feature columns using this file's own range.
        # NOTE(review): the scaler is fitted per file, on all rows (before
        # any train/test split), and is not persisted anywhere in this
        # block. Scaled values from different files are therefore on
        # different scales, and cannot be inverted later from here.
        scaler = MinMaxScaler()
        df[feature_columns] = scaler.fit_transform(df[feature_columns])

        feature_data = df[feature_columns].values
        # NOTE(review): target_column is one of feature_columns, so the
        # target read here is the *scaled* value, not the raw measurement.
        target_data = df[target_column].values
    except Exception as e:
        print(f"錯誤處理 {file_name}：{e}")
        continue

    total_length = len(df)

    # Number of stride-1 windows that fit. For files shorter than
    # window_len this is <= 0, so range() is empty and no sample is added.
    n_windows = total_length - window_len + 1

    X_list.extend(
        feature_data[start:start + input_len]
        for start in range(n_windows)
    )
    Y_list.extend(
        target_data[start + input_len:start + window_len]
        for start in range(n_windows)
    )

# ========== 4. Split into training and test sets ==========
# Fail early with an explicit message when no window was produced; otherwise
# the empty arrays only fail later, inside train_test_split, with a message
# that does not point at the input data.
if not X_list:
    raise ValueError(
        f"沒有產生任何樣本：請確認 {full_data_folder} 內有 *_full.csv 檔案，"
        f"且每個檔案至少有 {input_len + predict_len} 列資料。"
    )

# Stack the per-window arrays: X is (samples, input_len, n_features),
# Y is (samples, predict_len).
X = np.array(X_list)
Y = np.array(Y_list)

print("\n完成 sliding window 切割！")
# Report the configured window sizes rather than hard-coded numbers so the
# message stays correct when input_len / predict_len are changed.
print(f"X 形狀：{X.shape} （樣本數, {input_len}小時, 特徵數）")
print(f"Y 形狀：{Y.shape} （樣本數, {predict_len}小時）")

# 80/20 split with a fixed seed.
# NOTE(review): windows are cut with stride 1, so neighbouring samples share
# most of their rows. Shuffling before splitting places overlapping windows
# in both the training and the test set; if the test set is meant to measure
# performance on unseen time periods, consider a chronological split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, shuffle=True
)

print("\n切分結果：")
print(f"X_train: {X_train.shape}, Y_train: {Y_train.shape}")
print(f"X_test: {X_test.shape}, Y_test: {Y_test.shape}")

# ========== 5. Save the splits as NumPy files ==========
output_dir = '/content/data/processed/'
os.makedirs(output_dir, exist_ok=True)

# File name -> array, in the order the files are written and reported.
arrays_to_save = {
    'X_train.npy': X_train,
    'Y_train.npy': Y_train,
    'X_test.npy': X_test,
    'Y_test.npy': Y_test,
}

for npy_name, array in arrays_to_save.items():
    np.save(os.path.join(output_dir, npy_name), array)

print("\n成功儲存：")
# The reported path is output_dir and the file name concatenated directly,
# so it relies on output_dir ending with a path separator.
for npy_name in arrays_to_save:
    print(f"- {output_dir}{npy_name}")
