TEMPERATURE:

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

def create_dataset(dataset, look_back=72):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:(i + look_back), 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

# 读取数据
data = pd.read_csv("H_20_latest-2024-2025.csv", sep=";")
selected_columns = ['AAAAMMJJHH', 'T', 'RR1', 'NUM_POSTE', 'LAT', 'LON', 'N', 'FF', 'U', 'PSTAT']  
df = data[selected_columns].copy()

# 处理时间格，选取云量 N、风速 FF、湿度 U、检测点气压 PSTAT、温度 T、降水量 RR1来预测降水率RR3（数据没有RR3）?
df['AAAAMMJJHH'] = pd.to_datetime(df['AAAAMMJJHH'], format='%Y%m%d%H')
df.rename(columns={'AAAAMMJJHH': 'date', 'T': 'temperature', 'RR1': 'rainfall', 
                   'N': 'cloud_cover', 'FF': 'wind_speed', 'U': 'humidity', 'PSTAT': 'pressure'}, inplace=True)

# 选择站点数据
df = df.loc[df['NUM_POSTE'] == 20004002]
df.set_index('date', inplace=True)
df.interpolate(method='linear', inplace=True)  # 填充缺失值

# 数据归一化
scaler_temp = MinMaxScaler()
df['temperature_scaled'] = scaler_temp.fit_transform(df[['temperature']])
scaler_rain = MinMaxScaler()
df['rainfall_scaled'] = scaler_rain.fit_transform(df[['rainfall']])
scaler_humidity = MinMaxScaler()
df['humidity_scaled'] = scaler_humidity.fit_transform(df[['humidity']])
scaler_wind = MinMaxScaler()
df['wind_speed_scaled'] = scaler_wind.fit_transform(df[['wind_speed']])
scaler_pressure = MinMaxScaler()
df['pressure_scaled'] = scaler_pressure.fit_transform(df[['pressure']])

data_scaled = df[['temperature_scaled']].values
X, y = create_dataset(data_scaled, look_back=72)
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train LSTM Model (Optimized)
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)
model = Sequential([
    LSTM(50, input_shape=(X.shape[1], X.shape[2])),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
model.fit(trainX, trainY, epochs=50, batch_size=64, validation_data=(testX, testY), verbose=2, callbacks=[early_stop])

2025-03-27 17:28:22.911850: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743092902.986373   46199 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743092903.008564   46199 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-27 17:28:23.155843: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-27 17:28:45.437049: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL

Epoch 1/50
129/129 - 9s - 67ms/step - loss: 0.0224 - val_loss: 0.0090
Epoch 2/50
129/129 - 5s - 42ms/step - loss: 0.0071 - val_loss: 0.0046
Epoch 3/50
129/129 - 5s - 40ms/step - loss: 0.0028 - val_loss: 0.0022
Epoch 4/50
129/129 - 5s - 39ms/step - loss: 0.0020 - val_loss: 0.0017
Epoch 5/50
129/129 - 5s - 39ms/step - loss: 0.0017 - val_loss: 0.0014
Epoch 6/50
129/129 - 5s - 41ms/step - loss: 0.0015 - val_loss: 0.0012
Epoch 7/50
129/129 - 5s - 40ms/step - loss: 0.0014 - val_loss: 0.0014
Epoch 8/50
129/129 - 5s - 41ms/step - loss: 0.0012 - val_loss: 0.0011
Epoch 9/50
129/129 - 5s - 40ms/step - loss: 0.0011 - val_loss: 9.4826e-04
Epoch 10/50
129/129 - 5s - 39ms/step - loss: 9.8032e-04 - val_loss: 8.0796e-04
Epoch 11/50
129/129 - 5s - 41ms/step - loss: 9.7601e-04 - val_loss: 7.4899e-04
Epoch 12/50
129/129 - 5s - 41ms/step - loss: 9.0079e-04 - val_loss: 7.7158e-04
Epoch 13/50
129/129 - 5s - 42ms/step - loss: 8.6419e-04 - val_loss: 9.0265e-04
Epoch 14/50
129/129 - 5s - 39ms/step - loss: 8.416

<keras.src.callbacks.history.History at 0x7f64a3f8ae90>

PLUIE:

In [2]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE  # 解决类别不均衡
import matplotlib.pyplot as plt

# 读取数据
data = pd.read_csv("H_20_latest-2024-2025.csv", sep=";")
selected_columns = ['AAAAMMJJHH', 'T', 'RR1', 'NUM_POSTE', 'LAT', 'LON', 'N', 'FF', 'U', 'PSTAT']
df = data[selected_columns].copy()

# 处理时间
df['AAAAMMJJHH'] = pd.to_datetime(df['AAAAMMJJHH'], format='%Y%m%d%H')
df.rename(columns={'AAAAMMJJHH': 'date', 'T': 'temperature', 'RR1': 'rainfall', 
                   'N': 'cloud_cover', 'FF': 'wind_speed', 'U': 'humidity', 'PSTAT': 'pressure'}, inplace=True)

# 选择特定站点
df = df.loc[df['NUM_POSTE'] == 20004002]
df.set_index('date', inplace=True)
df.interpolate(method='linear', inplace=True)  # 线性填充缺失值

# **新增特征**（过去3小时累积降水量）
df['rainfall_3h'] = df['rainfall'].rolling(window=3, min_periods=1).sum()

# 数据归一化
scaler = MinMaxScaler()
for col in ['temperature', 'rainfall', 'cloud_cover', 'wind_speed', 'humidity', 'pressure', 'rainfall_3h']:
    df[f'{col}_scaled'] = scaler.fit_transform(df[[col]])

# **构建二分类目标变量**
df['rain_binary'] = (df['rainfall'] > 0).astype(int)  # 0 = 无降水，1 = 有降水

# **特征选择**
features = df[['temperature_scaled', 'cloud_cover_scaled', 'wind_speed_scaled', 
               'humidity_scaled', 'pressure_scaled', 'rainfall_3h_scaled']].values
target = df['rain_binary'].values  # 目标变量（降水 or 无降水）

# **解决类别不均衡**
smote = SMOTE(sampling_strategy=0.5, random_state=42)  # 生成少数类别样本
X_resampled, y_resampled = smote.fit_resample(features, target)

# **拆分数据集**
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# **优化 XGBoost 分类器**
xgb_model = xgb.XGBClassifier(
    scale_pos_weight=5,   # 调整类别权重
    colsample_bytree=0.8,
    learning_rate=0.02,  # 降低学习率
    max_depth= 8,     
    n_estimators=100,    # 增加迭代次数
    subsample=0.8,
    random_state=42
)

# 训练模型
xgb_model.fit(X_train, y_train)

# 预测
y_pred_prob = xgb_model.predict_proba(X_test)[:, 1]  # 获取降水概率
y_pred = (y_pred_prob > 0.4).astype(int)  # **降低阈值，提高降水预测**