<a href="https://colab.research.google.com/github/NancyYiWang/WildFireSmokePrediction/blob/main/PredictModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so the data paths used below resolve (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install rasterio

import os
import numpy as np
import xarray as xr
import rasterio
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, LSTM, Dense, Flatten, TimeDistributed, Concatenate

Collecting rasterio
  Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m63.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


In [5]:
# Build training data from GOES .nc files

# Directory scanned for GOES ".nc" files, and the two NetCDF files written by main().
goes_dir = "/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001"
smoke_output_file = "/content/drive/My Drive/WildFire/DATA/Processed/GOES_20240601_smoke.nc"
temp_output_file = "/content/drive/My Drive/WildFire/DATA/Processed/GOES_20240601_temp.nc"

# Variables copied into the "smoke" output file.
# NOTE(review): these names are fog / fog-depth fields; presumably they are
# used here as a proxy for smoke — confirm.
smoke_variables = [
    "MVFR_Fog_Prob",
    "IFR_Fog_Prob",
    "LIFR_Fog_Prob",
    "Fog_Depth",
]
# Variables copied into the "temp" output file.
temp_variables = [
    "Sfc_Temp_Bias",
]

def extract_timestamp_from_filename(filename):
    """Return the 12-digit scan-start stamp that follows ``_s`` in a GOES file name.

    Args:
        filename: File name (or path), e.g.
            ``ABI-L2-GFLSC-M6_v3r1_g18_s202406010001179_e..._c....nc``.

    Returns:
        The first 12 digits after an ``_s`` marker (``YYYYMMDDHHMM`` for the
        file names this notebook processes), or ``None`` when the name has no
        ``_s`` marker followed by at least 12 digits. A message is printed in
        the ``None`` case.
    """
    import re  # local import keeps this definition self-contained

    # Anchor on "_s" followed by digits: a plain split('_s') would grab the
    # text after the first "_s" anywhere in the name (e.g. "_smoke") and
    # return non-timestamp characters.
    match = re.search(r"_s(\d{12})", filename)
    if match is None:
        print(f"Error extracting timestamp from {filename}")
        return None
    return match.group(1)

def filter_first_file_per_hour(nc_files):
    """Keep the first file encountered for each distinct date-and-hour.

    Args:
        nc_files: Iterable of GOES file names. The input order decides which
            file counts as "first" within an hour.

    Returns:
        List of file names, one per date-and-hour, in first-seen order. Names
        whose timestamp cannot be extracted are skipped.
    """
    first_per_hour = {}
    for nc_file in nc_files:
        timestamp = extract_timestamp_from_filename(nc_file)
        if not timestamp:
            continue
        # Key on YYYYMMDDHH rather than HH alone; with HH only, hour 00 of the
        # first day would shadow hour 00 of every later day in the directory.
        hour_key = timestamp[:10]
        first_per_hour.setdefault(hour_key, nc_file)
    return list(first_per_hour.values())

def process_goes_files(file_list, variables):
    """Extract the requested variables from each file and join them along "time".

    Args:
        file_list: Paths of the NetCDF files, in the order they should appear
            along the new dimension.
        variables: Names of the variables to keep. A name that a given file
            does not contain is skipped for that file.

    Returns:
        xarray.Dataset holding the per-file subsets concatenated along a
        dimension named "time".

    Raises:
        ValueError: If ``file_list`` is empty.
    """
    if not file_list:
        raise ValueError("file_list is empty: no GOES files to process")

    datasets = []
    for file in file_list:
        with xr.open_dataset(file) as ds:
            selected_vars = {var: ds[var] for var in variables if var in ds.variables}
            # xarray reads lazily: pull the values into memory while the file
            # is still open so the subset stays usable after the handle closes.
            datasets.append(xr.Dataset(selected_vars).load())

    return xr.concat(datasets, dim="time")

def main():
    """Pick one GOES file per hour and write the smoke and temperature subsets.

    Reads the module-level settings ``goes_dir``, ``smoke_variables``,
    ``temp_variables``, ``smoke_output_file`` and ``temp_output_file``; lists
    the ``.nc`` files in ``goes_dir`` in sorted order, filters them with
    ``filter_first_file_per_hour`` and saves one NetCDF file per variable group.
    """
    all_files = sorted(f for f in os.listdir(goes_dir) if f.endswith(".nc"))

    selected_files = filter_first_file_per_hour(all_files)
    selected_files = [os.path.join(goes_dir, f) for f in selected_files]
    print(f"GOES files to be used: {selected_files}")

    # to_netcdf does not create missing folders, so make sure the output
    # directories exist before any (slow) reading starts.
    for output_path in (smoke_output_file, temp_output_file):
        parent = os.path.dirname(output_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    smoke_data = process_goes_files(selected_files, smoke_variables)
    smoke_data.to_netcdf(smoke_output_file)
    print(f"Smoke-related data has been saved to: {smoke_output_file}")

    temp_data = process_goes_files(selected_files, temp_variables)
    temp_data.to_netcdf(temp_output_file)
    print(f"Temperature-related data has been saved to: {temp_output_file}")

# Run the extraction when this cell is executed directly.
if __name__ == "__main__":
    main()

GOES files to be used: ['/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010001179_e202406010003552_c202406010005019.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010101179_e202406010103552_c202406010104599.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010201180_e202406010203553_c202406010204482.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010301180_e202406010303553_c202406010304596.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010401180_e202406010403553_c202406010404397.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010501180_e202406010503553_c202406010504465.nc', '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010601180_e202406010603553_c202406010605063.nc', '/content/drive/My Drive

ConnectionAbortedError: [Errno 103] Unable to synchronously open file (file read failed: time = Sun Jan 12 08:17:54 2025
, filename = '/content/drive/My Drive/WildFire/DATA/NOAA_GOES_R/001/ABI-L2-GFLSC-M6_v3r1_g18_s202406010201180_e202406010203553_c202406010204482.nc', file descriptor = 44, errno = 103, error message = 'Software caused connection abort', buf = 0x5890919f3580, total read size = 32, bytes this sub-read = 32, bytes actually read = 18446744073709551615, offset = 0)

In [None]:
# Load the NetCDF weather data
def load_weather_data(nc_files, time_steps):
    """Read each weather file, keep the first ``time_steps`` time steps, and stack.

    Args:
        nc_files: Paths of the NetCDF files to read; one slot on the last
            axis of the result per file.
        time_steps: Maximum number of entries to keep along each file's
            "time" dimension.

    Returns:
        numpy.ndarray produced by stacking the per-file arrays on a new last
        axis. Each per-file array comes from ``Dataset.to_array()``, so its
        first axis indexes the file's data variables. ``np.stack`` requires
        all per-file arrays to have identical shapes.

    Raises:
        ValueError: If ``nc_files`` is empty.
    """
    if not nc_files:
        raise ValueError("nc_files is empty: no weather files to load")

    weather_features = []
    for file in nc_files:
        # The context manager releases the file handle once values are in memory.
        with xr.open_dataset(file) as ds:
            if "time" in ds.dims:
                # Truncate along "time" BEFORE to_array(): to_array() puts a new
                # "variable" axis first, so slicing the raw array with
                # [:time_steps] would cut variables instead of time steps.
                feature = ds.isel(time=slice(None, time_steps)).to_array().values
            else:
                # No dimension named "time": fall back to a leading-axis slice.
                feature = ds.to_array().values[:time_steps]
        weather_features.append(feature)
    return np.stack(weather_features, axis=-1)  # merge into one multi-dimensional array

# Load the terrain data
def load_terrain_data(terrain_file):
    """Read band 1 of a raster file and scale it by its largest value.

    Args:
        terrain_file: Path of the raster file opened with ``rasterio``.

    Returns:
        numpy.ndarray with the band values divided by the largest non-NaN
        value. When there is nothing to scale by (empty band, all-NaN band,
        or a maximum of 0) the band is returned unscaled as floats instead of
        dividing by zero.
    """
    with rasterio.open(terrain_file) as src:
        terrain = src.read(1)  # band 1 of the raster

    if terrain.size == 0:
        return terrain.astype(float)

    # nanmax ignores NaN cells; with np.max a single NaN would turn the whole
    # normalised array into NaN.
    peak = np.nanmax(terrain)
    if np.isnan(peak) or peak == 0:
        return terrain.astype(float)
    return terrain / peak  # normalise

# Load the smoke and temperature-difference data
def load_smoke_and_temp(smoke_file, temp_file, time_steps):
    """Read the smoke and temperature files, keeping the first ``time_steps`` time steps.

    Args:
        smoke_file: Path of the NetCDF file with the smoke variables.
        temp_file: Path of the NetCDF file with the temperature variables.
        time_steps: Maximum number of entries to keep along each file's
            "time" dimension.

    Returns:
        Tuple ``(smoke_data, temp_data)`` of numpy arrays. Each comes from
        ``Dataset.to_array()``, so its first axis indexes the file's data
        variables.
    """
    def _read_truncated(path):
        # Open, truncate and materialise one file; the handle is closed on exit.
        with xr.open_dataset(path) as ds:
            if "time" in ds.dims:
                # Slice along "time" before to_array(): to_array() puts a
                # "variable" axis first, so [:time_steps] on the raw array
                # would truncate variables rather than time steps.
                return ds.isel(time=slice(None, time_steps)).to_array().values
            # No dimension named "time": fall back to a leading-axis slice.
            return ds.to_array().values[:time_steps]

    smoke_data = _read_truncated(smoke_file)
    temp_data = _read_truncated(temp_file)
    return smoke_data, temp_data

def build_model(input_shape_smoke, input_shape_weather, input_shape_terrain):
    """Build a three-input Keras model that outputs one smoke frame.

    Args:
        input_shape_smoke: Shape of one smoke sample without the batch axis;
            the first entry is the sequence length and the remaining entries
            are the shape of a single frame fed to the Conv2D layers.
        input_shape_weather: Shape of one weather sample without the batch
            axis, consumed by the LSTM branch.
        input_shape_terrain: Shape of one terrain sample without the batch
            axis, consumed by a Dense layer.

    Returns:
        An uncompiled ``Model`` taking ``[smoke, weather, terrain]`` inputs
        and producing a tensor shaped like ``input_shape_smoke[1:]`` per sample,
        with sigmoid-activated values.
    """
    # Reshape is not among the notebook's top-level Keras imports.
    from tensorflow.keras.layers import Reshape

    frame_shape = tuple(input_shape_smoke[1:])

    # Smoke input (CNN applied to every time step)
    smoke_input = Input(shape=input_shape_smoke, name="Smoke_Input")
    x = TimeDistributed(Conv2D(32, (3, 3), activation="relu", padding="same"))(smoke_input)
    x = TimeDistributed(Conv2D(64, (3, 3), activation="relu", padding="same"))(x)
    x = TimeDistributed(Flatten())(x)
    # Collapse the time axis so this branch is rank-2 like the other two;
    # Concatenate cannot join a (batch, time, features) tensor with
    # (batch, features) tensors.
    # NOTE(review): summarising the sequence with an LSTM is one possible
    # choice — confirm the intended architecture.
    x = LSTM(64)(x)

    # Weather input (LSTM)
    weather_input = Input(shape=input_shape_weather, name="Weather_Input")
    w = LSTM(64, return_sequences=True)(weather_input)
    w = LSTM(64)(w)

    # Terrain input (Dense)
    terrain_input = Input(shape=input_shape_terrain, name="Terrain_Input")
    t = Dense(64, activation="relu")(terrain_input)

    # Fusion
    combined = Concatenate()([x, w, t])
    # np.prod returns a NumPy integer; Dense expects a plain Python int.
    out = Dense(int(np.prod(frame_shape)), activation="sigmoid")(combined)
    # Restore the spatial shape of a smoke frame with a Keras layer; raw
    # tf.reshape on a symbolic Keras tensor is rejected by recent Keras versions.
    out = Reshape(frame_shape)(out)

    return Model(inputs=[smoke_input, weather_input, terrain_input], outputs=out)

In [None]:
# File Directories
nc_files = ["/content/drive/My Drive/WildFire/DATA/NOAA_Climate/vwnd.2024.nc",
            "/content/drive/My Drive/WildFire/DATA/NOAA_Climate/air.2024.nc",
            "/content/drive/My Drive/WildFire/DATA/NOAA_Climate/hgt.2024.nc",
            "/content/drive/My Drive/WildFire/DATA/NOAA_Climate/omega.2024.nc",
            "/content/drive/My Drive/WildFire/DATA/NOAA_Climate/rhum.2024.nc",
            "/content/drive/My Drive/WildFire/DATA/NOAA_Climate/uwnd.2024.nc"]
terrain_file = "/content/drive/My Drive/WildFire/DATA/Terrain/terrain_broad_calgary.tiff"
# NOTE(review): placeholder paths — presumably these should point at the
# smoke / temp NetCDF files written by the GOES preprocessing cell; confirm.
smoke_file = "path/to/smoke.nc"
temp_file = "path/to/temp.nc"

# Data preprocessing
# NOTE(review): 288 assumes 5-minute sampling, while the GOES preprocessing
# cell keeps one file per hour — confirm the intended cadence.
time_steps = 288  # one sample every 5 minutes, 288 per day
weather_data = load_weather_data(nc_files, time_steps)
terrain_data = load_terrain_data(terrain_file)
smoke_data, temp_data = load_smoke_and_temp(smoke_file, temp_file, time_steps)

# Data dimension configuration
# NOTE(review): assumes axes 1 and 2 of smoke_data are the two spatial axes
# and that weather_data is (samples, time, features) — TODO confirm against
# what the loaders actually return.
input_shape_smoke = (time_steps, smoke_data.shape[1], smoke_data.shape[2], 1)
input_shape_weather = (time_steps, weather_data.shape[-1])
input_shape_terrain = (terrain_data.size,)

# Build the model
model = build_model(input_shape_smoke, input_shape_weather, input_shape_terrain)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Train the model
# NOTE(review): the target drops index 0 along axis 1 of smoke_data while the
# inputs are passed unsliced — verify that inputs and target line up and that
# the target shape matches the model output. temp_data is loaded but not used.
history = model.fit(
    [smoke_data, weather_data, terrain_data],
    smoke_data[:, 1:],  # predict the smoke at the next time step
    epochs=10,
    batch_size=16,
)

In [None]:
# Feed in the data for the most recent time step
# NOTE(review): [-1:] takes the last entry along axis 0 — presumably meant to
# be the latest sample; confirm the axis order of smoke_data / weather_data.
current_smoke = smoke_data[-1:]
current_weather = weather_data[-1:]
# NOTE(review): terrain_data is passed as loaded; the model's terrain input was
# declared with shape (terrain_data.size,) — confirm whether it needs
# flattening and a batch axis.
current_terrain = terrain_data

# Predict the smoke state for the next hour
predicted_smoke = model.predict([current_smoke, current_weather, current_terrain])