In [4]:
# Prerequisite: enable the Earth Engine API for your Google Cloud project here:
# https://console.cloud.google.com/apis/api/earthengine.googleapis.com
import ee

# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library against a Cloud project.
ee.Initialize(project='ee-hungweipan2-1') #put your own project id

# Mount Google Drive at /content/drive; the later cells read and write their CSV,
# model and GeoTIFF files under /content/drive/MyDrive/Training Data.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Gathering data
class FirePredictionModel:
    """Assemble monthly fire/predictor image stacks in Earth Engine and export pixel samples.

    The region of interest is selected from the FAO GAUL 2015 level-1 boundaries.
    For each requested month a five-band image is built (fire label, NDVI,
    temperature, soil moisture, wind speed), sampled over the region, and the
    samples can be exported to Google Drive as a table.
    """

    def __init__(self, roi_name, country_name):
        """Select the region of interest.

        Parameters:
        - roi_name (str): Value matched against the 'ADM1_NAME' property.
        - country_name (str): Value matched against the 'ADM0_NAME' property.
        """
        self.roi = ee.FeatureCollection("FAO/GAUL/2015/level1") \
                    .filter(ee.Filter.eq('ADM1_NAME', roi_name)) \
                    .filter(ee.Filter.eq('ADM0_NAME', country_name))

    def process_monthly_data(self, year, month):
        """Build the label + predictor image for one calendar month.

        Parameters:
        - year (int): Calendar year.
        - month (int): Calendar month (1-12).

        Returns:
        - ee.Image with the bands, in this order:
            FireOccurred    1.0 where the monthly maximum of 'FireMask' is 7, 8 or 9, else 0.0
            NDVI            monthly maximum of 'NDVI' multiplied by 0.0001
            MaxTemp         monthly maximum of 'temperature_2m' minus 273.15
            MinSoilMoisture monthly minimum of 'volumetric_soil_water_layer_1'
            max_wind_speed  monthly maximum of sqrt(u**2 + v**2) of the 10 m wind components
        """
        start_date = ee.Date.fromYMD(year, month, 1)
        end_date = start_date.advance(1, 'month')  # filterDate treats the end as exclusive

        # Fire data (label).
        # Select the single band that is needed *before* reducing, so the reducer
        # does not compute a maximum for every other band of the product.
        fire = ee.ImageCollection('MODIS/061/MOD14A1') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date) \
            .select('FireMask') \
            .max() \
            .expression("(b('FireMask') == 7 || b('FireMask') == 8 || b('FireMask') == 9) ? 1 : 0") \
            .rename('FireOccurred') \
            .toFloat()

        # NDVI data (same select-then-reduce ordering as above)
        ndvi = ee.ImageCollection('MODIS/061/MOD13A2') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date) \
            .select('NDVI') \
            .max() \
            .multiply(0.0001) \
            .rename('NDVI')

        # Weather data
        weather = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date)

        # NOTE(review): this is the monthly maximum of the 'temperature_2m' band;
        # confirm that this band (rather than a daily-maximum band) is the intended input.
        max_temp = weather.select('temperature_2m').max().subtract(273.15).rename('MaxTemp')
        min_soil_moisture = weather.select('volumetric_soil_water_layer_1').min().rename('MinSoilMoisture')

        # Wind speed magnitude per image, then the monthly maximum of that magnitude.
        max_wind_speed = weather.map(lambda img: img.expression(
            'sqrt(u**2 + v**2)', {
                'u': img.select('u_component_of_wind_10m'),
                'v': img.select('v_component_of_wind_10m')
            }
        ).rename('wind_speed')).max().rename('max_wind_speed')

        # Combine image (the label band comes first, predictors after it)
        combined = fire \
            .addBands(ndvi) \
            .addBands(max_temp) \
            .addBands(min_soil_moisture) \
            .addBands(max_wind_speed)
        return combined

    def collect_training_data(self, years, months, scale=1000, geometries=False):
        """Sample the monthly image for every (year, month) pair over the region.

        Parameters:
        - years (iterable of int): Years to process.
        - months (iterable of int): Months to process within each year.
        - scale (number): Passed to ee.Image.sample as `scale`.
        - geometries (bool): Passed to ee.Image.sample as `geometries`.

        Returns:
        - ee.FeatureCollection: All monthly sample collections flattened into one.
        """
        region = self.roi.geometry()
        training_data = [
            self.process_monthly_data(year, month).sample(
                region=region,
                scale=scale,
                geometries=geometries
            )
            for year in years
            for month in months
        ]
        return ee.FeatureCollection(training_data).flatten()

    def export_training_data(self, training_data, description, folder, file_format='CSV'):
        """Start a batch export of a feature collection to Google Drive.

        Parameters:
        - training_data (ee.FeatureCollection): Collection to export.
        - description (str): Task description passed to the export.
        - folder (str): Drive folder passed to the export.
        - file_format (str): Export file format (default 'CSV').

        Returns:
        - The started export task, so the caller can poll its status instead of
          only checking the web console.
        """
        task = ee.batch.Export.table.toDrive(
            collection=training_data,
            description=description,
            folder=folder,
            fileFormat=file_format
        )
        task.start()
        return task

# ------------------ Export training / testing tables ------------------ #
# One model instance serves every export: the region of interest is identical for all of them.
model = FirePredictionModel(roi_name='Alberta', country_name='Canada')

# Months 5-10 (May through October) are sampled in every year.
months = list(range(5, 11))

# Each job: (status message, years to sample, Drive file name, keep point geometries?)
# Only the 2024 testing table keeps geometries.
export_jobs = [
    ("Traning data (2015-2023) exporting...", range(2015, 2024), 'TrainingData2015_2023', False),
    ("Traning data (2015-2024) exporting...", range(2015, 2025), 'TrainingData2015_2024', False),
    ("Testing data (2024) exporting...", range(2024, 2025), 'TestingData2024', True),
]

for status_message, year_range, description, keep_geometries in export_jobs:
    years = list(year_range)

    # Collect the samples for this job
    training_data = model.collect_training_data(years, months, scale=1000, geometries=keep_geometries)

    # Export the samples to Google Drive
    model.export_training_data(training_data, description=description, folder='Training Data')

    print(status_message)

print("Check tasks status: https://code.earthengine.google.com/tasks")


Traning data (2015-2023) exporting...
Traning data (2015-2024) exporting...
Testing data (2024) exporting...


In [5]:
# Downsample
import pandas as pd

def downsample_csv(input_csv, output_csv, downsample_factor=0.1,
                   label_column='FireOccurred', random_state=42):
    """
    Downsample non-fire samples in the dataset to balance the data.

    All rows whose label equals 1 are kept; a random fraction of the rows whose
    label equals 0 is kept. Rows with any other label value (including missing
    values) are not written to the output.

    Parameters:
    - input_csv (str): Path to the input CSV file.
    - output_csv (str): Path to save the downsampled CSV file.
    - downsample_factor (float): Fraction of non-fire samples to retain, between
      0 and 1 inclusive (e.g., 0.1 means 10%).
    - label_column (str): Name of the 0/1 label column (default 'FireOccurred').
    - random_state (int): Seed for the random selection of non-fire rows (default 42).

    Returns:
    - None: Saves the downsampled dataset to the specified output path.

    Raises:
    - ValueError: If downsample_factor is outside [0, 1].
    - KeyError: If label_column is not a column of the input CSV.
    """
    # Fail before reading a potentially large file if the fraction is unusable.
    if not 0 <= downsample_factor <= 1:
        raise ValueError(
            f"downsample_factor must be between 0 and 1, got {downsample_factor!r}"
        )

    # Load the original CSV
    data = pd.read_csv(input_csv)

    # Separate fire and non-fire samples
    labels = data[label_column]
    fire_data = data[labels == 1]
    non_fire_data = data[labels == 0]

    # Downsample non-fire samples (seeded, so repeated runs give the same subset)
    non_fire_downsampled = non_fire_data.sample(frac=downsample_factor, random_state=random_state)

    # Combine fire samples and downsampled non-fire samples
    combined_data = pd.concat([fire_data, non_fire_downsampled])

    # Save the downsampled dataset
    combined_data.to_csv(output_csv, index=False)
    print(f"Downsampled dataset saved to {output_csv}")

# Balance both exported training tables: keep every fire row and 10% of the non-fire rows.
# Each pair is (exported CSV, downsampled CSV): first the 2015-2023 data, then the 2015-2024 data.
csv_pairs = [
    ('/content/drive/MyDrive/Training Data/TrainingData2015_2023.csv',
     '/content/drive/MyDrive/Training Data/TrainingData2015_2023_downsampled.csv'),
    ('/content/drive/MyDrive/Training Data/TrainingData2015_2024.csv',
     '/content/drive/MyDrive/Training Data/TrainingData2015_2024_downsampled.csv'),
]

for source_csv, balanced_csv in csv_pairs:
    downsample_csv(input_csv=source_csv, output_csv=balanced_csv, downsample_factor=0.1)


Downsampled dataset saved to /content/drive/MyDrive/Training Data/TrainingData2015_2023_downsampled.csv
Downsampled dataset saved to /content/drive/MyDrive/Training Data/TrainingData2015_2024_downsampled.csv


In [7]:
# Sanity check: load the downsampled 2015-2023 table and print its first five rows.
file_path_train = '/content/drive/MyDrive/Training Data/TrainingData2015_2023_downsampled.csv'
train_data = pd.read_csv(file_path_train)

preview_rows = train_data.head(5)
print(preview_rows)


  system:index  FireOccurred   MaxTemp  MinSoilMoisture    NDVI  \
0      0_52814           1.0  21.22248         0.278349  0.5257   
1      0_52815           1.0  21.22248         0.278349  0.4591   
2      0_52816           1.0  21.22248         0.278349  0.4591   
3      0_52817           1.0  21.22248         0.278349  0.6517   
4      0_52818           1.0  21.22248         0.278349  0.6517   

   max_wind_speed                                    .geo  
0         2.25131  {"type":"MultiPoint","coordinates":[]}  
1         2.25131  {"type":"MultiPoint","coordinates":[]}  
2         2.25131  {"type":"MultiPoint","coordinates":[]}  
3         2.25131  {"type":"MultiPoint","coordinates":[]}  
4         2.25131  {"type":"MultiPoint","coordinates":[]}  


In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix
from joblib import dump


# Columns shared by both models: four predictors and the 0/1 fire label.
feature_columns = ['NDVI', 'MaxTemp', 'MinSoilMoisture', 'max_wind_speed']
label_column = 'FireOccurred'

# ------------------ Model 1: 2015-2023 Data, Predicting 2024 ------------------ #
# Load training data (2015-2023)
file_path_train = '/content/drive/MyDrive/Training Data/TrainingData2015_2023_downsampled.csv'
train_data = pd.read_csv(file_path_train)

X_train = train_data[feature_columns]
y_train = train_data[label_column]

# Train the Random Forest classifier
rf_classifier_1 = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf_classifier_1.fit(X_train, y_train)

# Save the model
model_file_path_1 = '/content/drive/MyDrive/Training Data/rf_fire_model_2015_2023_with_downsampling.joblib'
dump(rf_classifier_1, model_file_path_1)
print(f"Model 1 saved to: {model_file_path_1}")

# Load 2024 data for prediction (read once; it is not needed before this point)
file_path_test = '/content/drive/MyDrive/Training Data/TestingData2024.csv'
test_data = pd.read_csv(file_path_test)

X_test = test_data[feature_columns]
y_test = test_data[label_column]

# Predict on 2024 data
y_pred_1 = rf_classifier_1.predict(X_test)  # Binary output
y_pred_proba_1 = rf_classifier_1.predict_proba(X_test)[:, 1]  # Continuous output (probability of the second class, between 0 and 1)

# Export predictions for 2024 data
test_data['FireProbability'] = y_pred_proba_1
output_file_path_1 = '/content/drive/MyDrive/Training Data/TestingData2024_with_predictions.csv'
test_data.to_csv(output_file_path_1, index=False)
print(f"Model 1 prediction saved to: {output_file_path_1}")

# ------------------ Model 2: 70/30 Split on 2015-2024 Data ------------------ #
# Reload training data (2015-2024)
file_path_train = '/content/drive/MyDrive/Training Data/TrainingData2015_2024_downsampled.csv'
train_data = pd.read_csv(file_path_train)

X_train = train_data[feature_columns]
y_train = train_data[label_column]

# Split data into train (70%) and test (30%)
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

# Train the Random Forest classifier on 70% of the data.
# The features must be the 70% split as well: fitting the full X_train against
# y_train_split gives arrays of different lengths and raises ValueError.
rf_classifier_2 = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf_classifier_2.fit(X_train_split, y_train_split)

# Save the model
model_file_path_2 = '/content/drive/MyDrive/Training Data/rf_fire_model_70_30_split_with_downsampling.joblib'
dump(rf_classifier_2, model_file_path_2)
print(f"Model 2 saved to: {model_file_path_2}")

# Predict on the 30% test set
y_pred_2 = rf_classifier_2.predict(X_test_split)
y_pred_proba_2 = rf_classifier_2.predict_proba(X_test_split)[:, 1]

# Export predictions for the 30% test data.
# Work on an explicit copy so adding columns never touches the split itself.
test_data_split = X_test_split.copy()
test_data_split['Actual'] = y_test_split
test_data_split['Predicted'] = y_pred_2
test_data_split['FireProbability'] = y_pred_proba_2

output_file_path_2 = '/content/drive/MyDrive/Training Data/TrainingData_70_30_split_with_predictions.csv'
test_data_split.to_csv(output_file_path_2, index=False)
print(f"Model 2 prediction saved to: {output_file_path_2}")

KeyError: "['max_temp', 'min_soil_moisture'] not in index"

In [6]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


In [None]:
# Convert CSV to Raster (GeoTIFF)
!pip install rasterio
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
from rasterio.features import rasterize
import rasterio
import numpy as np
import json

# Load CSV.
# This is the file the modelling cell writes (TestingData2024_with_predictions.csv);
# no cell in this notebook writes a "..._fireseason_..." CSV.
file_path = '/content/drive/MyDrive/Training Data/TestingData2024_with_predictions.csv'
df = pd.read_csv(file_path)

# Convert the .geo field to spatial points
df['geometry'] = df['.geo'].apply(lambda x: shape(json.loads(x)))  # Use json.loads to correctly parse the GeoJSON string
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

# Rows exported without geometries carry an empty geometry and cannot be placed on a grid.
gdf = gdf[~gdf.geometry.is_empty]
if gdf.empty:
    raise ValueError(f"No rows with a usable geometry in {file_path}")

# Set the output GeoTIFF file name
output_tiff_path = '/content/drive/MyDrive/Training Data/TestingData2024_fireseason_with_predictions.tif'

# Set resolution to 1000 meters (approximately 0.009 degrees)
resolution = 0.009  # Resolution (unit: degrees)

# Build the output grid.
# total_bounds is the extent of the points themselves; pad it by half a pixel on
# every side so points lying exactly on the east or south edge fall inside the
# raster instead of on its outer boundary.
bounds = gdf.total_bounds  # Get bounds (minx, miny, maxx, maxy)
half_pixel = resolution / 2
west = bounds[0] - half_pixel
south = bounds[1] - half_pixel
east = bounds[2] + half_pixel
north = bounds[3] + half_pixel

# Compute the grid size once; round up so the grid always covers the padded extent
# and never collapses to zero rows or columns.
width = max(1, int(np.ceil((east - west) / resolution)))    # Number of columns
height = max(1, int(np.ceil((north - south) / resolution)))  # Number of rows

# North-up transform anchored at the top-left corner with square pixels of `resolution` degrees
transform = rasterio.transform.from_origin(west, north, resolution, resolution)

# Set the output image size
out_shape = (height, width)

# Rasterize.
# NOTE(review): the table holds every sampled month; when several rows fall in the
# same pixel, presumably only one of their values ends up in the raster — confirm
# whether a per-month raster or an aggregate (e.g. maximum) is wanted.
raster = rasterize(
    ((geom, value) for geom, value in zip(gdf.geometry, gdf['FireProbability'])),
    out_shape=out_shape,
    transform=transform,
    fill=0,
    all_touched=True,
    dtype='float32'
)

# Save as GeoTIFF
with rasterio.open(
    output_tiff_path,
    'w',
    driver='GTiff',
    height=raster.shape[0],
    width=raster.shape[1],
    count=1,
    dtype='float32',
    crs="EPSG:4326",
    transform=transform,
) as dst:
    dst.write(raster, 1)

print(f"GeoTIFF saved at: {output_tiff_path}")
