In [None]:
#Enable API here: https://console.cloud.google.com/apis/api/earthengine.googleapis.com
import ee

# Cloud project passed to ee.Initialize below.
EE_PROJECT_ID = 'ee-hungweipan2-1'

# Start the Earth Engine authentication flow for this session.
ee.Authenticate()

# Initialize the Earth Engine client library against the project above.
ee.Initialize(project=EE_PROJECT_ID)

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_0JLhFqfSY1uiEaW?source=Init


In [None]:
from google.colab import drive

# Path at which Google Drive is mounted inside the Colab runtime.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
class FirePredictionModel:
    """Builds monthly wildfire training samples for one FAO GAUL level-1 region.

    Every monthly image stacks a binary fire label (MODIS MOD14A1 ``FireMask``)
    with predictor bands: NDVI (MODIS MOD13A2), 2 m temperature, relative
    humidity and wind speed derived from ERA5-Land daily aggregates, and
    elevation from the NRCan CDEM collection.
    """

    def __init__(self, roi_name, country_name):
        """Select the region of interest from the GAUL 2015 level-1 boundaries.

        Args:
            roi_name: Value matched against the ``ADM1_NAME`` property.
            country_name: Value matched against the ``ADM0_NAME`` property.
        """
        self.roi = ee.FeatureCollection("FAO/GAUL/2015/level1") \
                    .filter(ee.Filter.eq('ADM1_NAME', roi_name)) \
                    .filter(ee.Filter.eq('ADM0_NAME', country_name))

    def process_monthly_data(self, year, month):
        """Build the label-plus-predictors image for one calendar month.

        Args:
            year: Calendar year passed to ``ee.Date.fromYMD``.
            month: Calendar month passed to ``ee.Date.fromYMD``.

        Returns:
            An image with the bands ``FireOccurred``, ``NDVI``, ``temp``,
            ``relative_humidity``, ``wind_speed`` and ``DEM`` (in that order).
        """
        start_date = ee.Date.fromYMD(year, month, 1)
        end_date = start_date.advance(1, 'month')

        # Fire label: take the per-pixel maximum FireMask over the month and
        # flag the pixel when that maximum is one of the classes 7, 8 or 9.
        # The band is selected before reducing so only FireMask is processed.
        fire = ee.ImageCollection('MODIS/061/MOD14A1') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date) \
            .select('FireMask') \
            .max() \
            .expression("(b('FireMask') == 7 || b('FireMask') == 8 || b('FireMask') == 9) ? 1 : 0") \
            .rename('FireOccurred') \
            .toFloat()

        # Monthly mean NDVI, rescaled by the 0.0001 factor.
        ndvi = ee.ImageCollection('MODIS/061/MOD13A2') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date) \
            .select('NDVI') \
            .mean() \
            .multiply(0.0001) \
            .toFloat()

        # Monthly mean of the four ERA5-Land bands needed below.
        weather = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
            .filterBounds(self.roi) \
            .filterDate(start_date, end_date) \
            .select([
                'temperature_2m',
                'dewpoint_temperature_2m',
                'u_component_of_wind_10m',
                'v_component_of_wind_10m'
            ]) \
            .mean()

        # Wind speed is the magnitude of the (u, v) wind vector.
        wind_speed = weather.expression(
            'sqrt(u**2 + v**2)', {
                'u': weather.select('u_component_of_wind_10m'),
                'v': weather.select('v_component_of_wind_10m')
            }
        ).rename('wind_speed')

        # Relative humidity (%) as the ratio of the two vapour-pressure terms
        # evaluated at the dew point and at the air temperature.
        relative_humidity = weather.expression(
            '100 * (6.11 * pow(10, (7.5 * dew) / (237.7 + dew))) / (6.11 * pow(10, (7.5 * temp) / (237.7 + temp)))', {
                'temp': weather.select('temperature_2m').subtract(273.15),  # Convert to Celsius
                'dew': weather.select('dewpoint_temperature_2m').subtract(273.15)  # Convert to Celsius
            }
        ).rename('relative_humidity')

        # Elevation: mosaic of the CDEM tiles, clipped to the region.
        dem = ee.ImageCollection('NRCan/CDEM').mosaic().clip(self.roi).toFloat().rename('DEM')

        # Stack label first, then the predictors.
        combined = fire \
            .addBands(ndvi.rename('NDVI')) \
            .addBands(weather.select(['temperature_2m']).subtract(273.15).rename(['temp'])) \
            .addBands(relative_humidity) \
            .addBands(wind_speed) \
            .addBands(dem)
        return combined

    def collect_training_data(self, years, months, scale=10000, geometries=False):
        """Sample the monthly image for every (year, month) combination.

        Args:
            years: Iterable of calendar years.
            months: Iterable of calendar months sampled in each year.
            scale: Value forwarded as ``scale`` to ``Image.sample``.
            geometries: Forwarded to ``Image.sample``. Previously this argument
                was accepted but ignored (``False`` was always used).

        Returns:
            A single flattened feature collection holding the samples of all
            requested months.
        """
        region = self.roi.geometry()  # same region for every month
        monthly_samples = [
            self.process_monthly_data(year, month).sample(
                region=region,
                scale=scale,
                geometries=geometries
            )
            for year in years
            for month in months
        ]
        return ee.FeatureCollection(monthly_samples).flatten()

    def export_training_data(self, training_data, description, folder, file_format='CSV'):
        """Start a batch export of a feature collection to Google Drive.

        Args:
            training_data: Feature collection to export.
            description: Task description forwarded to the export call.
            folder: Drive folder forwarded to the export call.
            file_format: Forwarded as ``fileFormat`` (default ``'CSV'``).

        Returns:
            The started export task, so callers can inspect its progress.
        """
        task = ee.batch.Export.table.toDrive(
            collection=training_data,
            description=description,
            folder=folder,
            fileFormat=file_format
        )
        task.start()
        return task

# Build the sampler once; both exports cover the same region.
model = FirePredictionModel(roi_name='Alberta', country_name='Canada')

# Months sampled in every year (May through October).
months = list(range(5, 11))

# One entry per export: (years, Drive task description, keep sample geometries).
# 2015-2023 is the training set; 2024 is held out for prediction and requests
# point geometries with its samples.
for years, description, geometries in (
    (list(range(2015, 2024)), 'TrainingData2015_2023_fireseason', False),
    (list(range(2024, 2025)), 'TrainingData2024_fireseason', True),
):
    # Sample every (year, month) image of this period.
    training_data = model.collect_training_data(
        years, months, scale=10000, geometries=geometries
    )

    # Start the export to Google Drive; it keeps running server-side.
    model.export_training_data(
        training_data, description=description, folder='wildfire_prediction'
    )

    print(f"Export task started: {description}")



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# RandomUnderSampler was used below without ever being imported (NameError on a fresh kernel).
from imblearn.under_sampling import RandomUnderSampler
from joblib import dump
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Directory holding the CSV files read and written by this cell.
# NOTE(review): the export cell targets the Drive folder 'wildfire_prediction',
# while this cell reads from 'Training Data' - confirm the CSVs were moved there.
DATA_DIR = '/content/drive/MyDrive/Training Data'

feature_columns = ['NDVI', 'temp', 'relative_humidity', 'wind_speed', 'DEM']
label_column = 'FireOccurred'


def fit_undersampled_forest(features, labels, sampler):
    """Undersample with ``sampler`` and fit a random forest on the result.

    Args:
        features: Feature frame to train on.
        labels: Label series aligned with ``features``.
        sampler: Object exposing ``fit_resample(features, labels)``.

    Returns:
        Tuple ``(classifier, resampled_features, resampled_labels)``.
    """
    features_resampled, labels_resampled = sampler.fit_resample(features, labels)
    classifier = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    classifier.fit(features_resampled, labels_resampled)
    return classifier, features_resampled, labels_resampled


# ------------------ Load data (each CSV is read once) ------------------ #
# Training data (2015-2023)
file_path_train = f'{DATA_DIR}/TrainingData2015_2023_fireseason.csv'
train_data = pd.read_csv(file_path_train)
data = train_data  # the original cell also exposed the training frame under this name

X_train = train_data[feature_columns]
y_train = train_data[label_column]

# Prediction data (2024)
file_path_test = f'{DATA_DIR}/TrainingData2024_fireseason.csv'
test_data = pd.read_csv(file_path_test)

X_test = test_data[feature_columns]
y_test = test_data[label_column]

# ------------------ Model 1: 2015-2023 Data, Predicting 2024 ------------------ #
# Downsampling: RandomUnderSampler (fixed seed, reused for Model 2)
undersampler = RandomUnderSampler(random_state=42)

# Train the Random Forest classifier on the undersampled 2015-2023 data
rf_classifier_1, X_train_resampled, y_train_resampled = fit_undersampled_forest(
    X_train, y_train, undersampler
)

# Save the model
model_file_path_1 = f'{DATA_DIR}/rf_fire_model_2015_2023_with_downsampling.joblib'
dump(rf_classifier_1, model_file_path_1)
print(f"Model 1 saved to: {model_file_path_1}")

# Predict on 2024 data
y_pred_1 = rf_classifier_1.predict(X_test)  # Binary output
y_pred_proba_1 = rf_classifier_1.predict_proba(X_test)[:, 1]  # Probability of the positive class (0..1)

# Export predictions for 2024 data; the binary prediction is now written as
# well, matching the columns exported for Model 2.
test_data['Predicted'] = y_pred_1
test_data['FireProbability'] = y_pred_proba_1
output_file_path_1 = f'{DATA_DIR}/TrainingData2024_fireseason_with_predictions.csv'
test_data.to_csv(output_file_path_1, index=False)
print(f"Model 1 prediction saved to: {output_file_path_1}")

# ------------------ Model 2: 70/30 Split on 2015-2023 Data ------------------ #
# Split data into train (70%) and test (30%)
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)

# Downsample only the training part, then train on it
rf_classifier_2, X_train_split_resampled, y_train_split_resampled = fit_undersampled_forest(
    X_train_split, y_train_split, undersampler
)

# Save the model
model_file_path_2 = f'{DATA_DIR}/rf_fire_model_70_30_split_with_downsampling.joblib'
dump(rf_classifier_2, model_file_path_2)
print(f"Model 2 saved to: {model_file_path_2}")

# Predict on the 30% test set
y_pred_2 = rf_classifier_2.predict(X_test_split)
y_pred_proba_2 = rf_classifier_2.predict_proba(X_test_split)[:, 1]

# Export predictions for the 30% test data (features + actual + predictions)
test_data_split = X_test_split[feature_columns].assign(
    Actual=y_test_split,
    Predicted=y_pred_2,
    FireProbability=y_pred_proba_2,
)

output_file_path_2 = f'{DATA_DIR}/TrainingData_70_30_split_with_predictions.csv'
test_data_split.to_csv(output_file_path_2, index=False)
print(f"Model 2 prediction saved to: {output_file_path_2}")