In [43]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

## Load Data

In [44]:
# Read the three pre-processed CSV inputs into DataFrames (same order as listed)
fire_data, cell_static_data, cell_dynamic_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'input_data/processed/fire_data.csv',
        'input_data/processed/cell_static.csv',
        'input_data/processed/cell_dynamic.csv',
    )
)

## Preprocess Data

In [45]:
# Parse the acquisition dates, then keep only rows inside the pre-war window
# 2015-01-01 .. 2022-02-23 (both bounds inclusive).
fire_data['ACQ_DATE'] = pd.to_datetime(fire_data['ACQ_DATE'])
cell_dynamic_data['ACQ_DATE'] = pd.to_datetime(cell_dynamic_data['ACQ_DATE'])

fire_in_window = fire_data['ACQ_DATE'].between('2015-01-01', '2022-02-23')
dynamic_in_window = cell_dynamic_data['ACQ_DATE'].between('2015-01-01', '2022-02-23')

fire_data = fire_data[fire_in_window]
cell_dynamic_data = cell_dynamic_data[dynamic_in_window]

In [46]:
# Keep an independent per-fire snapshot (ID, cell, date, coordinates); the
# FIRE_ID and coordinate columns are dropped from fire_data in the next cell.
fire_data_copy = fire_data.loc[:, ['FIRE_ID', 'GRID_CELL_50KM', 'ACQ_DATE', 'LONGITUDE', 'LATITUDE']].copy()

In [47]:
# Remove the columns that are not used further on, collapse the rows that
# become identical as a result, and renumber the index from zero.
unused_columns = ['FIRE_ID', 'LATITUDE', 'LONGITUDE', 'GRID_CELL', 'OBLAST_ID',
                  'LATITUDE_1KM', 'LONGITUDE_1KM', 'GRID_CELL_1KM', 'OBLAST_ID_1KM',
                  'FIRE_COUNT_CELL_1KM']
fire_data = (
    fire_data
    .drop(columns=unused_columns)
    .drop_duplicates()
    .reset_index(drop=True)
)
fire_data.head()

Unnamed: 0,ACQ_DATE,DAY_OF_YEAR,LATITUDE_50KM,LONGITUDE_50KM,GRID_CELL_50KM,OBLAST_ID_50KM,FIRE_COUNT_CELL_50KM
0,2015-01-01,1,47.0,37.5,47.0_37.5,UA14,4
1,2015-01-02,2,50.5,28.5,50.5_28.5,UA18,1
2,2015-01-03,3,48.0,33.5,48.0_33.5,UA12,1
3,2015-01-03,3,48.5,35.0,48.5_35.0,UA12,1
4,2015-01-04,4,49.0,24.5,49.0_24.5,UA26,1


In [48]:
def merge_static_data(data, static_data, resolution='50KM'):
    """Left-join the static cell features of one grid resolution onto ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the key columns ``GRID_CELL_<resolution>``,
        ``OBLAST_ID_<resolution>``, ``LATITUDE_<resolution>`` and
        ``LONGITUDE_<resolution>``.
    static_data : pandas.DataFrame
        Static feature table. Only columns whose name ends with ``resolution``
        are used; the frame itself is not modified.
    resolution : str, default '50KM'
        Column-name suffix selecting the grid resolution.

    Returns
    -------
    pandas.DataFrame
        ``data`` with the matching static columns appended (left join, so rows
        of ``data`` without a match keep NaN in the static columns).
    """
    # Keep only the columns of the requested resolution and drop repeated rows.
    # Chaining avoids mutating a slice of the caller's frame in place.
    resolution_columns = [col for col in static_data.columns if col.endswith(resolution)]
    static_subset = static_data[resolution_columns].drop_duplicates()
    # Build the join keys from ``resolution`` so that values other than the
    # default '50KM' work as well.
    join_keys = [
        'GRID_CELL_{}'.format(resolution),
        'OBLAST_ID_{}'.format(resolution),
        'LATITUDE_{}'.format(resolution),
        'LONGITUDE_{}'.format(resolution),
    ]
    return pd.merge(data, static_subset, how='left', on=join_keys)

In [49]:
# Attach the 50 km static cell features to every fire record
fire_data_processed = merge_static_data(data=fire_data, static_data=cell_static_data, resolution='50KM')
fire_data_processed.head()

Unnamed: 0,ACQ_DATE,DAY_OF_YEAR,LATITUDE_50KM,LONGITUDE_50KM,GRID_CELL_50KM,OBLAST_ID_50KM,FIRE_COUNT_CELL_50KM,POP_DENSITY_50KM,LAND_USE_CLASS_1.0_50KM,LAND_USE_CLASS_2.0_50KM,LAND_USE_CLASS_3.0_50KM,LAND_USE_CLASS_4.0_50KM,LAND_USE_CLASS_0_50KM
0,2015-01-01,1,47.0,37.5,47.0_37.5,UA14,4,235.527153,0.015863,0.041639,0.333374,0.096907,0.0
1,2015-01-02,2,50.5,28.5,50.5_28.5,UA18,1,135.458196,0.313123,0.114159,0.477092,0.084996,0.0
2,2015-01-03,3,48.0,33.5,48.0_33.5,UA12,1,235.527153,0.034811,0.148977,0.644517,0.138799,0.0
3,2015-01-03,3,48.5,35.0,48.5_35.0,UA12,1,235.527153,0.073142,0.086862,0.548732,0.222136,0.0
4,2015-01-04,4,49.0,24.5,49.0_24.5,UA26,1,235.527153,0.388293,0.058082,0.397541,0.139557,0.0


In [50]:
def generate_fire_time_series(data, start_date, end_date, resolution='50KM'):
    """Expand the per-cell fire records into one row per cell and calendar day.

    For every distinct value of ``GRID_CELL_<resolution>`` the cell's rows are
    re-indexed onto the daily range ``start_date`` .. ``end_date``. Days
    without a record are filled with 0, ``DAY_OF_YEAR`` and ``ACQ_DATE`` are
    recomputed from the calendar, and every remaining column except the fire
    count is overwritten with the value found in the cell's first row.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``ACQ_DATE``, ``DAY_OF_YEAR``, ``GRID_CELL_<resolution>``
        and ``FIRE_COUNT_CELL_<resolution>``.
    start_date, end_date
        Bounds handed to ``pandas.date_range`` (daily frequency).
    resolution : str, default '50KM'
        Suffix of the cell and fire-count column names.

    Returns
    -------
    pandas.DataFrame
        The per-cell frames stacked in order of first appearance of each cell.
        Each cell's rows are numbered from 0, so index labels repeat across cells.
    """
    cell_column = 'GRID_CELL_{}'.format(resolution)
    count_column = 'FIRE_COUNT_CELL_{}'.format(resolution)
    # The calendar is the same for every cell, so it is built once
    calendar = pd.date_range(start=start_date, end=end_date, freq='D')

    per_cell_frames = []
    for cell in data[cell_column].unique():
        observed = data[data[cell_column] == cell]
        # First-row values of all columns other than date, day-of-year and fire count
        constants = observed.iloc[0].drop(['ACQ_DATE', 'DAY_OF_YEAR', count_column])

        # Index by date and stretch onto the full calendar; missing days become 0
        daily = observed.set_index('ACQ_DATE')
        daily.index = pd.to_datetime(daily.index)
        daily = daily.reindex(calendar, fill_value=0)

        # Recompute the calendar-derived columns for every day, filled ones included
        daily = daily.assign(DAY_OF_YEAR=daily.index.dayofyear, ACQ_DATE=daily.index)
        daily = daily.reset_index(drop=True)
        column_order = ['ACQ_DATE'] + [name for name in daily.columns if name != 'ACQ_DATE']
        daily = daily[column_order]

        # Restore the per-cell constants that the zero fill wiped on missing days
        for name in constants.index:
            daily[name] = constants[name]

        per_cell_frames.append(daily)

    return pd.concat(per_cell_frames)

In [51]:
# Expand every cell to one row per day over the pre-war window 2015-01-01 .. 2022-02-23
fire_data_processed = generate_fire_time_series(
    data=fire_data_processed,
    start_date='2015-01-01',
    end_date='2022-02-23',
    resolution='50KM',
)
fire_data_processed.head()

Unnamed: 0,ACQ_DATE,DAY_OF_YEAR,LATITUDE_50KM,LONGITUDE_50KM,GRID_CELL_50KM,OBLAST_ID_50KM,FIRE_COUNT_CELL_50KM,POP_DENSITY_50KM,LAND_USE_CLASS_1.0_50KM,LAND_USE_CLASS_2.0_50KM,LAND_USE_CLASS_3.0_50KM,LAND_USE_CLASS_4.0_50KM,LAND_USE_CLASS_0_50KM
0,2015-01-01,1,47.0,37.5,47.0_37.5,UA14,4,235.527153,0.015863,0.041639,0.333374,0.096907,0.0
1,2015-01-02,2,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0
2,2015-01-03,3,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0
3,2015-01-04,4,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0
4,2015-01-05,5,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0


In [52]:
def merge_dynamic_data(data, dynamic_data):
    """Left-join the dynamic table onto ``data`` by oblast and acquisition date.

    ``data`` is matched on (``OBLAST_ID_50KM``, ``ACQ_DATE``) against
    (``OBLAST_ID``, ``ACQ_DATE``) in ``dynamic_data``. The ``OBLAST_ID``
    column brought in from ``dynamic_data`` is removed from the result.
    """
    joined = data.merge(
        dynamic_data,
        how='left',
        left_on=['OBLAST_ID_50KM', 'ACQ_DATE'],
        right_on=['OBLAST_ID', 'ACQ_DATE'],
    )
    return joined.drop(columns=['OBLAST_ID'])

In [53]:
# Attach the dynamic per-oblast, per-day columns to the daily cell series
fire_data_processed = merge_dynamic_data(data=fire_data_processed, dynamic_data=cell_dynamic_data)
fire_data_processed.head()

Unnamed: 0,ACQ_DATE,DAY_OF_YEAR,LATITUDE_50KM,LONGITUDE_50KM,GRID_CELL_50KM,OBLAST_ID_50KM,FIRE_COUNT_CELL_50KM,POP_DENSITY_50KM,LAND_USE_CLASS_1.0_50KM,LAND_USE_CLASS_2.0_50KM,LAND_USE_CLASS_3.0_50KM,LAND_USE_CLASS_4.0_50KM,LAND_USE_CLASS_0_50KM,TEMPERATURE_2M_MAX (°C),TEMPERATURE_2M_MIN (°C),TEMPERATURE_2M_MEAN (°C),RAIN_SUM (MM),SNOWFALL_SUM (CM),WIND_DIRECTION_10M_DOMINANT (°),CLOUD_COVER (%)
0,2015-01-01,1,47.0,37.5,47.0_37.5,UA14,4,235.527153,0.015863,0.041639,0.333374,0.096907,0.0,-8.8,-18.0,-13.4,0.0,0.0,243.0,71.833333
1,2015-01-02,2,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0,-0.3,-8.4,-3.4,0.0,0.28,246.0,98.958333
2,2015-01-03,3,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0,2.0,-1.5,0.3,0.0,1.89,248.0,88.291667
3,2015-01-04,4,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0,0.6,-2.0,-0.3,0.0,0.63,239.0,89.958333
4,2015-01-05,5,47.0,37.5,47.0_37.5,UA14,0,235.527153,0.015863,0.041639,0.333374,0.096907,0.0,-1.4,-4.0,-2.6,0.0,0.21,241.0,95.458333


## Create Training and Validation/Calibration Data

In [54]:
# Chronological split: rows dated before 2021-02-23 are used for training,
# rows dated 2021-02-23 or later form the held-out set.
split_date = '2021-02-23'
non_feature_columns = ['ACQ_DATE', 'GRID_CELL_50KM', 'OBLAST_ID_50KM', 'FIRE_COUNT_CELL_50KM']

train_rows = fire_data_processed[fire_data_processed['ACQ_DATE'] < split_date]
test_rows = fire_data_processed[fire_data_processed['ACQ_DATE'] >= split_date]

X_train = train_rows.drop(columns=non_feature_columns)
y_train = train_rows['FIRE_COUNT_CELL_50KM']

X_test = test_rows.drop(columns=non_feature_columns)
y_test = test_rows['FIRE_COUNT_CELL_50KM']

# Date and cell of every held-out row, kept alongside the feature matrix
acq_date_test = test_rows['ACQ_DATE']
grid_cell_test = test_rows['GRID_CELL_50KM']

## Train the Pipeline

In [55]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from quantile_forest import RandomForestQuantileRegressor
from sklearn.pipeline import Pipeline

# Assemble the modelling pipeline: scaler -> PCA (n_components=0.95) -> quantile random forest
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95, random_state=42)),
    ('regressor', RandomForestQuantileRegressor(n_estimators=100, random_state=42)),
])

# Train all three steps on the training split
pipeline.fit(X_train, y_train)

# Show the configured steps
print(pipeline)

Pipeline(steps=[('scaler', StandardScaler()),
                ('pca', PCA(n_components=0.95, random_state=42)),
                ('regressor', RandomForestQuantileRegressor(random_state=42))])


## Compute the Error Threshold

In [56]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import balanced_accuracy_score

def calculate_significance_score(value, threshold):
    """Return the signed distance of ``value`` from ``threshold`` on a relative scale.

    * ``value < threshold``: ``(value - threshold) / threshold``
    * ``value > threshold``: ``(value - threshold) / value``
    * ``value == threshold``: ``0.0``

    The equality case is handled explicitly because ``value == threshold == 0``
    would otherwise divide by zero. For any other equal pair the result is the
    same 0.0 the ratio formula yields.

    Parameters
    ----------
    value : float
        Observed quantity (here: the clipped prediction error).
    threshold : float
        Reference level the value is compared against.

    Returns
    -------
    float
        Negative below the threshold, zero at it, positive above it (for
        positive ``value``). A zero ``threshold`` combined with a negative
        ``value`` is not guarded and still divides by zero.
    """
    if value == threshold:
        # Also covers 0 / 0, which the ratio below cannot express
        return 0.0
    if value < threshold:
        return (value - threshold) / threshold
    return (value - threshold) / value

class ThresholdStep(BaseEstimator, TransformerMixin):
    """Calibrate and apply a threshold on the positive prediction error.

    The error is ``y_counts - y_pred`` with negative values replaced by zero.
    ``fit`` tries 100 evenly spaced candidate thresholds between the smallest
    and largest error and keeps the one with the highest balanced accuracy
    against the reference labels. ``transform`` flags every sample whose error
    exceeds the stored threshold and computes a significance score for it.

    Parameters
    ----------
    threshold : float or None, default None
        Error threshold. Replaced by the calibrated value when ``fit`` is called.
    """

    def __init__(self, threshold=None):
        self.threshold = threshold

    def fit(self, y_pred, y_counts=None, y_labels=None):
        """Find the error threshold that best reproduces the reference labels.

        Parameters
        ----------
        y_pred : array-like
            Predicted counts.
        y_counts : pandas.Series
            Observed counts (accessed positionally via ``.iloc``).
        y_labels : float or array-like
            Either the reference 0/1 labels, or a float fraction in [0, 1].
            For a fraction, samples are sorted by error and the error at which
            the cumulative observed count reaches ``y_labels * total`` becomes
            a provisional threshold; samples with a larger error are labelled 1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y_counts`` or ``y_labels`` is missing, or if a float
            ``y_labels`` lies outside [0, 1].
        """
        if y_counts is None or y_labels is None:
            raise ValueError('Transformer requires y_counts and y_labels to be passed.')

        # Calculate the error
        error = y_counts - y_pred
        # Set all negative values to zero
        error[error < 0] = 0

        # isinstance (instead of an exact type comparison) also accepts float
        # subclasses such as numpy.float64
        if isinstance(y_labels, float):
            # A fraction above 1 would push the lookup past the last sample
            if not 0.0 <= y_labels <= 1.0:
                raise ValueError('y_labels must lie in [0, 1] when given as a fraction.')

            # Calculate the cumulative sum of fires sorted by error
            sorted_indices = np.argsort(error).reset_index(drop=True)
            sorted_y_val = y_counts.iloc[sorted_indices]
            cumulative_fires = np.cumsum(sorted_y_val)

            # Locate the error at which the requested fraction of all fires is reached
            total_fires = np.sum(y_counts)
            threshold_index = np.searchsorted(cumulative_fires, y_labels * total_fires)
            threshold = error.iloc[sorted_indices[threshold_index]]

            # Assign labels based on the threshold
            y_labels = (error > threshold).astype(int)

        # Initialize variables to store the best threshold and the highest balanced accuracy
        best_threshold = None
        highest_balanced_accuracy = 0

        # Define a range of possible thresholds
        thresholds = np.linspace(error.min(), error.max(), 100)

        # Iterate over each threshold
        for threshold in thresholds:
            # Classify samples based on the current threshold
            y_pred_threshold = (error > threshold).astype(int)

            # Calculate the balanced accuracy for the current threshold
            balanced_acc = balanced_accuracy_score(y_labels, y_pred_threshold)

            # Update the best threshold if the current balanced accuracy is higher
            # (strict comparison: the first threshold reaching the maximum wins)
            if balanced_acc > highest_balanced_accuracy:
                highest_balanced_accuracy = balanced_acc
                best_threshold = threshold

        self.threshold = best_threshold
        return self

    def transform(self, y_pred, y_counts=None):
        """Flag abnormal samples and score them against the stored threshold.

        Parameters
        ----------
        y_pred : array-like
            Predicted counts.
        y_counts : array-like
            Observed counts; required.

        Returns
        -------
        numpy.ndarray
            Two-row array: row 0 holds the 0/1 abnormal flag
            (``error > threshold``), row 1 the significance score.

        Raises
        ------
        ValueError
            If no threshold is set or ``y_counts`` is missing.
        """
        if self.threshold is None:
            raise ValueError("Threshold is not set. Please fit the transformer first.")
        # Fail with a clear message instead of a TypeError from ``None - y_pred``
        if y_counts is None:
            raise ValueError('Transformer requires y_counts to be passed.')

        # Calculate the error
        error = y_counts - y_pred
        # Set all negative values to zero
        error[error < 0] = 0
        # Compare the error with the threshold
        is_abnormal = error > self.threshold
        # Calculate the significance score
        significance_score = np.array(pd.Series(error).apply(calculate_significance_score, threshold=self.threshold))

        return np.array([is_abnormal.astype(int), significance_score])

In [57]:
# Extend the pipeline with a not-yet-calibrated threshold step.
# NOTE: re-running this cell appends another 'threshold' entry to pipeline.steps.
pipeline.steps.append(('threshold', ThresholdStep()))

# Push the held-out features through the fitted scaler, PCA and regressor by
# hand to obtain the 0.95-quantile predictions, then calibrate the threshold
# on them against the observed counts.
X_calib_scaled = pipeline.named_steps['scaler'].transform(X_test)
X_calib_reduced = pipeline.named_steps['pca'].transform(X_calib_scaled)
q95_calibration = pipeline.named_steps['regressor'].predict(X_calib_reduced, quantiles=[0.95])
_ = pipeline.named_steps['threshold'].fit(q95_calibration, y_counts=y_test, y_labels=0.95)

# Show the pipeline including the calibrated threshold
print(pipeline)

Pipeline(steps=[('scaler', StandardScaler()),
                ('pca', PCA(n_components=0.95, random_state=42)),
                ('regressor', RandomForestQuantileRegressor(random_state=42)),
                ('threshold', ThresholdStep(threshold=13.171717171717171))])


## Implement the Decay Function

In [58]:
from sklearn.base import BaseEstimator, TransformerMixin

class RecalculateConfidenceScores(BaseEstimator, TransformerMixin):
    """Let a positive significance score linger on later events of the same grid cell.

    Events are visited in date order. An event with a positive score becomes
    the cell's reference event. A later non-positive event in that cell gets
    the reference score multiplied by a sigmoid decay of the elapsed days,
    provided that product is positive and larger than its own score.

    Parameters
    ----------
    decay_rate : float
        Steepness of the sigmoid.
    midpoint : float
        Elapsed days at which the sigmoid equals 0.5.
    cutoff : float
        Elapsed days beyond which the decay factor is 0.
    """

    def __init__(self, decay_rate, midpoint, cutoff):
        self.decay_rate = decay_rate
        self.midpoint = midpoint
        self.cutoff = cutoff

    def sigmoid_decay(self, time_diff):
        """Return the decay factor for ``time_diff`` days; 0 once past the cutoff."""
        if time_diff > self.cutoff:
            return 0  # Influence reaches zero after the cut-off
        exponent = self.decay_rate * (time_diff - self.midpoint)
        return 1 / (1 + np.exp(exponent))

    def fit(self, X, y=None):
        """Nothing is learned; present for the estimator interface."""
        return self

    def transform(self, X, dates=None, grid_cells=None):
        """Recalculate scores and labels with the decayed influence applied.

        Parameters
        ----------
        X : sequence
            ``X[0]`` holds the labels and ``X[1]`` the significance scores.
        dates : iterable
            Event dates, aligned with the scores.
        grid_cells : iterable
            Grid cell of every event, aligned with the scores.

        Returns
        -------
        list
            ``[labels, recalculated_scores, X[0], X[1]]`` where ``labels`` is 1
            wherever the recalculated score is positive.

        Raises
        ------
        ValueError
            If ``dates`` or ``grid_cells`` is missing.
        """
        if dates is None or grid_cells is None:
            raise ValueError("Dates and grid cells are required for the recalculation of confidence scores.")
        y_pred, y_scores = X[0], X[1]

        # One (score, date, cell) triple per event, addressed by original position
        events = list(zip(y_scores, dates, grid_cells))
        # Positions in date order; the sort is stable, so same-day events keep their order
        chronological = sorted(range(len(events)), key=lambda position: events[position][1])

        recalculated_confidences = [None] * len(y_scores)
        # Most recent positive-score event seen so far, per grid cell
        last_war_events = {}

        for position in chronological:
            current_conf, current_date, grid_cell = events[position]

            if current_conf > 0:
                # A positive event restarts the decay for its cell and is kept as is
                last_war_events[grid_cell] = {
                    'ACQ_DATE': current_date,
                    'SIGNIFICANCE_SCORE': current_conf
                }
                recalculated_confidences[position] = current_conf
                continue

            # Default: keep the original score
            new_conf = current_conf
            if grid_cell in last_war_events:
                reference = last_war_events[grid_cell]
                # Elapsed time since the reference event, converted to days
                elapsed_days = (current_date - reference['ACQ_DATE']) / np.timedelta64(1, 'D')
                decayed_influence = self.sigmoid_decay(elapsed_days) * reference['SIGNIFICANCE_SCORE']
                # Only a positive influence above the original score replaces it
                if decayed_influence > current_conf and decayed_influence > 0:
                    new_conf = decayed_influence
            recalculated_confidences[position] = new_conf

        recalculated_confidences = np.array(recalculated_confidences)
        labels = np.where(recalculated_confidences > 0, 1, 0)
        return [labels, recalculated_confidences, y_pred, y_scores]

In [59]:
# Append the decay step (rate 1.0, midpoint 5 days, cutoff 10 days) to the pipeline.
# NOTE: re-running this cell appends another 'decay' entry to pipeline.steps.
decay_step = RecalculateConfidenceScores(decay_rate=1.0, midpoint=5, cutoff=10)
pipeline.steps.append(('decay', decay_step))
# Show the pipeline with the new step
print(pipeline)

Pipeline(steps=[('scaler', StandardScaler()),
                ('pca', PCA(n_components=0.95, random_state=42)),
                ('regressor', RandomForestQuantileRegressor(random_state=42)),
                ('threshold', ThresholdStep(threshold=13.171717171717171)),
                ('decay',
                 RecalculateConfidenceScores(cutoff=10, decay_rate=1.0,
                                             midpoint=5))])


## Predict the Validation Data using the Pipeline

In [60]:
# Score the held-out data step by step:
# scale -> PCA -> 0.95-quantile forecast -> threshold flags/scores -> decay-adjusted flags/scores
scaled_features = pipeline.named_steps['scaler'].transform(X_test)
reduced_features = pipeline.named_steps['pca'].transform(scaled_features)
q95_forecast = pipeline.named_steps['regressor'].predict(reduced_features, quantiles=[0.95])
threshold_output = pipeline.named_steps['threshold'].transform(q95_forecast, y_test)
y_pred_decay, y_scores_decay, y_pred, y_scores = pipeline.named_steps['decay'].transform(
    threshold_output, acq_date_test, grid_cell_test)

In [61]:
# Sum the observed fire counts per label (0 = normal, 1 = abnormal),
# once for the plain threshold labels and once for the decay-adjusted labels
normal_fires, abnormal_fires = (y_test[y_pred == flag].sum() for flag in (0, 1))
normal_fires_decay, abnormal_fires_decay = (y_test[y_pred_decay == flag].sum() for flag in (0, 1))

print(f'Normal Fires: {normal_fires}, Abnormal Fires: {abnormal_fires}')
print(f'Percentage of Normal Fires: {normal_fires / (normal_fires + abnormal_fires) * 100:.2f}%')
print(f'Percentage of Abnormal Fires: {abnormal_fires / (normal_fires + abnormal_fires) * 100:.2f}%')
print()
print(f'Normal Fires (Decay): {normal_fires_decay}, Abnormal Fires (Decay): {abnormal_fires_decay}')
print(f'Percentage of Normal Fires (Decay): {normal_fires_decay / (normal_fires_decay + abnormal_fires_decay) * 100:.2f}%')
print(f'Percentage of Abnormal Fires (Decay): {abnormal_fires_decay / (normal_fires_decay + abnormal_fires_decay) * 100:.2f}%')

Normal Fires: 57474, Abnormal Fires: 3523
Percentage of Normal Fires: 94.22%
Percentage of Abnormal Fires: 5.78%

Normal Fires (Decay): 53602, Abnormal Fires (Decay): 7395
Percentage of Normal Fires (Decay): 87.88%
Percentage of Abnormal Fires (Decay): 12.12%


In [62]:
# Split the significance scores by label (0 = normal, 1 = abnormal) and report the mean of each group
normal_scores, abnormal_scores = (y_scores[y_pred == flag] for flag in (0, 1))
normal_scores_decay, abnormal_scores_decay = (y_scores_decay[y_pred_decay == flag] for flag in (0, 1))

print(f'Mean Significance Score for Normal Fires: {normal_scores.mean():.2f}')
print(f'Mean Significance Score for Abnormal Fires: {abnormal_scores.mean():.2f}')
print()
print(f'Mean Significance Score for Normal Fires (Decay): {normal_scores_decay.mean():.2f}')
print(f'Mean Significance Score for Abnormal Fires (Decay): {abnormal_scores_decay.mean():.2f}')

Mean Significance Score for Normal Fires: -1.00
Mean Significance Score for Abnormal Fires: 0.31

Mean Significance Score for Normal Fires (Decay): -1.00
Mean Significance Score for Abnormal Fires (Decay): 0.17


## Save the Pipeline and Results

In [63]:
import pickle

# Persist the pipeline, including the appended threshold and decay steps
with open('saved_models/pipeline.pkl', mode='wb') as pipeline_file:
    pickle.dump(pipeline, pipeline_file)

In [64]:
# Restrict both frames to the validation window 2021-02-23 .. 2022-02-23 (inclusive)
fire_data_processed = fire_data_processed[fire_data_processed['ACQ_DATE'].between('2021-02-23', '2022-02-23')]
fire_data_copy = fire_data_copy[fire_data_copy['ACQ_DATE'].between('2021-02-23', '2022-02-23')]

# Attach the plain and decay-adjusted labels and scores to the cell/day rows
fire_data_processed = fire_data_processed.assign(
    ABNORMAL_LABEL=y_pred.tolist(),
    SIGNIFICANCE_SCORE=y_scores.tolist(),
    ABNORMAL_LABEL_DECAY=y_pred_decay.tolist(),
    SIGNIFICANCE_SCORE_DECAY=y_scores_decay.tolist(),
)

# Carry the cell/day results over to the individual fires via (cell, date)
result_columns = ['ABNORMAL_LABEL', 'SIGNIFICANCE_SCORE', 'ABNORMAL_LABEL_DECAY', 'SIGNIFICANCE_SCORE_DECAY']
cell_day_results = fire_data_processed[['GRID_CELL_50KM', 'ACQ_DATE'] + result_columns]
fire_data_copy = fire_data_copy.merge(cell_day_results, on=['GRID_CELL_50KM', 'ACQ_DATE'], how='left')

# Keep the fire identity, date, position and the four result columns
fire_data_copy = fire_data_copy[['FIRE_ID', 'ACQ_DATE', 'LONGITUDE', 'LATITUDE'] + result_columns]

# Preview the per-fire result
fire_data_copy.head()

Unnamed: 0,FIRE_ID,ACQ_DATE,LONGITUDE,LATITUDE,ABNORMAL_LABEL,SIGNIFICANCE_SCORE,ABNORMAL_LABEL_DECAY,SIGNIFICANCE_SCORE_DECAY
0,606587,2021-02-23,37.8656,47.6242,0.0,-1.0,0,-1.0
1,1576778,2021-02-23,37.603394,47.094631,0.0,-1.0,0,-1.0
2,1576773,2021-02-23,37.592018,47.092762,0.0,-1.0,0,-1.0
3,1576768,2021-02-23,37.558506,47.159988,0.0,-1.0,0,-1.0
4,1576764,2021-02-23,37.601517,47.094345,0.0,-1.0,0,-1.0


In [65]:
# Write the per-fire validation labels to CSV, leaving out the index column
fire_data_copy.to_csv(path_or_buf='output_data/abnormal_fires_validation.csv', index=False)