In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import folium
import plotly.graph_objects as go
import plotly.express as px

In [None]:
# Read the two CSV exports and stack them into a single frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each file keeps its own 0-based row labels, so labels repeat in the result
# and label-based lookups (.loc) become ambiguous.
raw_data_1 = pd.read_csv('fire_archive_M6_96619.csv')
raw_data_2 = pd.read_csv('fire_nrt_M6_96619.csv')
fire_data = pd.concat([raw_data_1, raw_data_2], ignore_index=True)
fire_data.head()

In [None]:
# Print the column names, dtypes and non-null counts of the combined frame.
fire_data.info()

In [None]:
# Count rows that are exact duplicates (all columns equal) of an earlier row.
fire_data.duplicated().sum()

In [None]:
# Remove the 'type' column from the working frame.
# errors='ignore' keeps the cell safe to re-run: on a second execution the
# column is already gone and a plain drop would raise KeyError.
fire_data = fire_data.drop(columns='type', errors='ignore')

In [None]:
# Preview the first rows of the working frame.
fire_data.head()

In [None]:
# Report the smallest and largest acquisition date in the data.
# NOTE(review): acq_date has not been passed through pd.to_datetime yet, so if
# it is stored as text this is a string comparison, which only orders
# correctly for ISO 'YYYY-MM-DD' values. Confirm the format in the CSVs.
print('Min:', fire_data['acq_date'].min())
print('Max:', fire_data['acq_date'].max())

In [None]:
# Parse the acquisition date once, then derive the calendar features from it.
acq_dates = pd.to_datetime(fire_data['acq_date'])

fire_data = fire_data.assign(
    acq_date=acq_dates,
    month=acq_dates.dt.month,              # calendar month number
    date=acq_dates.dt.day,                 # day of the month, despite the column name
    day_of_week=acq_dates.dt.day_name(),   # weekday name, e.g. 'Monday'
)

fire_data.head()

In [None]:
# Keep a plain-text 'YYYY-MM-DD' copy of the acquisition date next to the
# datetime column; the animated maps further down use it as the frame key.
iso_dates = fire_data['acq_date'].dt.strftime('%Y-%m-%d')
fire_data['acq_date_string'] = iso_dates
fire_data['acq_date_string'].dtype

In [None]:
# List the distinct month numbers present in the data.
fire_data['month'].unique()

# Fire counts

In [None]:
# Number of fire records per calendar month (index = month number).
# NOTE(review): grouping on the month number alone ignores the year, so months
# from different years would be merged and sort by number, not chronologically.
records_by_month = fire_data.groupby('month')
fire_count_per_month = records_by_month.size()
fire_count_per_month.head()

In [None]:
# Bar chart of the number of fire records per month.
# rot=0 asks pandas for horizontal tick labels directly, instead of resetting
# them afterwards with rotation=None (which matplotlib only happens to treat
# as horizontal).
fig, ax = plt.subplots(figsize=(10, 6))
fire_count_per_month.plot(kind='bar', ax=ax, color='skyblue', rot=0)

ax.set_title('Count of Fire Records by Month')
ax.set_xlabel('Month')
ax.set_ylabel('Count of Fires')
fig.tight_layout()
plt.show()

In [None]:
# Record counts per month, split into one column per 'daynight' value.
# fill_value=0 makes a month with no records for one of the values show a
# count of 0 instead of NaN (which would also turn the counts into floats).
fire_count_daynight = (
    fire_data
    .groupby(['month', 'daynight'])
    .size()
    .unstack(fill_value=0)
)
fire_count_daynight.head()

In [None]:
# Stacked bar chart of record counts per month, split by day/night.
# The legend text comes from renaming the 'D'/'N' columns rather than from a
# positional labels=[...] list, so each colour keeps the right label even if
# the column order changes or one of the two values is missing.
daynight_names = {'D': 'Day', 'N': 'Night'}

fig, ax = plt.subplots(figsize=(12, 6))
fire_count_daynight.rename(columns=daynight_names).plot(
    kind='bar', stacked=True, ax=ax, rot=0
)
ax.set_title('Fire Count by Month and Day/Night')
ax.set_xlabel('Month')
ax.set_ylabel('Count of Fires')
ax.legend(title='Day/Night')
fig.tight_layout()
plt.show()

# Geospatial fires (static)

In [None]:
# Slice the records by calendar month number (8 = August ... 12 = December).
record_month = fire_data['month']

fire_data_aug = fire_data.loc[record_month.eq(8)]
fire_data_sep = fire_data.loc[record_month.eq(9)]
fire_data_oct = fire_data.loc[record_month.eq(10)]
fire_data_nov = fire_data.loc[record_month.eq(11)]
fire_data_dec = fire_data.loc[record_month.eq(12)]

In [None]:
def create_geodataframe(df, crs="EPSG:4326"):
    """Build a GeoDataFrame of point geometries from a frame's coordinates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``longitude`` and ``latitude`` columns.
    crs : str or None, default "EPSG:4326"
        Coordinate reference system attached to the result. The default
        assumes the coordinates are WGS84 decimal degrees (TODO confirm
        against the data source). Pass ``None`` to leave the CRS unset,
        which is what this function did before the parameter existed.

    Returns
    -------
    geopandas.GeoDataFrame
        The columns of ``df`` plus a point ``geometry`` column.
    """
    return gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs=crs,
    )

# One GeoDataFrame per monthly slice, plus one for the full dataset.
gdf_aug = create_geodataframe(fire_data_aug)
gdf_sep = create_geodataframe(fire_data_sep)
gdf_oct = create_geodataframe(fire_data_oct)
gdf_nov = create_geodataframe(fire_data_nov)
gdf_dec = create_geodataframe(fire_data_dec)
gdf_all = create_geodataframe(fire_data)

# (GeoDataFrame, panel title, marker colour), listed in row-major panel order.
month_panels = [
    (gdf_aug, 'August', 'blue'),
    (gdf_sep, 'September', 'green'),
    (gdf_oct, 'October', 'red'),
    (gdf_nov, 'November', 'purple'),
    (gdf_dec, 'December', 'orange'),
    (gdf_all, 'Entire Data', 'gray'),
]

# 3x2 grid: five monthly maps followed by the map of every record.
fig, axes = plt.subplots(3, 2, figsize=(14, 18))
fig.suptitle('Fire Data by Months and Entire Data')

for panel_ax, (panel_gdf, panel_title, panel_color) in zip(axes.flat, month_panels):
    panel_gdf.plot(ax=panel_ax, color=panel_color, markersize=5)
    panel_ax.set_title(panel_title)

# Leave the top 4% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
def create_day_geodataframe(df, crs="EPSG:4326"):
    """Build a GeoDataFrame of point geometries from the daytime records of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``daynight``, ``longitude`` and ``latitude`` columns.
        Only rows whose ``daynight`` value is ``'D'`` are kept.
    crs : str or None, default "EPSG:4326"
        Coordinate reference system attached to the result. The default
        assumes the coordinates are WGS84 decimal degrees (TODO confirm
        against the data source). Pass ``None`` to leave the CRS unset,
        which is what this function did before the parameter existed.

    Returns
    -------
    geopandas.GeoDataFrame
        The daytime rows of ``df`` plus a point ``geometry`` column.
    """
    day_data = df[df['daynight'] == 'D']
    return gpd.GeoDataFrame(
        day_data,
        geometry=gpd.points_from_xy(day_data.longitude, day_data.latitude),
        crs=crs,
    )

# Daytime-only GeoDataFrames for each monthly slice and for the full dataset.
gdf_day_aug = create_day_geodataframe(fire_data_aug)
gdf_day_sep = create_day_geodataframe(fire_data_sep)
gdf_day_oct = create_day_geodataframe(fire_data_oct)
gdf_day_nov = create_day_geodataframe(fire_data_nov)
gdf_day_dec = create_day_geodataframe(fire_data_dec)
gdf_day_all = create_day_geodataframe(fire_data)

# (GeoDataFrame, panel title, marker colour), listed in row-major panel order.
day_panels = [
    (gdf_day_aug, 'August (Day)', 'blue'),
    (gdf_day_sep, 'September (Day)', 'green'),
    (gdf_day_oct, 'October (Day)', 'red'),
    (gdf_day_nov, 'November (Day)', 'purple'),
    (gdf_day_dec, 'December (Day)', 'orange'),
    (gdf_day_all, 'Entire Data (Day)', 'grey'),
]

# 3x2 grid: five monthly daytime maps followed by all daytime records.
fig, axes = plt.subplots(3, 2, figsize=(14, 18))
fig.suptitle('Fire Data by Months and Entire Data (Day Records)')

for panel_ax, (panel_gdf, panel_title, panel_color) in zip(axes.flat, day_panels):
    panel_gdf.plot(ax=panel_ax, color=panel_color, markersize=5)
    panel_ax.set_title(panel_title)

# Leave the top 4% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
def create_night_geodataframe(df, crs="EPSG:4326"):
    """Build a GeoDataFrame of point geometries from the night-time records of ``df``.

    This cell used to redefine ``create_day_geodataframe`` with an ``'N'``
    filter, which silently replaced the daytime helper of the same name. The
    night-time variant now has its own name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``daynight``, ``longitude`` and ``latitude`` columns.
        Only rows whose ``daynight`` value is ``'N'`` are kept.
    crs : str or None, default "EPSG:4326"
        Coordinate reference system attached to the result. The default
        assumes the coordinates are WGS84 decimal degrees (TODO confirm
        against the data source). Pass ``None`` to leave the CRS unset.

    Returns
    -------
    geopandas.GeoDataFrame
        The night-time rows of ``df`` plus a point ``geometry`` column.
    """
    night_data = df[df['daynight'] == 'N']
    return gpd.GeoDataFrame(
        night_data,
        geometry=gpd.points_from_xy(night_data.longitude, night_data.latitude),
        crs=crs,
    )

# Night-time GeoDataFrames for each monthly slice and for the full dataset.
# They get their own gdf_night_* names so the daytime gdf_day_* frames are
# no longer overwritten with night-time data.
gdf_night_aug = create_night_geodataframe(fire_data_aug)
gdf_night_sep = create_night_geodataframe(fire_data_sep)
gdf_night_oct = create_night_geodataframe(fire_data_oct)
gdf_night_nov = create_night_geodataframe(fire_data_nov)
gdf_night_dec = create_night_geodataframe(fire_data_dec)
gdf_night_all = create_night_geodataframe(fire_data)

# 3x2 grid: five monthly night-time maps followed by all night-time records.
# The figure title previously said "(Day Records)" above night-time plots.
fig, axes = plt.subplots(3, 2, figsize=(14, 18))
fig.suptitle('Fire Data by Months and Entire Data (Night Records)')

gdf_night_aug.plot(ax=axes[0, 0], color='blue', markersize=5)
axes[0, 0].set_title('August (Night)')

gdf_night_sep.plot(ax=axes[0, 1], color='green', markersize=5)
axes[0, 1].set_title('September (Night)')

gdf_night_oct.plot(ax=axes[1, 0], color='red', markersize=5)
axes[1, 0].set_title('October (Night)')

gdf_night_nov.plot(ax=axes[1, 1], color='purple', markersize=5)
axes[1, 1].set_title('November (Night)')

gdf_night_dec.plot(ax=axes[2, 0], color='orange', markersize=5)
axes[2, 0].set_title('December (Night)')

gdf_night_all.plot(ax=axes[2, 1], color='grey', markersize=5)
axes[2, 1].set_title('Entire Data (Night)')

# Leave the top 4% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

# Geospatial Fires (Dynamic/interactive)

In [None]:
# Density heat map of every record, weighted by the 'brightness' column.
brightness_heat = go.Densitymapbox(
    lat=fire_data.latitude,
    lon=fire_data.longitude,
    z=fire_data.brightness,
    radius=8,
    colorscale='Reds',
)
fig = go.Figure(brightness_heat)

# Base map style, view centre/zoom and a centred title, set in one call.
fig.update_layout(
    mapbox=dict(
        style="carto-positron",
        center=dict(lon=134, lat=-25),
        zoom=2.5,
    ),
    title=dict(
        text='Australian Fires',
        font=dict(size=20, color='FireBrick'),
        x=0.5,
    ),
)

fig.show()

In [None]:
# Animated density map with one frame per acquisition date, over all records.
# Sorting by the date string puts the animation frames in chronological order.
fdtime = fire_data.sort_values(by=['acq_date_string'], ascending=True)

# The labels key must name the column actually used for the animation
# ('acq_date_string'); the previous key 'acq_date' matched no column in the
# figure, so the slider showed the raw column name instead of "Date".
fig = px.density_mapbox(fdtime, lon='longitude', lat='latitude', z='brightness',
                        radius=8,
                        center=dict(lon=134, lat=-25),
                        zoom=2.5,
                        mapbox_style='carto-positron',
                        color_continuous_scale='reds',
                        animation_frame='acq_date_string',
                        labels={"acq_date_string": "Date"})

fig.update_layout(title='Australian Fires: From 2019/08/01 to 2020/01/11',
                  title_font=dict(size=18, color='FireBrick'),
                  title_x=0.5)

fig.show()

In [None]:
# Animated density map with one frame per acquisition date, daytime records
# only ('daynight' == 'D'). Sorting by the date string puts the frames in
# chronological order.
fdday = fire_data[fire_data['daynight'] == 'D'].sort_values(by=['acq_date_string'], ascending=True)

# The labels key must name the column actually used for the animation
# ('acq_date_string'); the previous key 'acq_date' matched no column in the
# figure, so the slider showed the raw column name instead of "Date".
fig = px.density_mapbox(fdday, lon='longitude', lat='latitude', z='brightness',
                        radius=8,
                        center=dict(lon=134, lat=-25),
                        zoom=2.5,
                        mapbox_style='carto-positron',
                        color_continuous_scale='reds',
                        animation_frame='acq_date_string',
                        labels={"acq_date_string": "Date"})

fig.update_layout(title='Australian Fires: From 2019/08/01 to 2020/01/11 During Day',
                  title_font=dict(size=18, color='FireBrick'),
                  title_x=0.5)

fig.show()

In [None]:
# Animated density map with one frame per acquisition date, night-time records
# only ('daynight' == 'N'). Sorting by the date string puts the frames in
# chronological order.
fdnight = fire_data[fire_data['daynight'] == 'N'].sort_values(by=['acq_date_string'], ascending=True)

# The labels key must name the column actually used for the animation
# ('acq_date_string'); the previous key 'acq_date' matched no column in the
# figure, so the slider showed the raw column name instead of "Date".
fig = px.density_mapbox(fdnight, lon='longitude', lat='latitude', z='brightness',
                        radius=8,
                        center=dict(lon=134, lat=-25),
                        zoom=2.5,
                        mapbox_style='carto-positron',
                        color_continuous_scale='reds',
                        animation_frame='acq_date_string',
                        labels={"acq_date_string": "Date"})

fig.update_layout(title='Australian Fires: From 2019/08/01 to 2020/01/11 During Night',
                  title_font=dict(size=18, color='FireBrick'),
                  title_x=0.5)

fig.show()

# Time series Forecasting

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Daily number of fire records, on a gap-free daily index.
# groupby alone only yields dates that have at least one record; asfreq('D')
# inserts any missing calendar days with a count of 0 and gives the index an
# explicit daily frequency for the time-series model fitted below.
# Assumes 'acq_date' holds datetimes without a time-of-day component, which
# the to_datetime conversion earlier in the notebook should produce (TODO confirm).
daily_fire_counts = (
    fire_data
    .groupby('acq_date')
    .size()
    .asfreq('D', fill_value=0)
)

# Displaying the first few rows of the aggregated data
daily_fire_counts.head()


In [None]:
# Chronological 80/20 split: the first 80% of days train the model and the
# remaining days are held out for evaluation.
train_size = int(0.8 * len(daily_fire_counts))
train = daily_fire_counts.iloc[:train_size]
test = daily_fire_counts.iloc[train_size:]

# Autocorrelation (left) and partial autocorrelation (right) of the training
# series, up to 40 lags, as a guide for choosing the ARIMA orders.
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
acf_ax, pacf_ax = ax

plot_acf(train, ax=acf_ax, lags=40)
plot_pacf(train, ax=pacf_ax, lags=40)

plt.tight_layout()
plt.show()

In [None]:
# Baseline model: ARIMA with one AR term, no differencing and one MA term,
# fitted on the training series.
baseline_order = (1, 0, 1)
model = ARIMA(train, order=baseline_order)
model_fit = model.fit()

# Display the fitted-model summary table.
model_fit.summary()

In [None]:
# Forecast as many steps ahead as there are held-out observations.
n_test_steps = len(test)
forecast = model_fit.forecast(steps=n_test_steps)

# Error of the forecast against the held-out values.
mae = mean_absolute_error(test, forecast)
rmse = np.sqrt(mean_squared_error(test, forecast))

# Training data, held-out data and forecast on one time axis.
# Each entry is (x values, y values, extra plot keyword arguments).
forecast_lines = [
    (train.index, train, dict(label='Training Data', color='blue')),
    (test.index, test, dict(label='Actual Test Data', color='green')),
    (test.index, forecast, dict(label='Predicted Data', color='red', linestyle='--')),
]

plt.figure(figsize=(14, 7))
for line_x, line_y, line_kwargs in forecast_lines:
    plt.plot(line_x, line_y, **line_kwargs)
plt.title('ARIMA Forecast vs Actuals')
plt.legend(loc='best')
plt.grid(True)
plt.show()

mae, rmse

In [None]:
import itertools
import warnings

# Candidate values for each ARIMA order component (0, 1, 2).
p_range = range(0, 3)
d_range = range(0, 3)
q_range = range(0, 3)

# Best result found so far.
best_aic = float("inf")
best_order = None
best_model = None

# Grid search over every (p, d, q) combination, keeping the lowest-AIC fit.
# NOTE(review): AIC is generally not considered comparable between models with
# different differencing orders d, so treat a winner chosen across d values
# with caution.
#
# Warnings are silenced only inside this block; a global filterwarnings call
# would also hide warnings from every later cell in the notebook.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for p, d, q in itertools.product(p_range, d_range, q_range):
        try:
            temp_model_fit = ARIMA(train, order=(p, d, q)).fit()
        except Exception:
            # Skip orders that fail to fit. `except Exception` (not a bare
            # `except`) lets KeyboardInterrupt still stop a long search.
            continue

        # Keep this fit if it beats the best AIC seen so far.
        if temp_model_fit.aic < best_aic:
            best_aic = temp_model_fit.aic
            best_order = (p, d, q)
            best_model = temp_model_fit

best_aic, best_order


In [None]:
# Refit the order selected by the grid search on the training data.
# best_order is used instead of a hand-copied (1, 2, 2) so this cell stays in
# sync with the search if the data or the search ranges change. (1, 2, 2) is
# kept only as a fallback for the case where no candidate could be fitted.
optimized_order = best_order if best_order is not None else (1, 2, 2)
optimized_model = ARIMA(train, order=optimized_order)
optimized_model_fit = optimized_model.fit()

# Forecast as many steps ahead as there are held-out observations.
optimized_forecast = optimized_model_fit.forecast(steps=len(test))

# Error of the forecast against the held-out values.
optimized_mae = mean_absolute_error(test, optimized_forecast)
optimized_rmse = np.sqrt(mean_squared_error(test, optimized_forecast))

# Training data, held-out data and forecast on one time axis.
plt.figure(figsize=(14, 7))
plt.plot(train.index, train, label='Training Data', color='blue')
plt.plot(test.index, test, label='Actual Test Data', color='green')
plt.plot(test.index, optimized_forecast, label='Optimized Predicted Data', color='red', linestyle='--')
plt.title('Optimized ARIMA Forecast vs Actuals')
plt.legend(loc='best')
plt.grid(True)
plt.show()

optimized_mae, optimized_rmse


In [None]:
# Display the fitted-model summary table for the optimized ARIMA fit.
optimized_model_fit.summary()