In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Load the engineered wildfire dataset into the working frame.
# NOTE(review): absolute Kaggle input path — presumably only resolves inside a Kaggle kernel; confirm before running elsewhere.
df = pd.read_csv('/kaggle/input/wildfiredata/engineered_wildfire_data.csv')

In [4]:
# Summarise the columns: dtype and non-null count per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450342 entries, 0 to 450341
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   temp        450342 non-null  float64
 1   rh          450342 non-null  float64
 2   ws          450342 non-null  float64
 3   wd          450342 non-null  int64  
 4   pcp         450342 non-null  float64
 5   ffmc        450342 non-null  float64
 6   dmc         450342 non-null  float64
 7   dc          450342 non-null  float64
 8   isi         450342 non-null  float64
 9   bui         450342 non-null  float64
 10  fwi         450342 non-null  float64
 11  ros         450339 non-null  float64
 12  sfc         450339 non-null  float64
 13  tfc         450339 non-null  float64
 14  bfc         239132 non-null  float64
 15  hfi         450339 non-null  float64
 16  cfb         450339 non-null  float64
 17  pcuring     425990 non-null  float64
 18  greenup     425990 non-null  float64
 19  el

In [5]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,temp,rh,ws,wd,pcp,ffmc,dmc,dc,isi,bui,...,tfc0,sfc0,year,month,day,lat_sin,lat_cos,lon_sin,lon_cos,year_month
0,-1.006741,2.094934,-0.649149,320,0.43,82.976,30.078,161.161,2.68,41.018,...,0.35,0.35,2020,6,2,0.883899,0.467678,-0.311904,-0.950114,2020-6
1,0.423696,0.99279,-0.700308,145,1.237,68.466,0.0,294.02,0.977,0.0,...,0.1,0.1,2020,6,11,0.87989,0.475177,-0.268096,-0.963392,2020-6
2,0.226618,1.122454,-0.86797,30,0.591,88.685,55.743,202.448,5.536,66.032,...,1.36,1.36,2020,6,20,0.736971,0.675925,-0.939322,0.343037,2020-6
3,1.366794,-1.665321,-0.84315,271,0.0,98.652,290.568,841.23,22.181,311.848,...,0.35,0.35,2020,6,22,0.535709,0.844403,-0.934801,-0.355172,2020-6
4,1.170139,0.020311,-0.000958,50,0.001,91.66,18.664,102.62,10.972,25.66,...,0.35,0.35,2020,6,13,0.551529,0.834155,-0.999729,-0.023267,2020-6


In [6]:
# Count missing values per column.
df.isnull().sum()

temp               0
rh                 0
ws                 0
wd                 0
pcp                0
ffmc               0
dmc                0
dc                 0
isi                0
bui                0
fwi                0
ros                3
sfc                3
tfc                3
bfc           211210
hfi                3
cfb                3
pcuring        24352
greenup        24352
elev               0
sfl                0
cfl                0
tfc0               3
sfc0               3
year               0
month              0
day                0
lat_sin            0
lat_cos            0
lon_sin            0
lon_cos            0
year_month         0
dtype: int64

In [7]:
# Order rows chronologically; the lag features built later with shift(1) depend on this row order.
df = df.sort_values(['year', 'month', 'day'], ascending=True)

In [8]:
# df.fillna(method='ffill', inplace=True)

In [9]:
# Columns taken directly from the loaded dataset and used as model inputs.
# 'cfb' is deliberately absent: it is appended separately as the prediction target.
primary_features = [
    'temp',    # Temperature
    'rh',      # Relative Humidity
    'ws',      # Wind Speed
    'wd',      # Wind Direction
    'ffmc',    # Fine Fuel Moisture Code
    'dmc',     # Duff Moisture Code
    'dc',      # Drought Code
    'isi',     # Initial Spread Index
    'bui',     # Buildup Index
    'fwi',     # Fire Weather Index
    'ros',     # Rate of Spread
    'sfc',     # Surface Fuel Consumption
    'tfc',     # Total Fuel Consumption
    'hfi',     # Head Fire Intensity
    'pcuring', # Percent Curing (if available)
    'elev',    # Elevation
    'sfl',     # Surface Fuel Load (if this represents available surface fuels)
    'cfl'      # Crown Fuel Load
]

In [10]:
# Time-based features
# These columns do not exist yet at this point: the sin/cos pairs are added by
# add_fourier_features and 'age' by add_age_feature.
time_features = ['month_sin', 'month_cos', 'day_sin', 'day_cos', 'age']
# Note: 'year_month' is omitted as it's likely redundant with 'year' and 'month'

# Derived features
# All of these are created by derive_features.
derived_features = [
    'temp_lag_1',     # Temperature from previous time step
    'ws_lag_1',       # Wind speed from previous time step
    'rh_lag_1',       # Relative humidity from previous time step
    'ffmc_lag_1',     # FFMC from previous time step
    'isi_lag_1',      # ISI from previous time step
    'fwi_lag_1',      # FWI from previous time step
    'drought_index',  # Composite drought index (e.g., (dc + dmc) / 2)
  #  'day_of_year',    # Calculated from year, month, day
    'season'          # Derived from month (e.g., Spring, Summer, Fall, Winter)
]

In [11]:
# Combine all features
# Order is primary, then time, then derived; this list is later used to select columns for the model frame.
all_features = primary_features + time_features + derived_features

In [12]:
def derive_features(df):
    """Index the frame by calendar date and add season, lag and drought columns.

    The work is done on a copy, so the caller's frame is left untouched and
    the notebook cell can be re-run safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``year``, ``month``, ``day``, ``dc``, ``dmc`` and the
        lag source columns ``temp``, ``ws``, ``rh``, ``ffmc``, ``isi``, ``fwi``.

    Returns
    -------
    pandas.DataFrame
        A new frame with a ``DatetimeIndex`` named ``date`` and the extra
        columns ``season``, ``<feature>_lag_1`` and ``drought_index``.
    """
    out = df.copy()

    # Assemble the date directly from the numeric columns instead of parsing
    # concatenated strings.
    dates = pd.to_datetime(out[['year', 'month', 'day']])
    out.index = pd.DatetimeIndex(dates, name='date')

    # Quarter-style season buckets: months 1-3, 4-6, 7-9, 10-12.
    out['season'] = pd.cut(
        out['month'],
        bins=[0, 3, 6, 9, 12],
        labels=['Winter', 'Spring', 'Summer', 'Fall'],
    )

    # Lag features take the value of the previous ROW.
    # NOTE(review): when several rows share a date, the "previous row" is not
    # necessarily the previous time step of the same location — confirm intent.
    for feature in ['temp', 'ws', 'rh', 'ffmc', 'isi', 'fwi']:
        out[f'{feature}_lag_1'] = out[feature].shift(1)

    # Simple composite drought indicator: mean of drought code and duff moisture code.
    out['drought_index'] = (out['dc'] + out['dmc']) / 2

    return out

# Apply the function to your dataframe
# From here on df is indexed by date and carries the season, lag and drought columns.
df = derive_features(df)

In [13]:
def add_fourier_features(df):
    """Add sine/cosine encodings of ``month`` and ``day`` to ``df``.

    ``month`` is mapped onto a 12-step circle and ``day`` onto a 31-step
    circle. The columns ``month_sin``, ``month_cos``, ``day_sin`` and
    ``day_cos`` are written onto the frame that was passed in, and that same
    frame is returned.
    """
    full_turn = 2 * np.pi
    for column, period in (('month', 12), ('day', 31)):
        angle = full_turn * df[column] / period
        df[f'{column}_sin'] = np.sin(angle)
        df[f'{column}_cos'] = np.cos(angle)
    return df

In [14]:
def add_age_feature(df):
    """Add an ``age`` column: whole days between each row's index value and the earliest index value.

    The column is written onto the frame that was passed in, and that same
    frame is returned. The index must support date subtraction (the notebook
    uses a DatetimeIndex).
    """
    first_date = df.index.min()
    elapsed = df.index - first_date
    df['age'] = elapsed.days
    return df

In [15]:
# Add the cyclic month/day encodings, then the days-since-earliest-date 'age' column.
df = add_fourier_features(df)
df = add_age_feature(df)

In [16]:
# Drop the raw calendar columns: the date now lives in the index and month/day are encoded as sin/cos.
# NOTE(review): re-running this cell without re-running the ones above will presumably fail because the columns are already gone.
df = df.drop(columns=['year', 'month', 'day'])

In [17]:
# Model frame: the selected feature columns followed by the target column 'cfb'.
final_df = pd.concat([df[all_features], df['cfb']], axis=1)

In [18]:
# Check dtypes and remaining missing values in the model frame.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 450342 entries, 2020-03-01 to 2023-11-04
Data columns (total 32 columns):
 #   Column         Non-Null Count   Dtype   
---  ------         --------------   -----   
 0   temp           450342 non-null  float64 
 1   rh             450342 non-null  float64 
 2   ws             450342 non-null  float64 
 3   wd             450342 non-null  int64   
 4   ffmc           450342 non-null  float64 
 5   dmc            450342 non-null  float64 
 6   dc             450342 non-null  float64 
 7   isi            450342 non-null  float64 
 8   bui            450342 non-null  float64 
 9   fwi            450342 non-null  float64 
 10  ros            450339 non-null  float64 
 11  sfc            450339 non-null  float64 
 12  tfc            450339 non-null  float64 
 13  hfi            450339 non-null  float64 
 14  pcuring        425990 non-null  float64 
 15  elev           450342 non-null  int64   
 16  sfl            450342 non-null  float64 

In [19]:
# Lag offsets handed to AutoformerConfig; max(lags_sequence) extra rows are added to every input window.
lags_sequence = [1,2,3,4,5,6,7]

In [20]:
# input_size = X_train.shape[2]  # number of features
# prediction_length = 24 # Predicting cfb for the next time step
# #context_length = X_train.shape[1]  # Same as sequence_length
# context_length = X_train.shape[1] - max(lags_sequence) - prediction_length

In [21]:
from transformers import AutoformerConfig, AutoformerModel

# Autoformer hyper-parameters.
# NOTE(review): input_size is hard-coded. It has to match the last dimension of the
# windows produced by prepare_data (printed as 35 further down) — re-check whenever
# the feature set changes.
config = AutoformerConfig(
    input_size=35,  # number of input features
    context_length=48,  # context length excluding lags
    prediction_length=24,  # number of time steps to predict
    num_time_features=len(time_features),  # number of time features
#     num_static_real_features=1,  # number of static real features
 #   num_dynamic_real_features=len(all_features) - len(time_features),  # number of dynamic real features
    d_model=64,
    encoder_layers=2,
    decoder_layers=2,
    encoder_attention_heads=2,
    decoder_attention_heads=2,
    encoder_ffn_dim=128,
    decoder_ffn_dim=128,
    lags_sequence=lags_sequence
)

# Build the (untrained) model from the configuration above.
model = AutoformerModel(config)

In [22]:
# Use the context and prediction lengths from the configuration
# Keeping these as notebook-level names lets the windowing and training cells
# read the same values the model was configured with.
context_length = config.context_length
prediction_length = config.prediction_length
lags_sequence = config.lags_sequence

In [23]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

def prepare_data(df, target_column='cfb', test_size=0.2):
    """Turn the feature frame into train/test sliding-window arrays.

    Steps: sort by index, split chronologically (no shuffling), standardise
    the numeric columns, one-hot encode ``season``, pass the time features
    through unchanged, re-attach the target and cut overlapping windows.
    The scaler and encoder are fitted on the training part only.

    Reads the notebook-level names ``time_features``, ``context_length``,
    ``prediction_length`` and ``lags_sequence``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns, a ``season`` column, the time
        feature columns and ``target_column``.
    target_column : str
        Name of the column to predict.
    test_size : float
        Fraction of the (sorted) rows placed in the test split.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, time_feature_indices)``.
        Each ``X`` window has ``context_length + max(lags_sequence)`` rows and
        each ``y`` window has ``prediction_length`` rows; both keep every
        column (transformed features followed by the target).
        ``time_feature_indices`` are the column positions of the time features.
    """
    # Stable sort: rows sharing a date keep their existing relative order.
    df = df.sort_index(kind='mergesort')

    # Separate the target column from the features
    features = df.drop(columns=[target_column])
    target = df[target_column]

    # Chronological split: the last `test_size` share of rows becomes the test set
    train_features, test_features, train_target, test_target = train_test_split(
        features, target, test_size=test_size, shuffle=False
    )

    # Numeric columns are scaled, except the time features which are passed through as-is
    categorical_features = ['season']
    numeric_features = [
        col
        for col in train_features.select_dtypes(include=['float64', 'int64']).columns
        if col not in time_features
    ]

    # sparse_threshold=0 forces a dense result regardless of how sparse the
    # one-hot block makes the stacked output, so the DataFrame reconstruction
    # below always receives a plain array.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
            ('time', 'passthrough', time_features)  # passthrough time features
        ],
        sparse_threshold=0,
    )

    # Fit on train only, then apply the same transformation to test
    train_features_scaled = preprocessor.fit_transform(train_features)
    test_features_scaled = preprocessor.transform(test_features)

    # Recover column names in the order the ColumnTransformer emits them
    numeric_columns = preprocessor.transformers_[0][2]
    categorical_columns = preprocessor.transformers_[1][1].get_feature_names_out(categorical_features)
    time_columns = preprocessor.transformers_[2][2]

    all_columns = list(numeric_columns) + list(categorical_columns) + list(time_columns)

    # Convert the transformed data back to DataFrame
    train_features_scaled_df = pd.DataFrame(train_features_scaled, index=train_features.index, columns=all_columns)
    test_features_scaled_df = pd.DataFrame(test_features_scaled, index=test_features.index, columns=all_columns)
    print(train_features_scaled_df)

    # Add the (unscaled) target column back as the last column
    train_scaled = train_features_scaled_df.copy()
    train_scaled[target_column] = train_target
    test_scaled = test_features_scaled_df.copy()
    test_scaled[target_column] = test_target

    def create_sequences(data, context_length, prediction_length, lags_sequence):
        """Cut ``data`` into overlapping (past, future) windows with stride 1."""
        past_length = context_length + max(lags_sequence)
        total_length = past_length + prediction_length

        # Materialise the array once; slicing the DataFrame inside the loop
        # repeats the same conversion for every window.
        values = data.values

        X, y = [], []
        for start in range(len(values) - total_length + 1):
            split = start + past_length
            X.append(values[start:split])                 # context + lags
            y.append(values[split:start + total_length])  # prediction horizon

        return np.array(X), np.array(y)

    print('time features: ', time_columns)
    time_feature_indices = [train_scaled.columns.get_loc(col) for col in time_columns]
    X_train, y_train = create_sequences(train_scaled, context_length, prediction_length, lags_sequence)
    X_test, y_test = create_sequences(test_scaled, context_length, prediction_length, lags_sequence)

    return X_train, y_train, X_test, y_test, time_feature_indices

# Prepare the data
# time_feature_indices is kept at notebook level because prepare_inputs reads it as a global.
X_train, y_train, X_test, y_test, time_feature_indices = prepare_data(final_df)

                temp        rh        ws        wd      ffmc       dmc  \
date                                                                     
2020-03-01  0.169331  0.999267  1.725997  0.737452 -0.160303 -0.828024   
2020-03-01 -1.392263 -0.420849  2.855663  1.053733 -1.057628 -1.022287   
2020-03-01 -0.925501 -0.162646  0.697408  0.737452  0.068199 -0.752206   
2020-03-01 -1.189209  0.353760  0.854368  1.159160 -0.112282 -0.928488   
2020-03-01 -0.212340  0.353760  1.181042  1.475441 -0.675485 -0.173166   
...              ...       ...       ...       ...       ...       ...   
2023-01-24 -2.075024  0.353760 -0.250357 -2.109077 -0.622241 -1.085546   
2023-01-24 -1.710516  0.160108 -0.007321 -2.109077 -0.556084 -1.076072   
2023-01-24 -1.090737  0.547412  0.973823  1.370014 -4.317738 -1.002084   
2023-01-25 -0.080704  3.258541  3.475067 -0.211391 -5.786535 -0.715961   
2023-01-25 -2.362586  1.967527  1.077714  1.275129 -3.677206 -0.587229   

                  dc       isi       

In [24]:
# Sanity-check the window arrays: (number of windows, rows per window, columns per row).
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (360195, 55, 35)
Shape of y_train: (360195, 24, 35)
Shape of X_test: (89991, 55, 35)
Shape of y_test: (89991, 24, 35)


In [25]:
from torch.utils.data import DataLoader, TensorDataset
import torch

# Convert data to PyTorch tensors and create DataLoaders
# Training windows are shuffled per epoch; the held-out test windows are served
# in order through `val_loader`.
train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
test_dataset = TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [26]:
# Inspect the context length the model was configured with.
config.context_length

48

In [27]:
# Inspect the lag offsets currently in use.
lags_sequence

[1, 2, 3, 4, 5, 6, 7]

In [28]:
# Scratch count of non-time features.
# NOTE(review): the meaning of the subtracted constant 3 is not evident from the notebook — confirm or remove.
len(all_features) - len(time_features) - 3

23

In [29]:
def create_past_observed_mask(values):
    """Return a boolean tensor shaped like ``values``: True where a value is present, False where it is NaN."""
    return torch.isnan(values).logical_not()

def create_future_observed_mask(values):
    """Return a boolean tensor shaped like ``values``: True where a value is present, False where it is NaN."""
    return torch.isnan(values).logical_not()

In [30]:
# Linear layer to project the model's last hidden state back to one value per input column.
# The input width is taken from the config (feature_size) instead of a hard-coded 320,
# so it stays correct when input_size, lags_sequence or the number of time features change.
projection_layer = torch.nn.Linear(config.feature_size, config.input_size)

In [31]:
# Train the model
from torch.optim import Adam
from torch.nn import MSELoss
import torch

# One optimizer updates both the Autoformer weights and the output projection layer.
optimizer = Adam(list(model.parameters()) + list(projection_layer.parameters()), lr=1e-3)
criterion = MSELoss()

num_epochs = 100
# NOTE(review): the DataLoaders in this notebook are built with a literal batch size of 32 and do not read this variable.
batch_size = 32

# Helper function to prepare input features
def prepare_inputs(x, y, context_length, prediction_length, lags_sequence, time_indices=None):
    """Split one batch of windows into the tensors passed to the Autoformer model.

    Parameters
    ----------
    x : torch.Tensor
        Past windows, indexed as ``[batch, time, column]``.
    y : torch.Tensor
        Future windows, indexed as ``[batch, time, column]``.
    context_length : int
        Context length configured on the model.
    prediction_length : int
        Accepted for signature compatibility; not read here.
    lags_sequence : list[int]
        Lag offsets; ``max(lags_sequence)`` extra past steps are kept.
    time_indices : list[int] | None
        Column positions of the time features. Defaults to the notebook-level
        ``time_feature_indices``.

    Returns
    -------
    tuple
        ``(past_values, past_time_features, past_observed_mask,
        future_values, future_time_features)``.
    """
    if time_indices is None:
        time_indices = time_feature_indices

    past_length = context_length + max(lags_sequence)

    # Slice values and time features to the SAME number of past steps
    past_window = x[:, :past_length, :]
    past_time_features = past_window[:, :, time_indices]

    # True where a past value is present, False where it is NaN
    past_observed_mask = ~torch.isnan(past_window)

    # Replace NaNs in past_values with zeros
    past_values = torch.nan_to_num(past_window)

    # NaNs in the targets would turn the model output and the loss into NaN, so they are zero-filled too.
    # NOTE(review): without a future mask, zero-filled targets are trained on as real zeros — confirm acceptable.
    future_values = torch.nan_to_num(y)
    future_time_features = y[:, :, time_indices]

    return past_values, past_time_features, past_observed_mask, future_values, future_time_features

In [32]:
# Training loop: one pass over train_loader per epoch, optimising the model and the projection layer jointly.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for context, future_target in train_loader:
        optimizer.zero_grad()

        # Split the batch into the tensors the model expects
        past_values, past_time_features, past_observed_mask, future_values, future_time_features = \
            prepare_inputs(context, future_target, context_length, prediction_length, lags_sequence)

        outputs = model(
            past_values=past_values,
            past_time_features=past_time_features,
            past_observed_mask=past_observed_mask,
            future_values=future_values,
            future_time_features=future_time_features
        ).last_hidden_state

        # Keep the prediction horizon only and project it to one value per input column
        projected_outputs = projection_layer(outputs[:, -config.prediction_length:, :])
        loss = criterion(projected_outputs, future_values)
        loss.backward()
        optimizer.step()

        # Accumulate so the epoch report reflects every batch, not only the last one
        total_loss += loss.item()

    # Mean batch loss over the epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss / max(len(train_loader), 1)}")

KeyboardInterrupt: 

In [None]:
# Evaluate the model
# Mirrors the training step (same inputs, same projection, same loss) with gradients disabled.
# NOTE(review): future_values are fed to the model here as in training, so this measures the
# teacher-forced loss rather than a free-running forecast — confirm that is what is wanted.
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for context, target in val_loader:
        # prepare_inputs returns five tensors and needs the target windows as its second argument
        past_values, past_time_features, past_observed_mask, future_values, future_time_features = \
            prepare_inputs(context, target, context_length, prediction_length, lags_sequence)

        # Forward pass
        outputs = model(
            past_values=past_values,
            past_time_features=past_time_features,
            past_observed_mask=past_observed_mask,
            future_values=future_values,
            future_time_features=future_time_features
        ).last_hidden_state

        # Compare the whole projected prediction horizon with the targets
        projected_outputs = projection_layer(outputs[:, -prediction_length:, :])
        loss = criterion(projected_outputs, future_values)
        test_loss += loss.item()

    # Guard against an empty loader when averaging
    test_loss /= max(len(val_loader), 1)
    print(f"Test Loss: {test_loss}")