In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the engineered wildfire table from the repository's data directory.
df = pd.read_csv(filepath_or_buffer='../data/engineered_wildfire_data.csv')

In [5]:
# Summarize the loaded frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450342 entries, 0 to 450341
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   temp        450342 non-null  float64
 1   rh          450342 non-null  float64
 2   ws          450342 non-null  float64
 3   wd          450342 non-null  int64  
 4   pcp         450342 non-null  float64
 5   ffmc        450342 non-null  float64
 6   dmc         450342 non-null  float64
 7   dc          450342 non-null  float64
 8   isi         450342 non-null  float64
 9   bui         450342 non-null  float64
 10  fwi         450342 non-null  float64
 11  ros         450339 non-null  float64
 12  sfc         450339 non-null  float64
 13  tfc         450339 non-null  float64
 14  bfc         239132 non-null  float64
 15  hfi         450339 non-null  float64
 16  cfb         450339 non-null  float64
 17  pcuring     425990 non-null  float64
 18  greenup     425990 non-null  float64
 19  el

### Transform the dataset into a PyTorch Forecasting `TimeSeriesDataSet`

In [6]:
def prepare_wildfire_data(df, lat_bins=50, lon_bins=50):
    """Add a calendar date and a spatial-cell ``group_id`` to wildfire records.

    The input frame is left untouched; all new columns are written to a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``year``, ``month``, ``day``, ``lat_sin``, ``lon_sin``
        and ``lon_cos``.
    lat_bins, lon_bins : int, default 50
        Forwarded to ``pd.cut`` as ``bins`` for the latitude and longitude
        columns respectively.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the extra columns ``date``, ``lat``, ``lon``
        (both in radians), ``lat_group``, ``lon_group`` and ``group_id``.
        ``group_id`` is an integer code assigned in order of first appearance
        of each (lat_group, lon_group) pair.
    """
    # Copy first so the caller's frame is not modified as a side effect.
    df = df.copy()

    # Create a datetime column
    df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

    # Recover angular coordinates from their trigonometric encodings
    # (assumes lat_sin / lon_sin / lon_cos are the sine and cosine of the angles).
    df['lat'] = np.arcsin(df['lat_sin'])  # Convert back to latitude
    df['lon'] = np.arctan2(df['lon_sin'], df['lon_cos'])  # Convert back to longitude

    # Bucket each coordinate; labels=False yields the bin index instead of an interval.
    df['lat_group'] = pd.cut(df['lat'], bins=lat_bins, labels=False)
    df['lon_group'] = pd.cut(df['lon'], bins=lon_bins, labels=False)

    # One id per occupied grid cell: build a "lat_lon" key, then integer-encode it.
    df['group_id'] = df['lat_group'].astype(str) + '_' + df['lon_group'].astype(str)
    df['group_id'] = pd.factorize(df['group_id'])[0]

    return df

# Apply the preparation step with the default 50 x 50 bins and rebind df to the result.
df = prepare_wildfire_data(df)

In [7]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: order rows by date and keep the latest 20% as the test set.
# A shuffled split would scatter each series' time steps across both sets and let the
# model train on rows that come after the ones it is evaluated on.
# random_state is dropped because it has no effect when shuffle=False.
train, test = train_test_split(
    df.sort_values('date', kind='stable'),
    test_size=0.2,
    shuffle=False,
)

In [8]:
# --- Data schema: which columns play which role in the time-series dataset ---

# Columns that are constant within a group.
static_categoricals = ['group_id']
static_reals = ['elev', 'lat_sin', 'lat_cos', 'lon_sin', 'lon_cos']

# Time-dependent real-valued columns, split into known and unknown.
time_varying_known_reals = ['month', 'day']
time_varying_unknown_reals = [
    'temp', 'rh', 'ws', 'wd', 'pcp',
    'ffmc', 'dmc', 'dc', 'isi', 'bui',
    'ros', 'sfc', 'tfc', 'bfc', 'hfi', 'cfb',
    'pcuring', 'greenup',
    'sfl', 'cfl', 'tfc0', 'sfc0',
]

# Column(s) identifying an individual series.
group_ids = ['group_id']

# Encoder / prediction window lengths, in time steps.
max_encoder_length = 30
max_prediction_length = 7

# Number of latitude / longitude bins.
lat_bins = 50
lon_bins = 50

In [14]:
!pip install numpy==1.26.4



In [15]:
from sklearn.preprocessing import StandardScaler
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer

# Standardize every real-valued feature. The scaler is fitted on the training split
# only and then applied, unchanged, to the test split.
scaler = StandardScaler()
numerical_features = static_reals + time_varying_known_reals + time_varying_unknown_reals
train[numerical_features] = scaler.fit_transform(train[numerical_features])
# Transform the test rows themselves. Passing `train` here would return an array with
# the training split's row count and values, not the test split's.
test[numerical_features] = scaler.transform(test[numerical_features])

# Create the TimeSeriesDataSet
# NOTE(review): `time_idx` and `target_variable` are not assigned in any visible cell;
# confirm they are defined before this cell runs on a fresh kernel.
# NOTE(review): the dataset is built from the unscaled, unsplit `df`, so the scaling
# above does not reach it. Confirm whether `train` was intended here.
dataset = TimeSeriesDataSet(
    df,
    time_idx=time_idx,
    target=target_variable,
    group_ids=group_ids,
    static_categoricals=static_categoricals,
    static_reals=static_reals,
    time_varying_known_reals=time_varying_known_reals,
    time_varying_unknown_reals=time_varying_unknown_reals,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
)

ModuleNotFoundError: No module named 'numpy.lib.function_base'