In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, train_test_split

In [8]:
df = pd.read_csv(r"data\labelled\santacruz_labelled.csv")

In [9]:
df

Unnamed: 0,INDEX,YEAR,HR,DT,MN,DPT,WBT,DBT,Normal_Temp,SLP,MSLP,RH,DD,FFF,RF,Heatwave
0,43003,2010,0,1,1,14.8,17.4,21.2,30.4,1008.4,1010.1,67,0.0,0.0,0.0,0
1,43003,2010,0,2,1,17.8,19.4,22.0,30.4,1009.5,1011.2,77,0.0,0.0,0.0,0
2,43003,2010,0,3,1,20.0,21.4,24.0,30.4,1011.5,1013.2,78,0.0,0.0,0.0,0
3,43003,2010,0,4,1,16.1,18.6,22.4,30.4,1012.0,1013.7,68,32.0,6.0,0.0,0
4,43003,2010,0,5,1,17.8,18.4,19.4,30.4,1009.3,1011.0,90,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35271,43003,2024,84,26,11,13.5,27.4,19.4,33.4,0.0,0.0,69,0.0,0.0,0.0,0
35272,43003,2024,84,27,11,11.7,27.4,22.0,33.4,0.0,0.0,52,0.0,0.0,0.0,0
35273,43003,2024,84,28,11,12.1,27.4,19.6,33.4,0.0,0.0,62,0.0,0.0,0.0,0
35274,43003,2024,84,29,11,12.5,27.4,22.0,33.4,0.0,0.0,55,5.0,6.0,0.0,0


In [10]:
features = ['DPT', 'WBT', 'DBT', 'Normal_Temp', 'SLP', 'MSLP', 'RH', 'DD', 'FFF', 'RF']
target = 'Heatwave'

scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

In [11]:
# Convert to sequences for LSTM
sequence_length = 10
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(df[features].iloc[i:i+sequence_length].values)
    y.append(df[target].iloc[i+sequence_length])
X, y = np.array(X), np.array(y)

In [12]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_no = 1
accuracies = []

for train_idx, test_idx in kfold.split(X, y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

In [13]:
model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length, len(features))),
        MaxPooling1D(pool_size=2),
        LSTM(50, return_sequences=True),
        LSTM(50),
        Dense(50, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [15]:
print(f"Training on Fold {fold_no}...")
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)

Training on Fold 1...
Epoch 1/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9858 - loss: 0.1056 - val_accuracy: 0.9902 - val_loss: 0.0521
Epoch 2/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9900 - loss: 0.0609 - val_accuracy: 0.9902 - val_loss: 0.0515
Epoch 3/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9908 - loss: 0.0581 - val_accuracy: 0.9902 - val_loss: 0.0515
Epoch 4/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9894 - loss: 0.0593 - val_accuracy: 0.9902 - val_loss: 0.0511
Epoch 5/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9901 - loss: 0.0561 - val_accuracy: 0.9902 - val_loss: 0.0533
Epoch 6/20
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9904 - loss: 0.0548 - val_accuracy: 0.9902 - val_loss: 0.0504
Epoch

<keras.src.callbacks.history.History at 0x206cd5d4310>

In [16]:
loss, accuracy = model.evaluate(X_test, y_test)
accuracies.append(accuracy)
print(f"Fold {fold_no} Accuracy: {accuracy:.4f}")
fold_no += 1

# Print average accuracy
print(f"Average Cross-Validation Accuracy: {np.mean(accuracies):.4f}")


[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9943 - loss: 0.0300
Fold 1 Accuracy: 0.9902
Average Cross-Validation Accuracy: 0.9902
