In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import keras_tuner as kt
from sklearn.utils.class_weight import compute_class_weight
from keras.models import load_model

In [None]:
# Colab-only step: upload the dataset CSV from the local machine into the runtime.
# `uploaded` keeps whatever files.upload() returns for the selected file(s).
from google.colab import files
uploaded = files.upload()

Saving modis_flood_features_cleaned_v1.1.csv to modis_flood_features_cleaned_v1.1.csv


In [None]:
# Load the dataset from the uploaded CSV; the 'date' column is parsed into datetimes.
df = pd.read_csv('modis_flood_features_cleaned_v1.1.csv', parse_dates=['date'])

In [None]:
# Inspect the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1472156 entries, 0 to 1472155
Data columns (total 16 columns):
 #   Column          Non-Null Count    Dtype         
---  ------          --------------    -----         
 0   date            1472156 non-null  datetime64[ns]
 1   lon             1472156 non-null  float64       
 2   lat             1472156 non-null  float64       
 3   flooded         1472156 non-null  float64       
 4   jrc_perm_water  1472156 non-null  float64       
 5   precip_1d       1472156 non-null  float64       
 6   precip_3d       1472156 non-null  float64       
 7   NDVI            1472156 non-null  float64       
 8   NDWI            1472156 non-null  float64       
 9   landcover       1472156 non-null  float64       
 10  elevation       1472156 non-null  float64       
 11  slope           1472156 non-null  float64       
 12  aspect          1472156 non-null  float64       
 13  upstream_area   1472156 non-null  float64       
 14  TWI             14

In [None]:
# Preprocessing: separate the feature matrix from the label column.
# errors='ignore' lets the drop succeed even if one of the listed columns is absent.
X = df.drop(columns=['date', 'target', 'flooded', 'jrc_perm_water'], errors='ignore')
y = df['target']
# Coerce every feature to numeric; values that cannot be parsed become NaN and are zero-filled.
X = X.apply(pd.to_numeric, errors='coerce').fillna(0)

# Normalization: rescale every feature to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset before cross-validation,
# so test-fold statistics leak into the scaling — consider fitting per training fold.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# A stray RobustScaler example was removed here: it referenced the undefined names
# `preprocessing` and `x` (NameError on a fresh kernel) and its result was never used.

# Reshape to (samples, timesteps=1, features), the 3-D layout the LSTM layers consume.
X_lstm = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [None]:
# Model factory used by Keras Tuner to build one LSTM candidate per trial
def build_model(hp):
    """Build and compile a stacked two-layer LSTM binary classifier.

    Args:
        hp: Keras Tuner hyperparameter container. Three values are sampled from it:
            'units' (64..256 in steps of 64), 'dropout' (0.1..0.5 in steps of 0.1,
            shared by both Dropout layers) and 'learning_rate' (1e-4..1e-2, log scale).

    Returns:
        A compiled Sequential model with a single sigmoid output unit.
    """
    # Sample the hyperparameters up front, in the same order they were originally registered.
    lstm_units = hp.Int('units', min_value=64, max_value=256, step=64)
    dropout_rate = hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')

    # Input shape is taken from the notebook-level X_lstm array: (timesteps, features).
    sequence_shape = (X_lstm.shape[1], X_lstm.shape[2])

    model = Sequential([
        # First LSTM returns the full sequence so that a second LSTM can be stacked on top.
        LSTM(lstm_units, input_shape=sequence_shape, return_sequences=True),
        Dropout(dropout_rate),
        # Second LSTM is half as wide and emits only its final state.
        LSTM(lstm_units // 2, return_sequences=False),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['f1_score', 'precision', 'recall'],
    )
    return model

In [None]:
# Tuning
!pip install keras-tuner
import keras_tuner as kt
tuner = kt.Hyperband(build_model,
                     objective='val_loss',
                     max_epochs=10,
                     factor=5,
                     directory='hyperband',
                     project_name='lstm_tuning')

Reloading Tuner from hyperband/lstm_tuning/tuner0.json


In [None]:
# Run the hyperparameter search, holding out the last 20% of the data for validation.
# NOTE(review): Hyperband assigns per-trial epoch budgets itself — confirm whether
# epochs=5 has any effect here.
tuner.search(X_lstm, y, epochs=5, batch_size=32, validation_split=0.2)

# Show the best hyperparameter set found by the search.
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters: ", best_hyperparameters.values)

Best Hyperparameters:  {'units': 192, 'dropout': 0.1, 'learning_rate': 0.0016583485942236376, 'tuner/epochs': 5, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [None]:
# Best model according to the tuning results.
# get_best_models() rebuilds the top trial's network and reloads its trained checkpoint
# weights; hypermodel.build() alone would only yield a freshly initialised, untrained
# model, which is not worth saving as "the best model".
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

# Persist the best model to disk.
best_model.save('best_lstm_model.h5')
print("Model terbaik telah disimpan ke dalam file 'best_lstm_model.h5'")




Model terbaik telah disimpan ke dalam file 'best_lstm_model.h5'


In [None]:
# Cross-validation setup: 10 shuffled, stratified folds with a fixed seed
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Per-fold result collectors (one independent list per metric)
f1_scores, precision_scores, recall_scores = [], [], []
f1_macro_scores, roc_auc_scores, conf_matrices = [], [], []

In [None]:
# Stratified K-Fold cross-validation: build, train and evaluate a fresh LSTM on EVERY fold.
# Model construction, training and scoring must live inside the loop body; otherwise only
# the split of the last fold is ever trained and evaluated.

# Start from empty collectors so re-running this cell does not accumulate stale scores.
for fold_results in (f1_scores, precision_scores, recall_scores, f1_macro_scores, conf_matrices):
    fold_results.clear()

for train_index, test_index in kf.split(X_lstm, y):
    X_train, X_test = X_lstm[train_index], X_lstm[test_index]
    # kf.split yields positional indices, so select the labels by position.
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Re-create the LSTM for this fold so no weights carry over from the previous one.
    model = Sequential()
    model.add(LSTM(192, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(LSTM(96, return_sequences=False))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    # Compile with the learning rate found by the tuner.
    model.compile(optimizer=Adam(learning_rate=0.0016583485942236376), loss='binary_crossentropy', metrics=['f1_score','precision','recall'])

    # Early stopping on validation loss, restoring the best weights seen.
    # NOTE(review): the test fold doubles as validation data, so early stopping peeks at
    # the data used for scoring — consider a separate validation split of the training fold.
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Train the model for this fold.
    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)

    # Evaluate on this fold: sigmoid outputs are rounded to hard 0/1 predictions.
    y_pred = model.predict(X_test).round()

    f1_scores.append(f1_score(y_test, y_pred))
    precision_scores.append(precision_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))

    # Macro F1 (unweighted mean of the per-class F1 scores).
    f1_macro_scores.append(f1_score(y_test, y_pred, average='macro'))

    # Keep the confusion matrix of this fold.
    conf_matrices.append(confusion_matrix(y_test, y_pred))

# Architecture summary (shown once, for the model of the last fold).
model.summary()

[1m4601/4601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step


In [None]:
# Mean metrics of the LSTM experiment across the Stratified K-Fold results.
# f1_scores is filled by f1_score() with its default average='binary', i.e. the F1 of the
# positive class — not a micro average — so the label says "Binary".
print("Eksperimen LSTM dengan Stratified K-Fold - Rata-rata Metrik Evaluasi:")
print(f"F1 Score (Binary): {np.mean(f1_scores)}")
print(f"Precision: {np.mean(precision_scores)}")
print(f"Recall: {np.mean(recall_scores)}")
print(f"F1 Score (Macro): {np.mean(f1_macro_scores)}")

Eksperimen LSTM dengan Stratified K-Fold - Rata-rata Metrik Evaluasi:
F1 Score (Micro): 0.49774236387782206
Precision: 0.7326035965598123
Recall: 0.37691069991954945
F1 Score (Macro): 0.7422800351978863


In [None]:
# Mean confusion matrix over the folds.
# Round to the nearest integer before casting: a bare astype(int) truncates the
# fractional means and biases every cell downward.
avg_conf_matrix = np.rint(np.mean(conf_matrices, axis=0)).astype(int)
print("\nRata-rata Confusion Matrix (10-Fold):")
print(avg_conf_matrix)


Rata-rata Confusion Matrix (10-Fold):
[[141559    684]
 [  3098   1874]]


In [None]:
# Load the previously saved best model back from disk.
best_model = load_model('best_lstm_model.h5')
print("Model terbaik berhasil dimuat dari file 'best_lstm_model.h5'")

In [None]:
# Class-weight experiment: same Stratified K-Fold protocol, but the loss is re-weighted
# with 'balanced' class weights to compensate for the class imbalance.
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weight_dict = dict(zip(np.unique(y), class_weights))

# Dedicated collectors for this experiment. Appending to the baseline lists would blend
# both experiments into a single, meaningless average.
cw_f1_scores = []
cw_precision_scores = []
cw_recall_scores = []

# Model construction, training and scoring all happen inside the loop so that every
# fold is evaluated, not just the last split.
for train_index, test_index in kf.split(X_lstm, y):
    X_train, X_test = X_lstm[train_index], X_lstm[test_index]
    # kf.split yields positional indices, so select the labels by position.
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fresh LSTM for this fold.
    model_class_weights = Sequential()
    model_class_weights.add(LSTM(192, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
    model_class_weights.add(Dropout(0.1))
    model_class_weights.add(LSTM(96, return_sequences=False))
    model_class_weights.add(Dropout(0.1))
    model_class_weights.add(Dense(1, activation='sigmoid'))

    # NOTE(review): this uses Adam's default learning rate and no early stopping, unlike
    # the baseline experiment — confirm whether the two runs are meant to be comparable.
    model_class_weights.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['f1_score','precision','recall'])

    # Train with the class weights applied to the loss.
    model_class_weights.fit(X_train, y_train, epochs=5, batch_size=32, class_weight=class_weight_dict, validation_data=(X_test, y_test), verbose=0)

    # Predict on the held-out fold and score the hard 0/1 predictions.
    y_pred_class_weights = model_class_weights.predict(X_test).round()

    cw_f1_scores.append(f1_score(y_test, y_pred_class_weights))
    cw_precision_scores.append(precision_score(y_test, y_pred_class_weights))
    cw_recall_scores.append(recall_score(y_test, y_pred_class_weights))

# Mean metrics of the class-weight experiment only.
print("Eksperimen LSTM dengan Class Weights - Rata-rata Metrik Evaluasi:")
print(f"F1 Score: {np.mean(cw_f1_scores)}")
print(f"Precision: {np.mean(cw_precision_scores)}")
print(f"Recall: {np.mean(cw_recall_scores)}")

Eksperimen LSTM dengan Class Weights - Rata-rata Metrik Evaluasi:
F1 Score: 0.46671832838798355
Precision: 0.5088485170978114
Recall: 0.6502413515687852
