In [1]:
import pandas as pd

# Load the dataset
dataset_path = "C:/Users/ATAISH NEHRA/Downloads/merged_natural_disaster_dataset_1992_2020.csv"
data = pd.read_csv(dataset_path)

In [2]:
# Select relevant features
features = [
    'Country', 'Year', 'Temperature', 'Sea level Value', 
    'Artificial surfaces (including urban and associated areas): Climate altering', 
    'Grassland: Climate regulating', 'Woody crops: Climate regulating', 
    'Terrestrial barren land: Climate neutral', 'Shrub-covered areas: Climate regulating', 
    'Carbon stocks in forests', 'Forest area', 'Index of carbon stocks in forests', 
    'Index of forest extent', 'Land area'
]
target = ['Total Disasters']

In [3]:
# Filter the dataset for selected features and the target
data_selected = data[features + target]

# Display the first few rows to verify
print(data_selected.head())

       Country  Year  Temperature  Sea level Value  \
0  Afghanistan  1992       -0.294        -5.160000   
1  Afghanistan  1993        0.220        -0.245294   
2  Afghanistan  1994        0.430        -2.592353   
3  Afghanistan  1995        0.359         2.565000   
4  Afghanistan  1996       -0.116        -5.005455   

   Artificial surfaces (including urban and associated areas): Climate altering  \
0                                            25.3207                              
1                                            25.7741                              
2                                            26.1577                              
3                                            26.6867                              
4                                            27.2273                              

   Grassland: Climate regulating  Woody crops: Climate regulating  \
0                     24035.3850                          87.4339   
1                     24035.4149      

In [4]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import numpy as np

# One-hot encode the 'Country' column and standardize the other numeric features
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [col for col in features if col not in ['Country', 'Year']]),
        ('cat', OneHotEncoder(), ['Country'])
    ])

# Separate features and target
X = data_selected[features]
y = data_selected[target].values

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# For LSTM, we also need to reshape our target to be 3D (samples, timesteps, features)
# Currently simplifying by assuming one year per sample, so timesteps = 1 for now
y_reshaped = y.reshape((y.shape[0], 1, 1))

print(f"Shape of X_preprocessed: {X_preprocessed.shape}")
print(f"Shape of y_reshaped: {y_reshaped.shape}")

Shape of X_preprocessed: (899, 43)
Shape of y_reshaped: (899, 1, 1)


In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.regularizers import l2

# Assuming X_preprocessed is already in a suitable shape for a simplified LSTM model
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y_reshaped, test_size=0.2, random_state=42)

model = model = Sequential([
    LSTM(300, activation='tanh', return_sequences=True, input_shape=(X_train.shape[1], 1),
         kernel_regularizer=l2(0.001), recurrent_regularizer=l2(0.001), bias_regularizer=l2(0.001)),
    Dropout(0.2),
    LSTM(50, activation='tanh', return_sequences=False,
         kernel_regularizer=l2(0.001), recurrent_regularizer=l2(0.001), bias_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(2)
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')  # Reduced learning rate for potentially finer convergence

# Reshape X to fit the LSTM input requirements: [samples, timesteps, features]
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train the model
history = model.fit(X_train_reshaped, y_train, epochs=500, validation_split=0.2, verbose=1)

# Print the model's architecture
model.summary()

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [19]:
model.save('lstm_natural_disasters_model.h9')



INFO:tensorflow:Assets written to: lstm_natural_disasters_model.h9\assets


INFO:tensorflow:Assets written to: lstm_natural_disasters_model.h9\assets


In [20]:
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the trained LSTM model
model = load_model('lstm_natural_disasters_model.h9')

# Assuming these are the relevant features based on previous steps
features = [
    'Temperature', 'Sea level Value',
    'Artificial surfaces (including urban and associated areas): Climate altering',
    'Grassland: Climate regulating', 'Woody crops: Climate regulating',
    'Terrestrial barren land: Climate neutral', 'Shrub-covered areas: Climate regulating',
    'Carbon stocks in forests', 'Forest area', 'Index of carbon stocks in forests',
    'Index of forest extent', 'Land area'
]

# Preprocessor setup (repeat from the training phase)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), features),
        ('cat', OneHotEncoder(), ['Country'])
    ],
    remainder='drop'  # This drops the columns that we don't manually transform
)

In [21]:
# Fit the preprocessor on your original data again or ensure it's saved and loaded appropriately
# For simplicity, let's assume it's fit on the original dataset again
# IMPORTANT: This needs your original dataset loaded as `data`
data = pd.read_csv("C:/Users/ATAISH NEHRA/Downloads/merged_natural_disaster_dataset_1992_2020.csv")  # Adjust path as needed
preprocessor.fit(data[['Country'] + features])

def make_prediction(country, year_range, preprocessor, model):
    # Create dummy data for prediction
    predict_data = pd.DataFrame({
        'Country': [country for _ in year_range],
        'Year': list(year_range),
    })
    for feature in features:
        predict_data[feature] = 0  # Placeholder, replace with appropriate logic

    # Preprocess the data
    X_preprocessed = preprocessor.transform(predict_data)
    
    # Correctly reshape for LSTM
    X_reshaped = X_preprocessed.reshape((len(year_range), -1, 1))  # Adjusted reshaping

    # Make prediction
    predictions = model.predict(X_reshaped)
    return np.sum(predictions)


# Predict total disasters for a given country and range of years
country = 'Nepal'  # Example country
year_range = range(2021, 2026)  # Example year range
total_disasters = make_prediction(country, year_range, preprocessor, model)
print(f"Predicted total disasters for {country} in {year_range}: {total_disasters}")

Predicted total disasters for Nepal in range(2021, 2026): 18.408109664916992


In [22]:
# Load the trained LSTM model if not already loaded
model = load_model('lstm_natural_disasters_model.h6')

# Prepare X_test in the same way as X_train (this part is correct in your provided code)
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Make predictions on the test data
y_pred = model.predict(X_test_reshaped)

# Flatten y_pred to ensure it's 2D (samples, features)
y_pred_flat = y_pred.flatten()

# Ensure y_test is flattened if it's not already
# This corrects the shape of y_test to match the flattened y_pred for evaluation
y_test_flat = y_test.reshape(-1)

# Calculate the evaluation metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(y_test_flat, y_pred_flat)
mse = mean_squared_error(y_test_flat, y_pred_flat)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 3.8053341216511196
Mean Squared Error (MSE): 34.087010386735926
Root Mean Squared Error (RMSE): 5.838408206586443
