In [1]:
import pandas as pd
df = pd.read_csv('new_dataset.csv')
df.head()


Unnamed: 0,valid_time,latitude,longitude,number,expver,u10,v10,d2m,t2m,sp,...,hcc,lcc,mcc,tcc,e,cp,lsp,ptype,sf,z
0,2023-01-01 00:00:00,31.0,77.5,0,1,-0.859558,0.626511,270.64673,273.42407,80197.13,...,0.835052,0.0,0.984222,0.991821,-5.702022e-07,0.0,0.0,0.0,0.0,19556.635
1,2023-01-01 00:00:00,31.0,77.75,0,1,-0.664246,-0.819778,263.54907,271.98853,79665.13,...,0.88974,0.0,0.968719,0.984161,-3.297115e-06,0.0,0.0,0.0,0.0,20091.033
2,2023-01-01 00:00:00,31.0,78.0,0,1,-0.497253,-1.05806,254.27562,271.30103,76393.13,...,0.899414,0.0,0.938049,0.961334,-5.457783e-06,0.0,0.0,0.0,0.0,23481.467
3,2023-01-01 00:00:00,31.0,78.25,0,1,-0.787292,-1.046341,252.62328,267.635,72034.13,...,0.695007,0.0,0.956085,0.971252,-3.773952e-06,0.0,0.0,0.0,0.0,28226.463
4,2023-01-01 00:00:00,31.0,78.5,0,1,-0.747253,-1.185989,253.39671,262.30103,66061.13,...,0.311951,0.0,0.987366,0.993134,-5.851034e-07,0.0,0.0,0.0,1.192093e-07,35119.863


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load CSV data
data = pd.read_csv('new_dataset.csv')

# Fill missing data
data.fillna(method='ffill', inplace=True)

# Feature Engineering for Heat Index (Real-Feel Temperature)
# Use dewpoint temperature and air temperature (d2m and t2m) for heat index estimation
data['heat_index'] = 0.5 * (data['t2m'] + data['d2m'])

# Target variables: labels for each task
y_storm_event = data['tp']  # total precipitation for storm event prediction
y_heat_index = data['heat_index']  # for heat index prediction
y_drought = data['e']  # evaporation for drought index prediction

# Features
X = data[['u10', 'v10', 'd2m', 't2m', 'sp', 'tp', 'skt', 'ssrd', 'hcc', 'lcc', 'mcc', 'tcc', 'z']]

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_storm_train, y_storm_test, y_heat_train, y_heat_test, y_drought_train, y_drought_test = train_test_split(
    X_scaled, y_storm_event, y_heat_index, y_drought, test_size=0.2, random_state=42
)

# Reshape input data for LSTM [samples, time_steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

### 2. **LSTM Model for Multi-task Learning**

# Define the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=(1, X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))

# Output layers for each task
model.add(Dense(3))  # One output for each task (storm, heat, drought)

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train, np.column_stack([y_storm_train, y_heat_train, y_drought_train]),
                    epochs=50, batch_size=32, validation_split=0.2)

### 3. **Evaluation**
# Evaluate on test data
results = model.evaluate(X_test, np.column_stack([y_storm_test, y_heat_test, y_drought_test]))
print("Test Loss, Test MAE:", results)


  data.fillna(method='ffill', inplace=True)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss, Test MAE: [0.3337078392505646, 0.2819579541683197]


In [3]:
from sklearn.metrics import mean_squared_error

# Make predictions on the test data
y_pred = model.predict(X_test)

# Split the predictions into respective tasks
y_storm_pred, y_heat_pred, y_drought_pred = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]

# Calculate MSE for each task
mse_storm = mean_squared_error(y_storm_test, y_storm_pred)
mse_heat = mean_squared_error(y_heat_test, y_heat_pred)
mse_drought = mean_squared_error(y_drought_test, y_drought_pred)

# Print the results
print(f"MSE for Storm Event Prediction: {mse_storm}")
print(f"MSE for Heat Index Prediction: {mse_heat}")
print(f"MSE for Drought Index Prediction: {mse_drought}")


MSE for Storm Event Prediction: 1.2339225821627374e-06
MSE for Heat Index Prediction: 1.0011203291645745
MSE for Drought Index Prediction: 1.5690173450880654e-06


Model performance is excellent for both storm event and drought index prediction, with extremely low MSE values.Heat index is moderately good.

In [4]:
# Compile the model
model.compile(
    optimizer='adam',                      # Optimizer (Adam works well for LSTMs)
    loss=['mean_squared_error',            # Loss function for each output task
          'mean_squared_error',
          'mean_squared_error'],
    loss_weights=[1.0, 1.0, 1.0],          # You can adjust the weights if you want to prioritize some tasks over others
    metrics=['mean_squared_error']          # Evaluation metric for the tasks
)

# Print model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 128)            72704     
                                                                 
 dropout (Dropout)           (None, 1, 128)            0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 3)                 195       
                                                                 
Total params: 122307 (477.76 KB)
Trainable params: 122307 (477.76 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [5]:
# Save the entire model to a file
model.save('multi_task_lstm_model.h5')

print("Model saved successfully!")


Model saved successfully!


  saving_api.save_model(
