In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import L2
from sklearn.metrics import mean_squared_error

In [3]:
# Load the dataset
data = pd.read_csv('data/processedX.csv')

In [4]:
# Extract temperature and humidity columns
temp_columns = [col for col in data.columns if 'temperature_2m' in col]
hum_columns = [col for col in data.columns if 'relative_humidity_2m' in col]


In [5]:
# Normalize the data
scaler_temp = MinMaxScaler()
scaler_hum = MinMaxScaler()
scaled_temp = scaler_temp.fit_transform(data[temp_columns])
scaled_hum = scaler_hum.fit_transform(data[hum_columns])
scaled_data = np.hstack((scaled_temp, scaled_hum))


In [6]:
# Create sequences for time series forecasting
def create_sequences(data, lookback=30):
    sequences = []
    targets = []
    for i in range(len(data) - lookback):
        sequences.append(data[i:i + lookback])
        targets.append(data[i + lookback])
    return np.array(sequences), np.array(targets)


In [7]:
# Create sequences
X, y = create_sequences(scaled_data, lookback=30)


In [8]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [9]:
# Define a more complex LSTM model
model = Sequential([
    Bidirectional(LSTM(256, activation='relu', return_sequences=True,
                       input_shape=(X_train.shape[1], X_train.shape[2]), kernel_regularizer=L2(0.001))),
    Dropout(0.4),
    Bidirectional(LSTM(256, activation='relu', return_sequences=True, kernel_regularizer=L2(0.001))),
    Dropout(0.4),
    Bidirectional(LSTM(128, activation='relu')),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(14)  # 7 temperature + 7 humidity predictions
])

  super().__init__(**kwargs)


In [10]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='mse')


In [11]:
# Callbacks for early stopping and learning rate reduction
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)


In [None]:
# Train the model with a larger batch size (128)
history = model.fit(X_train, y_train, epochs=100, batch_size=128, validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr])


Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 402ms/step - loss: 1.6740 - val_loss: 1.2928 - learning_rate: 1.0000e-04
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 377ms/step - loss: 1.2561 - val_loss: 1.0933 - learning_rate: 1.0000e-04
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 378ms/step - loss: 1.0548 - val_loss: 0.9303 - learning_rate: 1.0000e-04
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 375ms/step - loss: 0.8958 - val_loss: 0.7827 - learning_rate: 1.0000e-04
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 397ms/step - loss: 0.7530 - val_loss: 0.6519 - learning_rate: 1.0000e-04
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 363ms/step - loss: 0.6289 - val_loss: 0.5464 - learning_rate: 1.0000e-04
Epoch 7/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 353ms/step - loss: 0.52

In [None]:
# Save the trained model
model.save('lstm_temperature_humidity_model.h5')  # Save the model to a file


In [None]:
# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")


In [None]:
# Make predictions
predictions = model.predict(X_test)


In [None]:
# Reverse scaling to get actual temperature and humidity values
predicted_temp = scaler_temp.inverse_transform(predictions[:, :7])
predicted_hum = scaler_hum.inverse_transform(predictions[:, 7:])
actual_temp = scaler_temp.inverse_transform(y_test[:, :7])
actual_hum = scaler_hum.inverse_transform(y_test[:, 7:])


In [None]:
# Calculate RMSE for temperature and humidity
rmse_temp = np.sqrt(mean_squared_error(actual_temp, predicted_temp))
rmse_hum = np.sqrt(mean_squared_error(actual_hum, predicted_hum))


In [None]:
# Calculate accuracy percentage
accuracy_temp = (1 - rmse_temp / np.mean(actual_temp)) * 100
accuracy_hum = (1 - rmse_hum / np.mean(actual_hum)) * 100

print(f"Temperature RMSE: {rmse_temp:.2f}")
print(f"Humidity RMSE: {rmse_hum:.2f}")
print(f"Temperature Accuracy: {accuracy_temp:.2f}%")
print(f"Humidity Accuracy: {accuracy_hum:.2f}%")


In [None]:
# Plot training & validation loss
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()


In [None]:
# Plot actual vs predicted temperature
plt.figure(figsize=(10, 5))
plt.plot(predicted_temp[:, 0], label='Predicted Temperature')
plt.plot(actual_temp[:, 0], label='Actual Temperature')
plt.legend()
plt.title('Temperature: Predicted vs Actual')
plt.show()


In [None]:
# Plot actual vs predicted humidity
plt.figure(figsize=(10, 5))
plt.plot(predicted_hum[:, 0], label='Predicted Humidity')
plt.plot(actual_hum[:, 0], label='Actual Humidity')
plt.legend()
plt.title('Humidity: Predicted vs Actual')
plt.show()