# Weather Forecasting RNN - Minimal Version (For Memorization)

Time-series prediction of `RainTomorrow` using an LSTM.


In [None]:
# Imports
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# One shared seed for both NumPy and TensorFlow so repeated runs start from
# the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [None]:
# Load data and put the rows in chronological order
df = pd.read_csv('rain_forecasting assign4.csv')
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Encode categorical: a single encoder is fitted on RainToday and reused for
# RainTomorrow, so both columns share the same label -> integer mapping
label_encoder = LabelEncoder()
df['RainToday'] = label_encoder.fit_transform(df['RainToday'])
df['RainTomorrow'] = label_encoder.transform(df['RainTomorrow'])

# Select features (model inputs) and the target column
feature_columns = [
    'MinTemp', 'MaxTemp',
    'Humidity9am', 'Humidity3pm',
    'Pressure9am', 'Pressure3pm',
    'WindSpeed9am', 'WindSpeed3pm',
    'RainToday',
]
X = df[feature_columns].to_numpy()
y = df['RainTomorrow'].to_numpy()

print(f"X shape: {X.shape}, y shape: {y.shape}")


In [None]:
# Normalize features.
# The scaler's min/max are learned from the leading 80% of rows only (the same
# fraction the later chronological split assigns to training) so that
# statistics of the held-out test period do not leak into the model inputs.
# All rows are then transformed with those ranges, which means values from the
# later rows can fall slightly outside [0, 1].
scaler = MinMaxScaler()
n_fit_rows = max(1, int(len(X) * 0.8))
scaler.fit(X[:n_fit_rows])
X_scaled = scaler.transform(X)


In [None]:
# Create sequences
def create_sequences(data, targets, lookback=7):
    """Build sliding-window samples for a sequence model.

    For every index ``i`` from ``lookback`` up to ``len(data) - 1`` the sample
    is the ``lookback`` rows ``data[i - lookback:i]`` (row ``i`` itself is not
    part of the window) and its label is ``targets[i]``.

    Args:
        data: Row-indexed feature array-like, e.g. shape
            ``(n_rows, n_features)``.
        targets: Sequence of labels indexable by the same row numbers.
        lookback: Number of consecutive rows per window; must be >= 1.

    Returns:
        A tuple ``(X_seq, y_seq)`` of NumPy arrays. ``X_seq`` has shape
        ``(n_windows, lookback, *data.shape[1:])`` and, for scalar labels,
        ``y_seq`` has shape ``(n_windows,)``, where
        ``n_windows = max(len(data) - lookback, 0)``.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    data = np.asarray(data)
    label_rows = range(lookback, len(data))

    if not label_rows:
        # Too few rows for even one window. np.array([]) would collapse to a
        # 1-D array here, so build the empty result with the full window shape
        # to keep downstream code such as ``X_seq.shape[2]`` working.
        empty_windows = np.empty((0, lookback) + data.shape[1:], dtype=data.dtype)
        empty_labels = np.empty((0,), dtype=np.asarray(targets).dtype)
        return empty_windows, empty_labels

    X_seq = np.array([data[i - lookback:i] for i in label_rows])
    y_seq = np.array([targets[i] for i in label_rows])
    return X_seq, y_seq

# Each sample is a window of 7 consecutive rows of scaled features.
lookback = 7
# NOTE(review): create_sequences pairs rows i-lookback..i-1 with y[i], and y is
# the RainTomorrow column, so the row whose RainTomorrow value is being
# predicted is not itself in the window. Confirm this one-row gap is intended.
X_seq, y_seq = create_sequences(X_scaled, y, lookback=lookback)
print(f"Sequence shape: {X_seq.shape}")


In [None]:
# Chronological hold-out (no shuffling): the first 80% of the windows train
# the model and the remaining 20% test it, so the test windows come from the
# later, date-sorted rows.
split_idx = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
print(f"Train: {X_train.shape}, Test: {X_test.shape}")


In [None]:
# Build LSTM model: two stacked LSTM layers with dropout, followed by a small
# dense head ending in one sigmoid unit for the binary target.
# The input shape is declared with an explicit keras.Input as the first entry
# instead of the `input_shape=` layer argument, which Keras 3 deprecates for
# Sequential models (it emits a UserWarning).
n_features = X_train.shape[2]
model = keras.Sequential([
    keras.Input(shape=(lookback, n_features)),
    # return_sequences=True passes the per-timestep outputs to the next LSTM
    layers.LSTM(50, return_sequences=True),
    layers.Dropout(0.2),
    # the second LSTM returns only its final output, which feeds the dense head
    layers.LSTM(50, return_sequences=False),
    layers.Dropout(0.2),
    layers.Dense(25, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.summary()


In [None]:
# Compile for binary classification: Adam optimizer, binary cross-entropy
# loss, and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train for 30 epochs in mini-batches of 32, holding out 20% of the training
# windows for validation. Per the Keras fit() documentation, validation_split
# takes that slice from the end of the arrays, before any shuffling.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=30,
    verbose=1,
)


In [None]:
# Evaluate: loss and accuracy on the held-out test windows
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Predict: turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold
y_pred_proba = model.predict(X_test)
above_threshold = y_pred_proba >= 0.5
y_pred = above_threshold.astype(int).flatten()

class_names = ['No Rain', 'Rain']
print(classification_report(y_test, y_pred, target_names=class_names))


In [None]:
# Plot training history: accuracy on the left, loss on the right, each with
# its training and validation curve per epoch.
plt.figure(figsize=(12, 4))
panels = [
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
]
for position, (train_key, val_key, axis_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label='Train')
    plt.plot(history.history[val_key], label='Val')
    plt.xlabel('Epoch')
    plt.ylabel(axis_label)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()


In [None]:
# Confusion matrix (rows = true class, columns = predicted class).
# labels=[0, 1] pins the matrix to 2x2 even if one class never occurs in
# y_test/y_pred; without it scikit-learn sizes the matrix from the labels
# actually present, and the fixed two-class ticks and cell annotations below
# would no longer line up (or would index out of bounds).
class_ids = [0, 1]
class_names = ['No Rain', 'Rain']
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
plt.figure(figsize=(6, 5))
plt.imshow(cm, cmap='Blues')
plt.colorbar()
plt.xticks(class_ids, class_names)
plt.yticks(class_ids, class_names)
# Write each count into its cell; the text x-coordinate is the column
# (predicted class) and the y-coordinate is the row (true class).
for (true_idx, pred_idx), count in np.ndenumerate(cm):
    plt.text(pred_idx, true_idx, count, ha='center', va='center', fontsize=14, fontweight='bold')
plt.ylabel('True')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()


In [None]:
# Save model to a .h5 file in the current working directory
model_path = 'weather_forecasting_model.h5'
model.save(model_path)
print("Model saved!")
