# LSTM Forecasting for Household Power Consumption
## Energy Consumption Optimization using Deep Learning

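This notebook builds an LSTM model that forecasts household power consumption, with the goal of supporting energy-usage optimization in buildings and homes. The model is univariate: it predicts the next `Global_active_power` reading from the previous 60 readings.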

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow with one shared value so repeated runs are comparable
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("Libraries imported successfully!")

## 1. Load and Explore Data

In [None]:
# Load the household power consumption dataset (semicolon-separated text file).
# Missing measurements are written as '?' in this file; declaring that marker via
# na_values makes pandas parse them as NaN right away, so the measurement columns
# load as numbers and the missing-value report below counts the real gaps.
df = pd.read_csv(
    'household_power_consumption.txt',
    sep=';',
    na_values=['?'],
    low_memory=False,
)

# Display basic information
print("Dataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nData types:")
print(df.dtypes)
print("\nMissing values:")
print(df.isnull().sum())

## 2. Data Preprocessing

In [None]:
# Replace '?' placeholders with NaN so they are treated as missing values
df.replace('?', np.nan, inplace=True)

# Convert every column after the first two (Date and Time) to numeric;
# errors='coerce' turns any value that still cannot be parsed into NaN
for col in df.columns[2:]:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Handle missing values - forward fill, then backward fill to cover any gap at
# the very start of the series. DataFrame.ffill()/bfill() are used instead of
# fillna(method=...), which is deprecated in pandas 2.x.
df.ffill(inplace=True)
df.bfill(inplace=True)

# Create datetime column from the separate Date and Time strings and use it as the index
df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H:%M:%S')
df.set_index('DateTime', inplace=True)
df.drop(['Date', 'Time'], axis=1, inplace=True)

print("Data after preprocessing:")
print(df.head())
print("\nShape:", df.shape)
print("\nMissing values after preprocessing:")
print(df.isnull().sum())

## 3. Feature Selection and Visualization

In [None]:
# Keep only the Global_active_power column, as a 2-D array (one column)
data = df[['Global_active_power']].values

# Line plot of the whole series, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(data, linewidth=0.5)
ax.set_title('Household Global Active Power Consumption Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Time Period')
ax.set_ylabel('Global Active Power (kW)')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Summary statistics of the target series
print(
    "Data statistics:",
    f"Mean: {data.mean():.2f} kW",
    f"Std: {data.std():.2f} kW",
    f"Min: {data.min():.2f} kW",
    f"Max: {data.max():.2f} kW",
    sep="\n",
)

## 4. Normalize Data

In [None]:
# Normalize the data using MinMaxScaler.
# The scaler is fit on the leading 80% of the series only: fitting on the full
# series would let the min/max of the held-out test period leak into training.
# The 0.8 fraction mirrors the chronological 80-20 train/test split applied to
# the sequences later in the notebook.
scaler = MinMaxScaler(feature_range=(0, 1))
n_fit_rows = int(len(data) * 0.8)
scaler.fit(data[:n_fit_rows])

# Apply the training-period scaling to the whole series; values from the test
# period may therefore fall slightly outside [0, 1]
scaled_data = scaler.transform(data)

print(f"Scaled data shape: {scaled_data.shape}")
print(f"Scaled data range: [{scaled_data.min():.4f}, {scaled_data.max():.4f}]")

## 5. Create Sequences for LSTM

In [None]:
def create_sequences(data, seq_length):
    """Slice a series into overlapping look-back windows and next-step targets.

    Args:
        data: Indexable sequence of observations (e.g. an array with one row
            per time step).
        seq_length: Number of consecutive time steps in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``data[k:k + seq_length]``
        and ``y[k]`` is the observation immediately after that window,
        ``data[k + seq_length]``. There are ``len(data) - seq_length`` pairs.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Look-back window: every sample is built from 60 consecutive time steps
seq_length = 60
X, y = create_sequences(scaled_data, seq_length)

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# 80-20 split by position: the first 80% of the windows train the model,
# the remaining 20% are held out for testing
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

print(f"\nTraining set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

## 6. Build LSTM Model

In [None]:
# Build the LSTM model: three stacked LSTM layers (50, 50 and 25 units), each
# followed by 20% dropout, feeding a single-unit dense output layer.
# NOTE(review): activation='relu' differs from the LSTM default (tanh); Keras only
# uses its fused cuDNN kernel with the default activation - confirm relu is intended.
model = Sequential([
    # Declare the input shape with an explicit Input object rather than passing
    # input_shape= to the first layer, which current Keras discourages
    keras.Input(shape=(seq_length, 1)),
    LSTM(50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(50, activation='relu', return_sequences=True),
    Dropout(0.2),
    # Last recurrent layer returns only its final state, which feeds the dense layer
    LSTM(25, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

# Compile the model: MSE loss, with MAE tracked as an additional metric
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Display model architecture
model.summary()

## 7. Train the Model

In [None]:
# Training options gathered in one place; validation_split reserves 10% of the
# training arrays for validation
training_options = {
    "epochs": 20,
    "batch_size": 32,
    "validation_split": 0.1,
    "verbose": 1,
}

# Fit the model and keep the per-epoch history for plotting
history = model.fit(X_train, y_train, **training_options)

print("\nTraining completed!")

## 8. Plot Training History

In [None]:
# Plot training history: one panel per tracked quantity, each showing the
# training curve next to its validation counterpart
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

# (history key, display label) for each panel, left to right
panels = [
    ('loss', 'Loss'),
    ('mae', 'MAE'),
]

for ax, (metric_key, metric_label) in zip(axes, panels):
    ax.plot(history.history[metric_key], label=f'Training {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    ax.set_title(f'Model {metric_label}', fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 9. Make Predictions

In [None]:
# Run the trained model over the held-out test windows
y_pred = model.predict(X_test)

# Undo the MinMax scaling so predictions and targets are back on the original scale
y_pred_actual = scaler.inverse_transform(y_pred)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

print(f"Predictions shape: {y_pred_actual.shape}")
print(f"First 10 predictions: {y_pred_actual[:10].flatten()}")

## 10. Evaluate Model Performance

In [None]:
# Error metrics on the inverse-transformed test targets and predictions
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)
mse = mean_squared_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)  # square root of the MSE

# Report all metrics in a single call, one per line
print(
    "Model Performance Metrics:",
    f"Mean Squared Error (MSE): {mse:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f} kW",
    f"Mean Absolute Error (MAE): {mae:.4f} kW",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

## 11. Visualize Predictions

In [None]:
def _plot_actual_vs_predicted(actual, predicted, title):
    """Draw the actual and predicted series on a new 14x6 figure and show it.

    Args:
        actual: Observed values to plot.
        predicted: Model predictions to overlay on the same axes.
        title: Figure title.
    """
    plt.figure(figsize=(14, 6))
    plt.plot(actual, label='Actual', linewidth=2)
    plt.plot(predicted, label='Predicted', linewidth=2, alpha=0.7)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.xlabel('Time Period')
    plt.ylabel('Global Active Power (kW)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# Full test period
_plot_actual_vs_predicted(
    y_test_actual,
    y_pred_actual,
    'Actual vs Predicted Power Consumption',
)

# Zoomed view: only the first 500 test samples
_plot_actual_vs_predicted(
    y_test_actual[:500],
    y_pred_actual[:500],
    'Actual vs Predicted Power Consumption (First 500 samples)',
)

## 12. Save Model and Scaler

In [None]:
import pickle

# Persist the trained network in HDF5 format
model.save('lstm_power_model.h5')
print("Model saved as 'lstm_power_model.h5'")

# Pickle the fitted scaler and the look-back window length, in that order.
# Each entry is (object to pickle, target file, confirmation message).
artifacts = (
    (scaler, 'scaler.pkl', "Scaler saved as 'scaler.pkl'"),
    (seq_length, 'seq_length.pkl', "Sequence length saved as 'seq_length.pkl'"),
)
for artifact, path, message in artifacts:
    with open(path, 'wb') as f:
        pickle.dump(artifact, f)
    print(message)

## 13. Energy Optimization Insights

In [None]:
# Summary statistics of the actual (inverse-transformed) test-period consumption
avg_consumption = y_test_actual.mean()
peak_consumption = y_test_actual.max()
low_consumption = y_test_actual.min()

print(
    "\n=== ENERGY CONSUMPTION ANALYSIS ===",
    f"Average Power Consumption: {avg_consumption:.2f} kW",
    f"Peak Power Consumption: {peak_consumption:.2f} kW",
    f"Minimum Power Consumption: {low_consumption:.2f} kW",
    f"Peak-to-Average Ratio: {peak_consumption/avg_consumption:.2f}x",
    sep="\n",
)

# Count the test samples whose consumption exceeds 1.5x the test-period average
high_threshold = avg_consumption * 1.5
high_periods = (y_test_actual > high_threshold).sum()
print(f"\nHigh consumption periods (>1.5x average): {high_periods} out of {len(y_test_actual)} ({high_periods/len(y_test_actual)*100:.1f}%)")

# Absolute prediction error per test sample
errors = np.abs(y_test_actual - y_pred_actual)
print(
    "\n=== PREDICTION ACCURACY ===",
    f"Mean Absolute Error: {errors.mean():.4f} kW",
    f"Max Error: {errors.max():.4f} kW",
    f"Error Std Dev: {errors.std():.4f} kW",
    sep="\n",
)