# Assignment 7 
## Objective
Build a Long Short-Term Memory (LSTM) recurrent neural network to predict the current global active power at the time step (t), given prior measurements at the time step (t-1).

# Recurrent Neural Network

## Part 1 - Data Preprocessing

### Importing the libraries

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

### Importing the training set

In [2]:
# Load the dataset
df = pd.read_csv('household_power_consumption.txt', sep=';', 
                            parse_dates={'DateTime': ['Date', 'Time']}, 
                            infer_datetime_format=True, 
                            low_memory=False,  # Prevent type inference issues
                            na_values=['?'])   # Treat '?' as NaN

# Display the first few rows
print("Dataset head:")
display(df.head())

Dataset head:


Unnamed: 0,DateTime,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2,2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


## Checking for missing data

In [3]:
# Check for missing values
print("\nMissing values:")
display(df.isnull().sum())

# Fill missing values (e.g., forward-fill)
df.fillna(method='ffill', inplace=True)


Missing values:


DateTime                     0
Global_active_power      25979
Global_reactive_power    25979
Voltage                  25979
Global_intensity         25979
Sub_metering_1           25979
Sub_metering_2           25979
Sub_metering_3           25979
dtype: int64

## Create Time Sequence

In [4]:
# Create sequences function
def prepare_sequences(data, seq_length):
    """Prepare sequences for time series prediction"""
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:(i + seq_length), 0])
        y.append(data[i + seq_length, 0])
    return np.array(X), np.array(y)

In [5]:
# Set parameters
seq_length = 400  # Number of time steps to look back
train_split = 0.8  # Training data percentage

# Extract and scale the data
data = df['Global_active_power'].values.reshape(-1, 1)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# Create sequences
X, y = prepare_sequences(data_scaled, seq_length)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

## Split Train and Test size

In [6]:
# Split into train and test sets
train_size = int(len(X) * train_split)
X_train = X[:train_size]
y_train = y[:train_size]
X_test = X[train_size:]
y_test = y[train_size:]

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

Training set shape: (1659887, 400, 1)
Test set shape: (414972, 400, 1)


## Part 2 - Building and Training the RNN

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np

# Create a data generator to handle large datasets
class TimeSeriesGenerator(tf.keras.utils.Sequence):
    def __init__(self, data, targets, sequence_length, batch_size=128):
        self.data = data
        self.targets = targets
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.indexes = np.arange(len(data) - sequence_length)
        
    def __len__(self):
        return (len(self.indexes) + self.batch_size - 1) // self.batch_size
    
    def __getitem__(self, index):
        start_idx = index * self.batch_size
        end_idx = min((index + 1) * self.batch_size, len(self.indexes))
        batch_indexes = self.indexes[start_idx:end_idx]
        
        batch_X = np.array([
            self.data[idx:idx + self.sequence_length] 
            for idx in batch_indexes
        ])
        batch_y = np.array([
            self.targets[idx + self.sequence_length - 1] 
            for idx in batch_indexes
        ])
        
        return batch_X, batch_y

# Data preparation
sequence_length = 24  # Adjust based on your needs
batch_size = 128     # Reduced batch size to manage memory

# Create generators for training and validation
# Assuming X_train and y_train are your original data arrays
train_size = int(0.8 * len(X_train))
X_train_data = X_train[:train_size]
y_train_data = y_train[:train_size]
X_val_data = X_train[train_size:]
y_val_data = y_train[train_size:]

train_generator = TimeSeriesGenerator(
    X_train_data, 
    y_train_data,
    sequence_length,
    batch_size=batch_size
)

val_generator = TimeSeriesGenerator(
    X_val_data,
    y_val_data,
    sequence_length,
    batch_size=batch_size
)

# Create the model
regressor = Sequential()

# First LSTM layer
regressor.add(LSTM(units=32,  # Reduced units
                  activation='tanh',
                  recurrent_activation='sigmoid',
                  return_sequences=True,
                  input_shape=(sequence_length, 1)))
regressor.add(BatchNormalization())
regressor.add(Dropout(0.2))

# Second LSTM layer
regressor.add(LSTM(units=16,  # Reduced units
                  activation='tanh',
                  recurrent_activation='sigmoid',
                  return_sequences=False))
regressor.add(BatchNormalization())
regressor.add(Dropout(0.2))

# Output layer
regressor.add(Dense(units=1))

# Compile with optimized settings
optimizer = Adam(learning_rate=0.001)
regressor.compile(optimizer=optimizer,
                 loss='mse',
                 metrics=['mae'])

# Callbacks
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=0.00001
    )
]

# Train the model using generators
history = regressor.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=callbacks
)

# Function to make predictions (use when needed)
def predict_sequence(model, input_sequence):
    return model.predict(input_sequence.reshape(1, sequence_length, 1))

  super().__init__(**kwargs)
  self._warn_if_super_not_called()


Epoch 1/100


ValueError: Exception encountered when calling LSTMCell.call().

[1mDimensions must be equal, but are 400 and 1 for '{{node sequential_7_1/lstm_14_1/lstm_cell_1/MatMul}} = MatMul[T=DT_HALF, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_7_1/lstm_14_1/strided_slice_2, sequential_7_1/lstm_14_1/lstm_cell_1/Cast/Cast)' with input shapes: [?,400], [1,128].[0m

Arguments received by LSTMCell.call():
  • inputs=tf.Tensor(shape=(None, 400), dtype=float16)
  • states=('tf.Tensor(shape=(None, 32), dtype=float16)', 'tf.Tensor(shape=(None, 32), dtype=float16)')
  • training=True