In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/finaldata-energyconsumption/final_data.csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense
from sklearn.metrics import mean_absolute_percentage_error

# Load the data
data = pd.read_csv('/kaggle/input/finaldata-energyconsumption/final_data.csv')

# Combine Date and Time into a single timestamp column
data['timestamp'] = pd.to_datetime(data['Date'] + ' ' + data['Time'])

# Drop the original Date and Time columns
data.drop(['Date', 'Time'], axis=1, inplace=True)

# Sort the data by timestamp
data = data.sort_values(by='timestamp')

# Set the 'timestamp' as the index
data.set_index('timestamp', inplace=True)

# Select the target and features
target = 'Global_active_power'
features = ['Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

# Ensure all features are numeric
data[features + [target]] = data[features + [target]].apply(pd.to_numeric, errors='coerce')

# Drop rows with missing or invalid values
data = data.dropna()

# Normalize the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data[features + [target]])

# Create sequences for BiLSTM
def create_sequences(data, target_index, seq_length=24):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length, :-1])  # Exclude the target from features
        y.append(data[i+seq_length, target_index])
    return np.array(X), np.array(y)

seq_length = 24  # 24-hour lookback
target_index = len(features)  # Index of the target in the scaled data
X, y = create_sequences(data_scaled, target_index, seq_length)

# Split into train and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Build the BiLSTM model
model = Sequential([
    Bidirectional(LSTM(50, activation='tanh', input_shape=(seq_length, len(features)), return_sequences=True)),
    Bidirectional(LSTM(50, activation='tanh')),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=60, batch_size=256, validation_data=(X_test, y_test))

# Make predictions
y_pred = model.predict(X_test)

# Rescale predictions and true values
y_pred_rescaled = scaler.inverse_transform(
    np.concatenate([np.zeros((len(y_pred), len(features))), y_pred], axis=1)
)[:, target_index]
y_test_rescaled = scaler.inverse_transform(
    np.concatenate([np.zeros((len(y_test), len(features))), y_test.reshape(-1, 1)], axis=1)
)[:, target_index]

# Evaluate using MAPE
mape = mean_absolute_percentage_error(y_test_rescaled, y_pred_rescaled)
print(f'MAPE: {mape:.2f}%')

# Save predictions to CSV
predictions = pd.DataFrame({
    'timestamp': data.index[-len(y_test):],
    'actual': y_test_rescaled,
    'predicted': y_pred_rescaled
})
predictions.to_csv('predicted_energy_consumption.csv', index=False)
print("Predictions saved to 'predicted_energy_consumption.csv'.")

  data = pd.read_csv('/kaggle/input/finaldata-energyconsumption/final_data.csv')
  data['timestamp'] = pd.to_datetime(data['Date'] + ' ' + data['Time'])
  super().__init__(**kwargs)


Epoch 1/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 10ms/step - loss: 8.8303e-04 - val_loss: 3.6673e-04
Epoch 2/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 10ms/step - loss: 5.3968e-04 - val_loss: 3.5881e-04
Epoch 3/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 10ms/step - loss: 5.1540e-04 - val_loss: 3.5392e-04
Epoch 4/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 10ms/step - loss: 5.0185e-04 - val_loss: 3.4866e-04
Epoch 5/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 10ms/step - loss: 4.8940e-04 - val_loss: 3.3945e-04
Epoch 6/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 10ms/step - loss: 4.6641e-04 - val_loss: 3.4245e-04
Epoch 7/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 10ms/step - loss: 4.5387e-04 - val_loss: 3.3632e-04
Epoch 8/60
[1m6404/6404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s