In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# low_memory=False makes pandas parse the file in a single pass instead of in
# chunks, so each column gets one consistently inferred dtype.
df = pd.read_csv("data/household_power_consumption.txt", sep=";", low_memory=False)

In [None]:
# Peek at the first rows of the freshly loaded frame.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Summarise the column dtypes as inferred by read_csv.
df.info()

We can see that the columns have inappropriate dtypes: every column except Date and Time should be float. Numerical code cannot work with strings, so we convert these columns to numbers; any value that cannot be parsed becomes NaN.

In [None]:
# Every column other than the two timestamp parts holds a measurement.
columns = [name for name in df.columns if name not in ("Date", "Time")]

# Parse the measurements as numbers; entries that cannot be parsed become NaN.
df[columns] = df[columns].apply(pd.to_numeric, errors="coerce")

In [None]:
# Re-check the dtypes after the numeric conversion.
df.info()

To get rid of the remaining string columns, we combine the Date and Time columns into a single datetime column and use it as the index.

In [None]:
# Build one day-first timestamp from the Date and Time strings, make it the
# index, and drop the two source columns.
df = (
    df.assign(full_time=pd.to_datetime(df["Date"] + " " + df["Time"], dayfirst=True))
    .set_index("full_time")
    .drop(columns=["Date", "Time"])
)

In [None]:
# Preview the frame now that it is indexed by full_time.
df.head()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Missing values per column (sum runs down the rows by default).
df.isna().sum()

We have to fill these NaN values. We can do this by replacing them with the median or the mean of the column. I think the median is more appropriate because the distribution of electricity consumption is very skewed, and the mean is very vulnerable to outliers and skewed distributions.

In [None]:
# Replace each missing value with the median of its own column.
df = df.fillna(df.median())

In [None]:
# Re-count missing values per column after the median fill.
df.isna().sum()

In [None]:
# One histogram per column. Ending the cell with plt.show() keeps the
# array-of-Axes repr out of the output, and tight_layout reduces overlap
# between neighbouring subplot titles and tick labels.
df.hist(figsize=(15, 10), bins=40)
plt.tight_layout()
plt.show()

From these histograms we can see that our data has a lot of outliers. Only the Voltage feature looks approximately normally distributed.

In [None]:
# Pairwise correlations between all columns.
# vmin/vmax pin the diverging colormap to [-1, 1] so that its neutral colour
# means "no correlation"; without them the scale is stretched to the observed
# range and the colours are not comparable in sign.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    df.corr(),
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title("Correlation between features")
plt.show()

A standard train_test_split would be a bad option for us because we work with sequential data: it shuffles rows by default, which would leak future observations into the training set. We must split the data chronologically.

In [None]:
# First 80% of the rows (in index order) train the model; the rest is held out.
split_id = int(0.8 * len(df))

# Target is Global_active_power; every other column is a feature.
y = df["Global_active_power"]
X = df.drop(columns=["Global_active_power"])

X_train, X_test = X.iloc[:split_id, :], X.iloc[split_id:, :]
y_train, y_test = y.iloc[:split_id], y.iloc[split_id:]

In [None]:
# Training features and target shapes, one per line.
print(X_train.shape, y_train.shape, sep="\n")

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training rows only, then apply that
# same mapping to both splits so no test-set statistics reach the model.
scaler = MinMaxScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
def create_sequences(X, y, seq_len=60):
    """Turn aligned feature/target series into sliding-window samples.

    Window ``k`` holds rows ``X[k : k + seq_len]`` and is paired with the
    target ``y[k + seq_len]``, i.e. the value right after the window.

    Parameters
    ----------
    X : array-like, first axis is time
        Feature rows in chronological order.
    y : array-like, first axis is time
        Targets aligned row-for-row with ``X``.
    seq_len : int, default 60
        Number of consecutive rows in each window.

    Returns
    -------
    (Xs, ys) : tuple of numpy.ndarray
        ``Xs`` has shape ``(n, seq_len, *X.shape[1:])`` and ``ys`` has shape
        ``(n, *y.shape[1:])`` with ``n = max(len(X) - seq_len, 0)``. Both are
        fresh, writable arrays.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    n_windows = len(X) - seq_len
    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still read e.g. shape[2].
        return (
            np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y.shape[1:], dtype=y.dtype),
        )

    # sliding_window_view appends the window axis last: (windows, *features, seq_len).
    windows = np.lib.stride_tricks.sliding_window_view(X, seq_len, axis=0)
    # Put the window axis right after the sample axis and drop the final
    # window, which has no following target.
    windows = np.moveaxis(windows, -1, 1)[:n_windows]

    # The view is read-only and strided; hand back ordinary owned arrays.
    return np.ascontiguousarray(windows), y[seq_len:].copy()

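An LSTM cannot learn from single time steps. It needs a sequence of past observations to predict a future value, so we convert the time series into sliding windows: each window of `seq_len` consecutive rows is paired with the target value that immediately follows it.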

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [None]:
SEQ_LEN = 60

# Window each split on its own so no window straddles the train/test boundary.
X_train_seq, y_train_seq = create_sequences(X_train, y_train.values, seq_len=SEQ_LEN)
X_test_seq, y_test_seq = create_sequences(X_test, y_test.values, seq_len=SEQ_LEN)

# Inputs: float32 tensors, left on the CPU.
X_train_t = torch.tensor(X_train_seq, dtype=torch.float32)
X_test_t = torch.tensor(X_test_seq, dtype=torch.float32)

# Targets: float32 column vectors of shape (n, 1), placed on `device`.
# NOTE(review): inputs stay on the CPU while targets go to `device`; any
# consumer must move input batches to `device` before the forward pass.
y_train_t = torch.tensor(y_train_seq, dtype=torch.float32).unsqueeze(-1).to(device)
y_test_t = torch.tensor(y_test_seq, dtype=torch.float32).unsqueeze(-1).to(device)

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM whose last time-step output feeds a linear head."""

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        """Build the recurrent stack and the linear output layer.

        input_size  -- number of features per time step
        hidden_size -- width of each LSTM layer's hidden state
        output_size -- number of values predicted per sequence
        num_layers  -- how many LSTM layers are stacked
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one prediction per sequence for a batch-first input."""
        # Only the per-step outputs are needed; the final (h, c) state is discarded.
        per_step = self.lstm(x)[0]
        final_step = per_step[:, -1, :]
        return self.fc(final_step)

In [None]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and later torch.randperm shuffling) is repeatable when the
# notebook is re-run from a fresh kernel on the same setup.
torch.manual_seed(42)

model = LSTM(
    input_size=X_train_seq.shape[2],  # features per time step
    hidden_size=64,
    output_size=1,
    num_layers=2,
).to(device)

In [None]:
# Adam over all model weights, minimising the mean squared error.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

As the loss function we use Mean Squared Error (MSE) because this is a regression task. It also penalizes larger errors more strongly.

In [None]:
# Training hyper-parameters. The loop used EPOCHS and BATCH_SIZE without any
# cell defining them, so a fresh "Restart & Run All" stopped with a NameError.
# Treat these values as starting points to tune.
EPOCHS = 5
BATCH_SIZE = 256

n_train = X_train_t.size(0)

for epoch in range(EPOCHS):
    model.train()
    start_time = time.time()
    # Visit the training windows in a new random order every epoch.
    permutation = torch.randperm(n_train)
    loss_sum = 0.0      # sum of per-sample losses seen so far this epoch
    samples_done = 0    # number of training windows processed this epoch

    for i in range(0, n_train, BATCH_SIZE):
        indices = permutation[i:i + BATCH_SIZE]

        # Move just this mini-batch to the model's device. Without this the
        # inputs stay on the CPU and the forward pass fails on a GPU.
        batch_x = X_train_t[indices].to(device)
        batch_y = y_train_t[indices].to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # MSELoss returns the batch mean; weight it by the batch size so the
        # running average is exact even when the last batch is shorter.
        batch_len = indices.numel()
        loss_sum += loss.item() * batch_len
        samples_done += batch_len

        # ETA: extrapolate the time spent so far to the rest of the epoch.
        progress = samples_done / n_train
        elapsed = time.time() - start_time
        eta = elapsed * (1 - progress) / progress

        print(
            f"\rEpoch {epoch+1}/{EPOCHS} "
            f"- Loss: {loss_sum / samples_done:.4f} "
            f"- ETA: {eta:.1f}s",
            end=""
        )

    print()

In [None]:
# Number of test windows sent through the model at once. Pushing the whole
# test set through in a single forward pass can exhaust memory.
EVAL_BATCH_SIZE = 1024

model.eval()
with torch.no_grad():
    # Each chunk is moved to the model's device for the forward pass and the
    # result is brought back to the CPU, because .numpy() only works on CPU tensors.
    y_pred = torch.cat(
        [model(chunk.to(device)).cpu() for chunk in X_test_t.split(EVAL_BATCH_SIZE)]
    ).numpy()

rmse = np.sqrt(mean_squared_error(y_test_seq, y_pred))
mae = mean_absolute_error(y_test_seq, y_pred)

print("RMSE:", rmse)
print("MAE:", mae)

In [None]:
# Overlay the first 1000 test targets and predictions. The axes are labelled
# so the figure can be read without the surrounding cells.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test_seq[:1000], label="True")
ax.plot(np.ravel(y_pred[:1000]), label="Predicted")
ax.set_xlabel("Test sample index")
ax.set_ylabel("Global_active_power")
ax.set_title("Global Active Power Prediction")
ax.legend()
plt.show()