In [1]:
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import make_scorer
import time
from sklearn.preprocessing import LabelEncoder

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer, Dropout
from sklearn.preprocessing import MinMaxScaler

In [3]:
train = pd.read_csv("train_with_seasonal_cluster.csv")
test = pd.read_csv("test_with_seasonal_cluster.csv")

In [4]:
series = train['electricity_consumption'].values.reshape(-1, 1)
scaler = MinMaxScaler()
series_scaled = scaler.fit_transform(series)

In [5]:
series_test = test['electricity_consumption'].values.reshape(-1, 1) if 'electricity_consumption' in test.columns else None

In [6]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

In [7]:
seq_length = 12  # Number of previous time steps to use
X, y = create_sequences(series_scaled, seq_length)

In [8]:
split_idx = len(train) - len(test)
X_train = X[:split_idx - seq_length]
y_train = y[:split_idx - seq_length]

In [17]:
model = Sequential([
    LSTM(100, activation='relu', input_shape=(seq_length, 1), return_sequences=True),
    Dropout(0.2),
    LSTM(50, activation='relu'),  
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=2)

Epoch 1/50
259/259 - 14s - 54ms/step - loss: 0.0329
Epoch 2/50
259/259 - 8s - 30ms/step - loss: 0.0055
Epoch 3/50
259/259 - 6s - 24ms/step - loss: 0.0041
Epoch 4/50
259/259 - 6s - 24ms/step - loss: 0.0034
Epoch 5/50
259/259 - 6s - 23ms/step - loss: 0.0029
Epoch 6/50
259/259 - 7s - 25ms/step - loss: 0.0025
Epoch 7/50
259/259 - 6s - 24ms/step - loss: 0.0022
Epoch 8/50
259/259 - 7s - 27ms/step - loss: 0.0022
Epoch 9/50
259/259 - 6s - 23ms/step - loss: 0.0020
Epoch 10/50
259/259 - 6s - 25ms/step - loss: 0.0019
Epoch 11/50
259/259 - 6s - 22ms/step - loss: 0.0018
Epoch 12/50
259/259 - 7s - 25ms/step - loss: 0.0017
Epoch 13/50
259/259 - 6s - 25ms/step - loss: 0.0017
Epoch 14/50
259/259 - 6s - 22ms/step - loss: 0.0016
Epoch 15/50
259/259 - 6s - 22ms/step - loss: 0.0016
Epoch 16/50
259/259 - 6s - 23ms/step - loss: 0.0014
Epoch 17/50
259/259 - 6s - 22ms/step - loss: 0.0015
Epoch 18/50
259/259 - 6s - 23ms/step - loss: 0.0014
Epoch 19/50
259/259 - 6s - 21ms/step - loss: 0.0014
Epoch 20/50
259/259 

<keras.src.callbacks.history.History at 0x13a75d19dc0>

In [18]:
input_seq = series_scaled[-seq_length:].reshape(1, seq_length, 1)
predictions = []

In [20]:
for _ in range(len(test)):
    pred = model.predict(input_seq, verbose=1)[0]
    predictions.append(pred)
    input_seq = np.append(input_seq[:, 1:, :], [[pred]], axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53

In [25]:
y_pred = scaler.inverse_transform(np.array(predictions[:len(test)]).reshape(-1, 1)).flatten()

In [26]:
print(len(test['ID']))
print(len(y_pred))

3400
3400


In [27]:
submission = pd.DataFrame({
    'ID': test['ID'].values,
    'electricity_consumption': y_pred
})

In [28]:
submission.to_csv('submission12.csv', index=False)