In [None]:
import pandas as pd
from IPython.display import display
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


In [44]:
# Configuration
LOOKBACK = 24 * 4 * 3  # 3 days of history (15-min intervals)
FORECAST_HORIZON = 24 * 4  # 24 hours ahead (15-min intervals)
BATCH_SIZE = 32
EPOCHS = 50

In [45]:
def create_sequences(data, n_steps, horizon):
    """Create sequences with reduced memory footprint"""
    X, y = [], []
    for i in range(len(data) - n_steps - horizon):
        X.append(data[i:i + n_steps])
        y.append(data[i + n_steps:i + n_steps + horizon])
    return np.array(X), np.array(y)

In [46]:
def build_gru_model(input_shape, output_units):
    """Build memory-efficient GRU model"""
    model = Sequential([
        GRU(64, activation='tanh', return_sequences=True,
            input_shape=input_shape,
            kernel_initializer='glorot_uniform'),
        BatchNormalization(),
        Dropout(0.2),
        GRU(32, activation='tanh',
            kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(output_units)
    ])
    
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.001,
        clipnorm=1.0  # Gradient clipping
    )
    
    model.compile(optimizer=optimizer, loss='huber_loss', metrics=['mae'])
    return model

In [53]:
df = pd.read_csv('P_agg_data_for_prediction.csv', infer_datetime_format=True)
# Handle zeros and missing values
data = df['value'].replace(0, 0.001).ffill()

# 2. Scale data
scaler = RobustScaler()
scaled_data = scaler.fit_transform(data.values.reshape(-1, 1)).flatten()

  df = pd.read_csv('P_agg_data_for_prediction.csv', infer_datetime_format=True)


In [54]:
# 3. Create sequences (in memory-efficient chunks)
X, y = [], []
chunk_size = 10000  # Process data in chunks to avoid memory overload
for i in range(0, len(scaled_data), chunk_size):
    chunk = scaled_data[i:i + chunk_size + LOOKBACK + FORECAST_HORIZON]
    X_chunk, y_chunk = create_sequences(chunk, LOOKBACK, FORECAST_HORIZON)
    X.append(X_chunk)
    y.append(y_chunk)

X = np.concatenate(X)
y = np.concatenate(y)
print(X)
print(y)

[[-2.12201844e-02 -2.08305616e-02 -1.18312268e-02 ... -2.93642497e-03
  -1.32091609e-03 -9.31293357e-04]
 [-2.08305616e-02 -1.18312268e-02 -1.43780291e-02 ... -1.32091609e-03
  -9.31293357e-04 -2.00513162e-03]
 [-1.18312268e-02 -1.43780291e-02 -1.37413285e-02 ... -9.31293357e-04
  -2.00513162e-03 -3.46859261e-03]
 ...
 [ 2.44283949e-01  2.53368811e-01  2.35873800e-01 ...  8.12867053e+00
   8.10002851e+00  8.07670816e+00]
 [ 2.53368811e-01  2.35873800e-01  2.40758339e-01 ...  8.10002851e+00
   8.07670816e+00  8.06976147e+00]
 [ 2.35873800e-01  2.40758339e-01  2.46574171e-01 ...  8.07670816e+00
   8.06976147e+00  8.06873515e+00]]
[[-2.00513162e-03 -3.46859261e-03 -7.47885584e-03 ... -6.65209541e-03
  -5.37869429e-03 -5.33117932e-03]
 [-3.46859261e-03 -7.47885584e-03 -6.60458044e-03 ... -5.37869429e-03
  -5.33117932e-03 -4.55193386e-03]
 [-7.47885584e-03 -6.60458044e-03 -7.13674808e-03 ... -5.33117932e-03
  -4.55193386e-03 -6.88967025e-03]
 ...
 [ 8.06976147e+00  8.06873515e+00  8.0286515

In [55]:
# 4. Train-test split (maintaining temporal order)
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [58]:


# 5. Build and train GRU model
model = build_gru_model(
    input_shape=(LOOKBACK, 1),
    output_units=FORECAST_HORIZON
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[
        EarlyStopping(patience=10, restore_best_weights=True),
        ReduceLROnPlateau(factor=0.5, patience=5)
    ],
    verbose=1
)

# 6. Generate and plot forecast
last_sequence = scaled_data[-LOOKBACK:]
forecast = model.predict(last_sequence.reshape(1, LOOKBACK, 1))
forecast = scaler.inverse_transform(forecast).flatten()

forecast_dates = pd.date_range(
    start=data.index[-1] + pd.Timedelta(minutes=15),
    periods=FORECAST_HORIZON,
    freq='15T'
)

# Plot training history
plt.figure(figsize=(12, 5))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.legend()
plt.grid()
plt.show()

# Plot forecast
plt.figure(figsize=(15, 6))
plt.plot(data.index[-LOOKBACK:], data[-LOOKBACK:], label='History')
plt.plot(forecast_dates, forecast, label='24h Forecast', color='red')
plt.title('Fridge Consumption Forecast (GRU Model)')
plt.xlabel('Time')
plt.ylabel('Consumption (W)')
plt.legend()
plt.grid()
plt.show()

KeyboardInterrupt: 

In [None]:
# Convert 'timestamp' to datetime
data['datetime'] = pd.to_datetime(data['timestamp'], unit='s')

# Set datetime as index for resampling
data.set_index('datetime', inplace=True)

# Resample to daily sum of 'value' in kilowatts (kWh)
daily_df = data.resample('5min').sum(numeric_only=True)
daily_df['value'] = daily_df['value'] / 1000  # Convert from Wh to kWh


# Reset index to turn datetime back into a column
daily_df = daily_df.reset_index()

# Add iteration column
daily_df['iteration'] = range(len(daily_df))

# Reorder columns
daily_df = daily_df[['iteration', 'datetime', 'value']]

# Final result
print(daily_df)


        iteration            datetime      value
0               0 2022-09-01 00:00:00   1.060808
1               1 2022-09-01 00:05:00   1.070527
2               2 2022-09-01 00:10:00   1.068840
3               3 2022-09-01 00:15:00   1.069467
4               4 2022-09-01 00:20:00   1.069426
...           ...                 ...        ...
105115     105115 2023-08-31 23:35:00  10.359788
105116     105116 2023-08-31 23:40:00  10.382458
105117     105117 2023-08-31 23:45:00  10.373878
105118     105118 2023-08-31 23:50:00  10.346762
105119     105119 2023-08-31 23:55:00  10.298206

[105120 rows x 3 columns]
