In [None]:
import os
import zipfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------------
# 1. Setup: Create Directory for Plots
# -------------------------------
# Every figure produced by this cell is written below this folder.
plot_dir = "/kaggle/working/Dataset_plots"
os.makedirs(plot_dir, exist_ok=True)  # no-op when the folder is already present

# -------------------------------
# 2. Load and Prepare the Dataset
# -------------------------------
file_path = "/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv"

# Read the CSV and turn 'Timestamp' into datetime values in one chained step,
# so the time-series plots below get a real date axis.
df = pd.read_csv(file_path).assign(
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'])
)

# Numeric columns shared by the histogram, KDE and correlation sections.
numerical_cols = [
    'Energy_Demand',
    'Energy_Supply',
    'Temperature',
    'Grid_Load',
    'Renewable_Source_Output',
    'NonRenewable_Source_Output',
    'Energy_Price',
]

# One seaborn theme for all figures in the notebook.
sns.set(style="whitegrid")

# -------------------------------
# 3. Function to Save Plots
# -------------------------------
def save_plot(filename, formats=('png', 'eps', 'pdf')):
    """Save the current pyplot figure under ``plot_dir`` in several formats, then close it.

    Args:
        filename: Base name of the output files, without an extension.
        formats: Formats to write. Each entry is used both as the file
            extension and as the ``format`` argument of ``plt.savefig``.
            The default reproduces the previously hard-coded PNG/EPS/PDF set.

    Raises:
        Whatever ``plt.savefig`` raises (for example for an unsupported
        format); the figure is still closed in that case.
    """
    try:
        for ext in formats:
            target = os.path.join(plot_dir, f"{filename}.{ext}")
            plt.savefig(target, format=ext, bbox_inches='tight')
    finally:
        # Close even when a save fails, so a half-exported figure does not stay
        # pyplot's "current" figure and receive the next section's drawing calls.
        plt.close()

# -------------------------------
# 4. Time Series Analysis
# -------------------------------
# Overlay demand and supply on one shared time axis.
plt.figure(figsize=(14, 6))
for column, legend_label, line_color in (
    ('Energy_Demand', 'Energy Demand', 'blue'),
    ('Energy_Supply', 'Energy Supply', 'green'),
):
    plt.plot(df['Timestamp'], df[column], label=legend_label, color=line_color)
plt.title('Time Series of Energy Demand and Energy Supply')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.legend()
plt.xticks(rotation=45)  # slant the tick labels so timestamps do not overlap
plt.tight_layout()
save_plot("time_series_energy_demand_supply")

# -------------------------------
# 5. Distribution Analysis
# -------------------------------
# One histogram panel per numeric column. DataFrame.hist opens its own figure,
# which is then the target of the pyplot calls that follow.
numeric_frame = df[numerical_cols]
numeric_frame.hist(bins=15, figsize=(15, 10), layout=(3, 3))
plt.suptitle("Histograms of Numerical Features", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # keep headroom for the suptitle
save_plot("histograms_numerical_features")

# Kernel density estimate of every numeric column, all drawn on one shared axis.
plt.figure(figsize=(14, 8))
for feature in numerical_cols:
    sns.kdeplot(data=df, x=feature, fill=True, label=feature)
plt.xlabel('Value')
plt.title('KDE Plot of Numerical Features')
plt.legend()
save_plot("kde_numerical_features")

# -------------------------------
# 6. Correlation Analysis
# -------------------------------
# Pairwise correlations of the numeric columns, annotated to two decimals.
plt.figure(figsize=(10, 8))
corr = df[numerical_cols].corr()
sns.heatmap(corr, fmt=".2f", annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap of Numerical Features")
save_plot("correlation_heatmap")

# -------------------------------
# 7. Categorical Analysis
# -------------------------------
# Frequency of each weather label, one figure per weather column.
# Each row: (column, palette, title, x-axis label, output base name).
count_specs = [
    ('Weather_Condition_x', 'Set2', 'Count of Observations by Weather_Condition_x',
     'Weather Condition (x)', "weather_condition_x_distribution"),
    ('Weather_Condition_y', 'Set3', 'Count of Observations by Weather_Condition_y',
     'Weather Condition (y)', "weather_condition_y_distribution"),
]
for weather_col, bar_palette, plot_title, x_label, out_name in count_specs:
    plt.figure(figsize=(8, 4))
    sns.countplot(x=weather_col, data=df, palette=bar_palette)
    plt.title(plot_title)
    plt.xlabel(x_label)
    plt.ylabel('Count')
    save_plot(out_name)

# Side-by-side box plots of demand and supply, grouped by Weather_Condition_x.
plt.figure(figsize=(14, 6))
box_specs = [
    ('Energy_Demand', 'Energy Demand by Weather_Condition_x'),
    ('Energy_Supply', 'Energy Supply by Weather_Condition_x'),
]
for panel_index, (metric, panel_title) in enumerate(box_specs, start=1):
    plt.subplot(1, 2, panel_index)
    sns.boxplot(x='Weather_Condition_x', y=metric, data=df, palette='pastel')
    plt.title(panel_title)
    plt.xticks(rotation=45)

plt.tight_layout()
save_plot("energy_demand_supply_by_weather_condition_x")

# -------------------------------
# 8. Scatter Plots
# -------------------------------
# Grid load against demand, one colour per weather category.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Grid_Load', y='Energy_Demand', data=df, hue='Weather_Condition_x', palette='deep')
plt.title('Grid Load vs. Energy Demand')
plt.xlabel('Grid Load')
plt.ylabel('Energy Demand')
plt.legend(title='Weather Condition')
save_plot("scatter_grid_load_vs_energy_demand")

# Renewable against non-renewable output, coloured by the continuous Energy_Price.
plt.figure(figsize=(10, 6))

# A single Normalize instance drives both the point colours and the colorbar,
# so the bar is guaranteed to describe the colours that were actually drawn.
norm = plt.Normalize(df['Energy_Price'].min(), df['Energy_Price'].max())
price_ax = sns.scatterplot(
    x='Renewable_Source_Output',
    y='NonRenewable_Source_Output',
    data=df,
    hue='Energy_Price',
    palette='viridis',
    hue_norm=norm,
    legend=False  # the colorbar below takes the place of a discrete legend
)
plt.title('Renewable vs NonRenewable Source Output Colored by Energy Price')
plt.xlabel('Renewable Source Output')
plt.ylabel('NonRenewable Source Output')

sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # kept from the original; older Matplotlib wants an array set before drawing a colorbar
# This mappable is not attached to any Axes, so Matplotlib cannot infer where
# the colorbar belongs. Name the scatter Axes explicitly: recent Matplotlib
# releases raise ValueError when neither `ax` nor `cax` is given here.
cbar = plt.colorbar(sm, ax=price_ax, label='Energy Price')
save_plot("scatter_renewable_vs_nonrenewable")

# -------------------------------
# 9. Zip the Directory for Download
# -------------------------------
zip_path = "/kaggle/working/Dataset_plots.zip"
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(plot_dir):
        for file in files:
            source = os.path.join(root, file)
            # Store each member under its path relative to plot_dir. The
            # absolute directory prefix is still dropped, but files that share
            # a name in different sub-folders keep distinct archive names
            # instead of colliding on the bare file name.
            zipf.write(source, arcname=os.path.relpath(source, plot_dir))

# -------------------------------
# 10. Provide Download Link
# -------------------------------
print(f"All plots have been saved in {plot_dir} and zipped as {zip_path}. You can download it from the Kaggle working directory.")


In [None]:
import os
import zipfile
from matplotlib import pyplot as plt

# Create the directory that receives this cell's plots. exist_ok=True keeps the
# call safe to re-run and avoids the check-then-create race of testing
# os.path.exists() first.
output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)

# Function to save and close the plot
def save_plot(filename):
    """Write the current pyplot figure into ``output_dir`` and close it.

    ``filename`` is joined onto ``output_dir`` exactly as given, so it must
    carry its own extension (e.g. ``'name.png'``); no explicit ``format`` is
    passed to ``savefig``. The figure is saved with a tight bounding box.
    """
    destination = os.path.join(output_dir, filename)
    plt.savefig(destination, bbox_inches='tight')
    plt.close()

# -------------------------------
# 2. Time Series Analysis
# -------------------------------
# Demand, supply and temperature drawn against the same time axis.
plt.figure(figsize=(14, 6))
for column, legend_label, line_color in (
    ('Energy_Demand', 'Energy Demand', 'blue'),
    ('Energy_Supply', 'Energy Supply', 'green'),
    ('Temperature', 'Temperature', 'red'),
):
    plt.plot(df['Timestamp'], df[column], label=legend_label, color=line_color)
plt.title('Time Series of Energy Demand, Energy Supply, and Temperature')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.legend()
plt.xticks(rotation=45)  # slant the tick labels so timestamps do not overlap
plt.tight_layout()
save_plot('time_series_energy_temp.png')

# -------------------------------
# 3. Distribution Analysis: Histograms & KDE Plots
# -------------------------------
# Histograms for numerical features
# One histogram panel per numeric column. DataFrame.hist opens its own figure,
# which is then the target of the pyplot calls that follow.
numeric_frame = df[numerical_cols]
numeric_frame.hist(bins=15, figsize=(15, 10), layout=(3, 3))
plt.suptitle("Histograms of Numerical Features", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # keep headroom for the suptitle
save_plot('histograms_numerical_features.png')

# Kernel density estimate of every numeric column, all drawn on one shared axis.
plt.figure(figsize=(14, 8))
for feature in numerical_cols:
    sns.kdeplot(data=df, x=feature, fill=True, label=feature)
plt.xlabel('Value')
plt.title('KDE Plot of Numerical Features')
plt.legend()
save_plot('kde_numerical_features.png')

# -------------------------------
# 4. Correlation Analysis: Heatmap and Pairplot
# -------------------------------
# Correlation Heatmap
# Pairwise correlations of the numeric columns, annotated to two decimals.
plt.figure(figsize=(10, 8))
corr = df[numerical_cols].corr()
sns.heatmap(corr, fmt=".2f", annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap of Numerical Features")
save_plot('correlation_heatmap.png')

# Scatter-matrix of the same columns; pairplot builds its own figure grid.
numeric_frame = df[numerical_cols]
sns.pairplot(numeric_frame)
plt.suptitle("Pairwise Relationships Between Numerical Features", y=1.02)
save_plot('pairplot_numerical_features.png')

# -------------------------------
# 5. Categorical Analysis: Weather Conditions
# -------------------------------
# Count plot for Weather_Condition_x
# Frequency of each weather label, one figure per weather column.
# Each row: (column, palette, title, x-axis label, output file name).
count_specs = [
    ('Weather_Condition_x', 'Set2', 'Count of Observations by Weather_Condition_x',
     'Weather Condition (x)', 'countplot_weather_condition_x.png'),
    ('Weather_Condition_y', 'Set3', 'Count of Observations by Weather_Condition_y',
     'Weather Condition (y)', 'countplot_weather_condition_y.png'),
]
for weather_col, bar_palette, plot_title, x_label, out_name in count_specs:
    plt.figure(figsize=(8, 4))
    sns.countplot(x=weather_col, data=df, palette=bar_palette)
    plt.title(plot_title)
    plt.xlabel(x_label)
    plt.ylabel('Count')
    save_plot(out_name)

# Side-by-side box plots of demand and supply, grouped by Weather_Condition_x.
plt.figure(figsize=(14, 6))
box_specs = [
    ('Energy_Demand', 'Energy Demand by Weather_Condition_x'),
    ('Energy_Supply', 'Energy Supply by Weather_Condition_x'),
]
for panel_index, (metric, panel_title) in enumerate(box_specs, start=1):
    plt.subplot(1, 2, panel_index)
    sns.boxplot(x='Weather_Condition_x', y=metric, data=df, palette='pastel')
    plt.title(panel_title)
    plt.xticks(rotation=45)

plt.tight_layout()
save_plot('boxplot_weather_condition_x.png')

# -------------------------------
# 6. Scatter Plots: Exploring Relationships
# -------------------------------
# Scatter plot: Grid Load vs. Energy Demand
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Grid_Load', y='Energy_Demand', data=df, hue='Weather_Condition_x', palette='deep')
plt.title('Grid Load vs. Energy Demand')
plt.xlabel('Grid Load')
plt.ylabel('Energy Demand')
plt.legend(title='Weather Condition')
save_plot('scatter_grid_load_vs_energy_demand.png')

# Scatter plot: Renewable vs NonRenewable Source Output
plt.figure(figsize=(10, 6))

# A single Normalize instance drives both the point colours and the colorbar,
# so the bar is guaranteed to describe the colours that were actually drawn.
norm = plt.Normalize(df['Energy_Price'].min(), df['Energy_Price'].max())
scatter = sns.scatterplot(
    x='Renewable_Source_Output',
    y='NonRenewable_Source_Output',
    data=df,
    hue='Energy_Price',
    palette='viridis',
    hue_norm=norm,
    legend=False  # the colorbar below takes the place of a discrete legend
)
plt.title('Renewable vs NonRenewable Source Output Colored by Energy Price')
plt.xlabel('Renewable Source Output')
plt.ylabel('NonRenewable Source Output')

# Colorbar
sm = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
sm.set_array([])  # kept from the original; older Matplotlib wants an array set before drawing a colorbar
# `scatter` is the Axes returned by seaborn. The stand-alone mappable belongs to
# no Axes, so it must be named explicitly: recent Matplotlib releases raise
# ValueError when neither `ax` nor `cax` is given here.
plt.colorbar(sm, ax=scatter, label='Energy Price')
save_plot('scatter_renewable_vs_nonrenewable.png')

# -------------------------------
# 7. Combined Time Series Plot with Dual Y-axis
# -------------------------------
fig, ax1 = plt.subplots(figsize=(14, 6))

# Left axis: the two energy series plus temperature.
for column, legend_label, line_color in (
    ('Energy_Demand', 'Energy Demand', 'blue'),
    ('Energy_Supply', 'Energy Supply', 'green'),
    ('Temperature', 'Temperature', 'red'),
):
    ax1.plot(df['Timestamp'], df[column], label=legend_label, color=line_color)
ax1.set(xlabel='Timestamp', ylabel='Energy/Temperature Values')
ax1.tick_params(axis='x', rotation=45)
ax1.legend(loc='upper left')

# Right axis: price on its own scale, dashed to set it apart from the left-axis lines.
ax2 = ax1.twinx()
ax2.plot(df['Timestamp'], df['Energy_Price'], linestyle='--', color='purple', label='Energy Price')
ax2.set_ylabel('Energy Price')
ax2.legend(loc='upper right')

plt.title("Combined Time Series Plot: Energy Metrics and Price")
plt.tight_layout()
save_plot('combined_time_series_dual_axis.png')

# -------------------------------
# Zipping the Plots
# -------------------------------
zip_filename = 'Dataset_files.zip'
# ZIP_DEFLATED matches the archive written by the first cell; without it
# ZipFile stores members uncompressed.
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(output_dir):
        for file in files:
            source = os.path.join(root, file)
            # Archive name is the path relative to output_dir, so same-named
            # files in different sub-folders do not collide on the bare name.
            zipf.write(source, arcname=os.path.relpath(source, output_dir))

print(f'All plots have been saved and zipped as {zip_filename}.')


In [None]:
import pandas as pd

# Specify the path to your dataset
file_path = "/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv"

# Read the CSV file into a DataFrame.
# NOTE: this replaces any `df` prepared by an earlier cell, so 'Timestamp' is
# back to its raw CSV type until it is converted again.
df = pd.read_csv(file_path)

# DataFrame.info() writes its report (column names, non-null counts, dtypes) to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would append a stray "None" line to the output.
print("Data Information:")
df.info()

# Print the first 5 rows of the DataFrame
print("\nFirst 5 Rows of Data:")
print(df.head())


In [None]:
# Distinct (x, y) label combinations that occur in the data.
weather_pairs = df[['Weather_Condition_x', 'Weather_Condition_y']]
print(weather_pairs.drop_duplicates())

# Row-wise agreement between the two columns, tallied as True/False counts.
labels_match = df['Weather_Condition_x'] == df['Weather_Condition_y']
print(labels_match.value_counts())


In [None]:
import pandas as pd

# Location of the dataset CSV.
file_path = "/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv"

# Load the raw table.
df = pd.read_csv(file_path)

# Structural overview; info() prints its own report.
print("Data Information:")
df.info()

# Peek at the first rows.
print("\nFirst 5 Rows of Data:")
print(df.head())

# Turn 'Timestamp' into datetime values, then show the resulting dtypes.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
print("\nData Types After Converting Timestamp:")
print(df.dtypes)

# Descriptive statistics as produced by DataFrame.describe().
print("\nSummary Statistics:")
print(df.describe())

# Distinct labels found in each weather column.
weather_headings = (
    ('Weather_Condition_x', "\nUnique values in 'Weather_Condition_x':"),
    ('Weather_Condition_y', "\nUnique values in 'Weather_Condition_y':"),
)
for weather_col, heading in weather_headings:
    print(heading)
    print(df[weather_col].unique())


In [None]:
import matplotlib.pyplot as plt

# Line chart of Energy_Demand against Timestamp, shown inline.
plt.figure(figsize=(10, 6))
demand_values = df['Energy_Demand']
plt.plot(df['Timestamp'], demand_values, label='Energy Demand')
plt.title('Energy Demand Over Time')
plt.xlabel('Time')
plt.ylabel('Energy Demand')
plt.legend()
plt.show()


In [None]:
"""import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np

# -------------------------------
# 1. Load and Preprocess the Dataset
# -------------------------------
# Specify the path to your dataset (update the path as needed)
file_path = "/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Optionally, sort the data by timestamp if needed
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.sort_values("Timestamp")

# Use the "Energy_Demand" column as the time series for forecasting
energy_series = df["Energy_Demand"].values.astype(np.float32)

# (Optional) Normalize or scale the data
# For example, here we simply convert it to a torch tensor.
time_series_data = torch.tensor(energy_series)  # Shape: [n_points]

print("Loaded time series of length:", len(time_series_data))


# -------------------------------
# 2. Define the Base Time Series Model (LSTM)
# -------------------------------
class TimeSeriesLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(TimeSeriesLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Save parameters for cloning later
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        
    def forward(self, x):
        # x shape: (batch_size, seq_len, input_size)
        out, _ = self.lstm(x)
        # Use the output from the last time step
        out = out[:, -1, :]
        out = self.fc(out)
        return out

# -------------------------------
# 3. Define the MAML Framework
# -------------------------------
class MAML:
    def __init__(self, model, lr_inner=0.01, lr_meta=0.001, inner_steps=5):
        self.model = model
        self.lr_inner = lr_inner
        self.lr_meta = lr_meta
        self.inner_steps = inner_steps
        
        # Meta-optimizer to update the model's initialization
        self.meta_optimizer = optim.Adam(self.model.parameters(), lr=self.lr_meta)
    
    def inner_loop(self, support_x, support_y):
        
        Create a temporary copy of the model and perform a few gradient steps on the support set.
        
        # Create a new model instance and load current parameters
        fast_model = TimeSeriesLSTM(self.model.input_size,
                                    self.model.hidden_size,
                                    self.model.num_layers,
                                    self.model.output_size)
        fast_model.load_state_dict(self.model.state_dict())
        fast_model.train()
        
        inner_optimizer = optim.SGD(fast_model.parameters(), lr=self.lr_inner)
        criterion = nn.MSELoss()
        
        for _ in range(self.inner_steps):
            inner_optimizer.zero_grad()
            preds = fast_model(support_x)
            loss = criterion(preds, support_y)
            loss.backward()
            inner_optimizer.step()
            
        return fast_model

    def meta_update(self, tasks):
        
        Perform a meta-update over a batch of tasks.
        Each task consists of a support and query set.
        
        self.meta_optimizer.zero_grad()
        criterion = nn.MSELoss()
        meta_loss = 0.0
        
        for support_x, support_y, query_x, query_y in tasks:
            # Obtain an adapted model for the current task
            fast_model = self.inner_loop(support_x, support_y)
            
            # Evaluate the adapted model on the query set
            fast_model.eval()
            preds = fast_model(query_x)
            loss = criterion(preds, query_y)
            meta_loss += loss
        
        # Average the meta loss across tasks
        meta_loss = meta_loss / len(tasks)
        meta_loss.backward()
        self.meta_optimizer.step()
        return meta_loss.item()

# -------------------------------
# 4. Define a Task Generator from the Dataset
# -------------------------------
def generate_time_series_task_from_data(seq_len=20, task_size=60):
    
    Generate a forecasting task from the global time_series_data.
    
    Args:
        seq_len (int): The length of the input sequence window.
        task_size (int): The total length of the task segment to sample.
                         Must be greater than seq_len.
                         
    Returns:
        support_x, support_y, query_x, query_y: Tensors for support and query sets.
    
    global time_series_data
    
    # Ensure there is enough data to sample a full task
    total_points = len(time_series_data)
    if total_points < task_size:
        raise ValueError("Not enough data points to sample the task.")
        
    # Randomly choose a starting index
    max_start = total_points - task_size
    start_idx = random.randint(0, max_start)
    task_segment = time_series_data[start_idx:start_idx + task_size]
    
    # Create sliding windows: each input is a window of length seq_len and target is the next value
    inputs = []
    targets = []
    for i in range(len(task_segment) - seq_len):
        window = task_segment[i:i+seq_len].unsqueeze(1)  # shape: (seq_len, 1)
        target = task_segment[i+seq_len].unsqueeze(0)      # shape: (1,)
        inputs.append(window)
        targets.append(target)
    
    inputs = torch.stack(inputs)   # shape: (num_windows, seq_len, 1)
    targets = torch.stack(targets) # shape: (num_windows, 1)
    
    # Split into support and query sets (e.g., first half support, second half query)
    split = inputs.shape[0] // 2
    support_x = inputs[:split]
    support_y = targets[:split]
    query_x = inputs[split:]
    query_y = targets[split:]
    
    return support_x, support_y, query_x, query_y

# -------------------------------
# 5. Set Up the Model and MAML Training
# -------------------------------
# Hyperparameters for the LSTM model
input_size = 1      # one feature (Energy_Demand)
hidden_size = 32
num_layers = 1
output_size = 1

# Create the base model instance
base_model = TimeSeriesLSTM(input_size, hidden_size, num_layers, output_size)

# Create the MAML meta-learner instance
maml = MAML(base_model, lr_inner=0.01, lr_meta=0.001, inner_steps=5)

# -------------------------------
# 6. Meta-Training Loop
# -------------------------------
num_meta_iterations = 1000   # Total meta-training iterations
tasks_per_meta_update = 4    # Number of tasks per meta-update

for iteration in range(num_meta_iterations):
    tasks = []
    for _ in range(tasks_per_meta_update):
        support_x, support_y, query_x, query_y = generate_time_series_task_from_data(seq_len=20, task_size=60)
        tasks.append((support_x, support_y, query_x, query_y))
        
    meta_loss = maml.meta_update(tasks)
    
    if iteration % 100 == 0:
        print(f"Iteration {iteration}, Meta Loss: {meta_loss:.4f}")

# -------------------------------
# 7. Evaluation on a New Task from the Dataset
# -------------------------------
# Generate a new task from the dataset
support_x, support_y, query_x, query_y = generate_time_series_task_from_data(seq_len=20, task_size=60)

# Fine-tune the base model on the support set of the new task
adapted_model = maml.inner_loop(support_x, support_y)

adapted_model.eval()
with torch.no_grad():
    predictions = adapted_model(query_x)
    test_loss = nn.MSELoss()(predictions, query_y)
    
print(f"Test Loss on the new task: {test_loss.item():.4f}")
"""


In [None]:
"""import pandas as pd

# Specify the path to your dataset
file_path = "/kaggle/input/predicting-electricity-consumption/train.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Print basic DataFrame information
print("Data Information:")
df.info()  # This prints the info to stdout

# Print the first 5 rows of the DataFrame
print("\nFirst 5 Rows of Data:")
print(df.head())
"""

In [None]:
"""import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# -----------------------------------
# 1. Load and Preprocess the Dataset
# -----------------------------------
file_path = "/kaggle/input/predicting-electricity-consumption/train.csv"
df = pd.read_csv(file_path)

# Convert timestamp to datetime and sort by time
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values("timestamp")

# Drop rows with missing meter_reading values
df = df.dropna(subset=["meter_reading"])

print("Data Information:")
df.info()

print("\nFirst 5 Rows of Data:")
print(df.head())

# Group the data by building_id (each building is its own time series)
building_groups = dict(tuple(df.groupby("building_id")))
print("\nTotal buildings in dataset:", len(building_groups))

# -----------------------------------
# 2. Data Sample Selection: Choose Training and Testing Buildings
# -----------------------------------
# Set the number of buildings you want to use for training and testing.
train_building_count = 100  # Adjust as desired
test_building_count = 50   # Adjust as desired

all_buildings = list(building_groups.keys())
# Randomly sample training buildings (without replacement)
train_buildings = random.sample(all_buildings, min(train_building_count, len(all_buildings)))
# For testing, use buildings not in the training set.
remaining_buildings = list(set(all_buildings) - set(train_buildings))
test_buildings = random.sample(remaining_buildings, min(test_building_count, len(remaining_buildings)))

print("Number of training buildings:", len(train_buildings))
print("Number of test buildings:", len(test_buildings))

# Compute global statistics for meter_reading for normalization
meter_mean = df["meter_reading"].mean()
meter_std = df["meter_reading"].std()

# -----------------------------------
# 3. Define the Enhanced Base Time Series Model (LSTM)
# -----------------------------------
class EnhancedTimeSeriesLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super(EnhancedTimeSeriesLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                            batch_first=True, dropout=dropout)
        # Batch normalization applied to the LSTM output features
        self.batch_norm = nn.BatchNorm1d(hidden_size)
        # Fully connected head
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Save architecture details for cloning later
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        
    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        lstm_out, _ = self.lstm(x)  
        # Use the output of the last time step
        last_out = lstm_out[:, -1, :]  # shape: (batch_size, hidden_size)
        # Batch normalization expects a 2D tensor
        normed = self.batch_norm(last_out)
        out = self.fc(normed)
        return out

# -----------------------------------
# 4. Define the MAML Framework with Gradient Clipping
# -----------------------------------
class MAML:
    def __init__(self, model, lr_inner=0.01, lr_meta=0.001, inner_steps=5, clip_grad=1.0):
        self.model = model.to(device)
        self.lr_inner = lr_inner
        self.lr_meta = lr_meta
        self.inner_steps = inner_steps
        self.clip_grad = clip_grad
        
        # Meta-optimizer for the model's initialization parameters
        self.meta_optimizer = optim.Adam(self.model.parameters(), lr=self.lr_meta)
    
    def inner_loop(self, support_x, support_y):
        
        Create a temporary copy of the model and perform a few gradient steps on the support set.
        
        # Clone the model architecture and load current weights
        fast_model = EnhancedTimeSeriesLSTM(self.model.input_size,
                                            self.model.hidden_size,
                                            self.model.num_layers,
                                            self.model.output_size).to(device)
        fast_model.load_state_dict(self.model.state_dict())
        fast_model.train()
        
        inner_optimizer = optim.SGD(fast_model.parameters(), lr=self.lr_inner)
        criterion = nn.MSELoss()
        
        for _ in range(self.inner_steps):
            inner_optimizer.zero_grad()
            preds = fast_model(support_x)
            loss = criterion(preds, support_y)
            loss.backward()
            # Clip gradients to avoid explosion
            torch.nn.utils.clip_grad_norm_(fast_model.parameters(), self.clip_grad)
            inner_optimizer.step()
            
        return fast_model

    def meta_update(self, tasks):
        
        Perform a meta-update over a batch of tasks.
        
        self.meta_optimizer.zero_grad()
        criterion = nn.MSELoss()
        meta_loss = 0.0
        
        for support_x, support_y, query_x, query_y in tasks:
            # Ensure tensors are on the device
            support_x, support_y = support_x.to(device), support_y.to(device)
            query_x, query_y = query_x.to(device), query_y.to(device)
            
            fast_model = self.inner_loop(support_x, support_y)
            # Do not call fast_model.eval() here! Keep it in training mode for backward compatibility.
            preds = fast_model(query_x)
            loss = criterion(preds, query_y)
            meta_loss += loss
        
        meta_loss = meta_loss / len(tasks)
        meta_loss.backward()
        self.meta_optimizer.step()
        return meta_loss.item()

# -----------------------------------
# 5. Define an Enhanced Task Generator Function
# -----------------------------------
def generate_time_series_task(seq_len=20, task_size=60, allowed_buildings=None):
    
    Generate a forecasting task by sampling a contiguous segment from a randomly selected building.
    The meter readings are normalized using global mean and std.
    
    Args:
        seq_len (int): Length of the input window.
        task_size (int): Total records in the task segment (must be > seq_len).
        allowed_buildings (list or None): List of building IDs to sample from. If None, all buildings are allowed.
    
    Returns:
        support_x, support_y, query_x, query_y: Tensors for the support and query sets.
    
    # Determine valid buildings based on allowed_buildings and sufficient data points
    if allowed_buildings is not None:
        valid_buildings = [bid for bid in allowed_buildings if len(building_groups[bid]) >= task_size]
    else:
        valid_buildings = [bid for bid, group in building_groups.items() if len(group) >= task_size]
    
    if not valid_buildings:
        raise ValueError("No building has enough data for the selected task_size.")
    
    building_id = random.choice(valid_buildings)
    group = building_groups[building_id].sort_values("timestamp")
    
    # Extract meter_readings and apply normalization
    meter_readings = group["meter_reading"].values.astype(np.float32)
    meter_readings = (meter_readings - meter_mean) / meter_std  # normalize globally
    
    total_points = len(meter_readings)
    max_start = total_points - task_size
    start_idx = random.randint(0, max_start)
    task_segment = meter_readings[start_idx:start_idx + task_size]
    
    # Convert to torch tensor
    task_segment = torch.tensor(task_segment, dtype=torch.float32)
    
    # Build sliding windows: input = window of seq_len, target = next value
    inputs = []
    targets = []
    for i in range(len(task_segment) - seq_len):
        window = task_segment[i:i+seq_len].unsqueeze(1)  # shape: (seq_len, 1)
        target = task_segment[i+seq_len].unsqueeze(0)      # shape: (1,)
        inputs.append(window)
        targets.append(target)
    
    inputs = torch.stack(inputs)   # shape: (num_windows, seq_len, 1)
    targets = torch.stack(targets) # shape: (num_windows, 1)
    
    # Split into support and query sets (50/50 split)
    split = inputs.shape[0] // 2
    support_x = inputs[:split]
    support_y = targets[:split]
    query_x = inputs[split:]
    query_y = targets[split:]
    
    return support_x, support_y, query_x, query_y

# -----------------------------------
# 6. Set Up the Enhanced Model and Meta-Learner
# -----------------------------------
# Hyperparameters for the enhanced LSTM model
input_size = 1       # one feature: normalized meter_reading
hidden_size = 64     # increased hidden size for better capacity
num_layers = 2       # two LSTM layers
output_size = 1
dropout = 0.3        # dropout for regularization

base_model = EnhancedTimeSeriesLSTM(input_size, hidden_size, num_layers, output_size, dropout)
maml = MAML(base_model, lr_inner=0.01, lr_meta=0.001, inner_steps=5, clip_grad=1.0)

# -----------------------------------
# 7. Enhanced Meta-Training Loop
# -----------------------------------
num_meta_iterations = 1000   # Total meta-training iterations
tasks_per_meta_update = 4    # Number of tasks per meta-update

for iteration in range(num_meta_iterations):
    tasks = []
    for _ in range(tasks_per_meta_update):
        # Use only the training buildings for meta-training
        support_x, support_y, query_x, query_y = generate_time_series_task(seq_len=20, task_size=60, allowed_buildings=train_buildings)
        tasks.append((support_x, support_y, query_x, query_y))
        
    meta_loss = maml.meta_update(tasks)
    
    if iteration % 100 == 0:
        print(f"Iteration {iteration}, Meta Loss: {meta_loss:.4f}")

# -----------------------------------
# 8. Evaluation on New Tasks from the Test Buildings
# -----------------------------------
# Generate a set of test tasks (e.g., 5 tasks) and report the average test loss.
test_tasks = []
num_test_tasks = 5
for _ in range(num_test_tasks):
    support_x, support_y, query_x, query_y = generate_time_series_task(seq_len=20, task_size=60, allowed_buildings=test_buildings)
    # Move evaluation data to the device
    support_x, support_y = support_x.to(device), support_y.to(device)
    query_x, query_y = query_x.to(device), query_y.to(device)
    test_tasks.append((support_x, support_y, query_x, query_y))

test_loss_total = 0.0
for support_x, support_y, query_x, query_y in test_tasks:
    adapted_model = maml.inner_loop(support_x, support_y)
    adapted_model.eval()  # For pure prediction here, it's okay to set eval mode after adaptation.
    with torch.no_grad():
        preds = adapted_model(query_x)
        loss = nn.MSELoss()(preds, query_y)
        test_loss_total += loss.item()

avg_test_loss = test_loss_total / num_test_tasks
print(f"Average Test Loss on {num_test_tasks} test tasks: {avg_test_loss:.4f}")
"""


# 1. TCN, LSTM, and GRU Incremental Learning Architecture

In [None]:
"""import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from collections import deque
import random
from tensorflow.keras.layers import Conv1D, BatchNormalization, ReLU, Add, Dropout
import time

# ===========================
# 1. Data Loading and Preprocessing
# ===========================
data = pd.read_csv('/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv')

# Convert Timestamp to datetime and sort
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data.sort_values('Timestamp', inplace=True)

# Select the 'Energy_Demand' feature and normalize it
features = ['Energy_Demand']
scaler = MinMaxScaler()
data_features = scaler.fit_transform(data[features].values)

sequence_length = 48
prediction_length = 48

# Create sequences and labels using a sliding window approach
sequences, labels = [], []
for i in range(len(data_features) - sequence_length - prediction_length + 1):
    seq = data_features[i:i + sequence_length]
    label = data_features[i + sequence_length:i + sequence_length + prediction_length]
    sequences.append(seq)
    labels.append(label)

sequences = np.array(sequences)
labels = np.array(labels).astype(np.float32)

# ===========================
# 2. Replay Buffer for Incremental Learning
# ===========================
class ReplayBuffer:
    def __init__(self, max_size=1000):
        self.buffer = deque(maxlen=max_size)

    def add(self, sequence, label):
        self.buffer.append((sequence, label))

    def sample(self, batch_size):
        # If the replay buffer is not yet full, return all samples
        if len(self.buffer) < batch_size:
            return list(self.buffer)
        else:
            return random.sample(self.buffer, batch_size)

# ===========================
# 3. Define TCN Block and Build the Hybrid Model (TCN → GRU → LSTM)
# ===========================
def tcn_block(filters, kernel_size, dilation_rate, dropout):
    def block(x):
        # Residual connection via 1x1 convolution to match dimensions
        res = Conv1D(filters, kernel_size=1, padding="same")(x)
        # First convolution branch with causal padding
        conv1 = Conv1D(filters, kernel_size, dilation_rate=dilation_rate, padding="causal", activation=None)(x)
        norm1 = BatchNormalization()(conv1)
        act1 = ReLU()(norm1)
        drop1 = Dropout(dropout)(act1)
        # Second convolution branch with causal padding
        conv2 = Conv1D(filters, kernel_size, dilation_rate=dilation_rate, padding="causal", activation=None)(drop1)
        norm2 = BatchNormalization()(conv2)
        act2 = ReLU()(norm2)
        drop2 = Dropout(dropout)(act2)
        # Skip connection: add the residual and the processed path
        skip = Add()([res, drop2])
        return skip
    return block

def build_hybrid_model(seq_len=sequence_length, pred_len=prediction_length, dropout=0.4, hidden_dim=128):
    inputs = tf.keras.layers.Input(shape=(seq_len, len(features)))

    # Three TCN blocks with increasing dilation rates
    tcn_output = tcn_block(hidden_dim, kernel_size=3, dilation_rate=1, dropout=dropout)(inputs)
    #tcn_output = tcn_block(hidden_dim, kernel_size=3, dilation_rate=2, dropout=dropout)(tcn_output)
    #tcn_output = tcn_block(hidden_dim, kernel_size=3, dilation_rate=4, dropout=dropout)(tcn_output)

    # Bidirectional GRU layer
    gru_output = tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal')
    )(tcn_output)

    # Bidirectional LSTM layer
    lstm_output = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal')
    )(gru_output)

    # Use the first 'prediction_length' time steps of the LSTM output for prediction
    truncated = lstm_output[:, :pred_len, :]
    outputs = tf.keras.layers.Dense(1, activation='linear')(truncated)

    # Compile the model with Adam optimizer and mean squared error loss
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae', 'mse', tf.keras.metrics.MeanAbsolutePercentageError(name='mape')]
    )
    return model

# Build and visualize the model architecture
hybrid_model_tcn_gru_lstm = build_hybrid_model()
hybrid_model_tcn_gru_lstm.summary()

tf.keras.utils.plot_model(
    hybrid_model_tcn_gru_lstm,
    to_file="hybrid_model_tcn_gru_lstm_architecture.png",
    show_shapes=True,
    show_layer_names=True
)

# ===========================
# 4. Incremental Online Training Loop with Per-Epoch Metrics
# ===========================
# Training configuration
chunk_batch_size = 20         # Number of new samples per update (online chunk)
epochs_per_chunk =  150         # Epochs per update
replay_sample_size = 32       # Number of samples to draw from replay buffer per update

# Dictionaries to store per-epoch metrics
metrics_history = {
    "epoch": [],
    "loss": [],
    "val_loss": [],
    "accuracy": [],
    "val_accuracy": []
}
# Also record training time per epoch
epoch_times = []

replay_buffer = ReplayBuffer(max_size=5000)
global_epoch = 0

# Process the sequences in an online manner (chunk by chunk)
num_chunks = int(np.ceil(len(sequences) / chunk_batch_size))
print(f"Starting online incremental training over {num_chunks} chunks...")

for chunk_idx in range(0, len(sequences), chunk_batch_size):
    # Get new incoming data for this chunk
    new_sequences = sequences[chunk_idx:chunk_idx + chunk_batch_size]
    new_labels = labels[chunk_idx:chunk_idx + chunk_batch_size]
    
    # Add new samples to the replay buffer
    for seq, lbl in zip(new_sequences, new_labels):
        replay_buffer.add(seq, lbl)
    
    # Sample replay data for this update
    replay_samples = replay_buffer.sample(replay_sample_size)
    replay_sequences, replay_labels = zip(*replay_samples)
    replay_sequences = np.array(replay_sequences)
    replay_labels = np.array(replay_labels)
    
    # Combine the new data with replay data to form a training mini-batch
    train_sequences = np.vstack((new_sequences, replay_sequences))
    train_labels = np.vstack((new_labels, replay_labels))
    
    # Train for a fixed number of epochs on this combined data (simulate online updates)
    start_time_chunk = time.time()
    history = hybrid_model_tcn_gru_lstm.fit(
        train_sequences, train_labels,
        batch_size=chunk_batch_size,
        epochs=epochs_per_chunk,
        verbose=0,
        validation_split=0.1
    )
    end_time_chunk = time.time()
    
    # Record per-epoch metrics for this chunk update
    chunk_time = end_time_chunk - start_time_chunk
    avg_epoch_time = chunk_time / epochs_per_chunk
    for epoch in range(epochs_per_chunk):
        global_epoch += 1
        loss = history.history['loss'][epoch]
        val_loss = history.history['val_loss'][epoch]
        # Define "accuracy" as 1 - loss (for visualization purposes)
        acc = 1 - loss
        val_acc = 1 - val_loss
        metrics_history["epoch"].append(global_epoch)
        metrics_history["loss"].append(loss)
        metrics_history["val_loss"].append(val_loss)
        metrics_history["accuracy"].append(acc)
        metrics_history["val_accuracy"].append(val_acc)
        epoch_times.append(avg_epoch_time)
    
    print(f"Chunk {chunk_idx//chunk_batch_size + 1}/{num_chunks}: "
          f"Avg Loss={np.mean(history.history['loss']):.4f}, "
          f"Avg Val Loss={np.mean(history.history['val_loss']):.4f}, "
          f"Time for chunk={chunk_time:.2f}s")

# Save the trained model
model_save_path = "hybrid_model_tcn_gru_lstm.h5"
hybrid_model_tcn_gru_lstm.save(model_save_path)
print(f"Model saved to {model_save_path}")

# ===========================
# 5. Visualization of Performance Metrics per Epoch
# ===========================
# Plot Training and Validation Loss per Epoch
plt.figure(figsize=(10, 6))
plt.plot(metrics_history["epoch"], metrics_history["loss"], label="Training Loss", marker="o")
plt.plot(metrics_history["epoch"], metrics_history["val_loss"], label="Validation Loss", marker="s", linestyle="--")
plt.title("Training and Validation Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("hybrid_loss_per_epoch.png")
plt.savefig("hybrid_loss_per_epoch.pdf")
plt.savefig("hybrid_loss_per_epoch.eps")
plt.show()

# Plot Training and Validation Accuracy per Epoch
plt.figure(figsize=(10, 6))
plt.plot(metrics_history["epoch"], metrics_history["accuracy"], label="Training Accuracy", marker="o")
plt.plot(metrics_history["epoch"], metrics_history["val_accuracy"], label="Validation Accuracy", marker="s", linestyle="--")
plt.title("Training and Validation Accuracy per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("hybrid_accuracy_per_epoch.png")
plt.savefig("hybrid_accuracy_per_epoch.pdf")
plt.savefig("hybrid_accuracy_per_epoch.eps")
plt.show()

# Plot Training Time per Epoch
plt.figure(figsize=(10, 6))
plt.bar(metrics_history["epoch"], epoch_times, color="blue", alpha=0.7)
plt.title("Training Time per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Time (seconds)")
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("hybrid_time_per_epoch.png")
plt.savefig("hybrid_time_per_epoch.pdf")
plt.savefig("hybrid_time_per_epoch.eps")
plt.show()

# ===========================
# 6. Forecasting Visualization
# ===========================
# Generate forecast using the last available sequence
predicted_values = hybrid_model_tcn_gru_lstm.predict(np.expand_dims(sequences[-1], axis=0)).flatten()
actual_values = labels[-1].flatten()

plt.figure(figsize=(12, 8))
plt.plot(range(len(actual_values)), actual_values, label="Actual Energy Consumption", color="black", marker="o")
plt.plot(range(len(predicted_values)), predicted_values, label="Hybrid Model Forecast", color="blue", linestyle="--", marker="s")
plt.fill_between(range(len(actual_values)), actual_values, predicted_values, color="gray", alpha=0.3, label="Difference")
plt.title("Energy Forecasting: Actual vs Predicted (Hybrid Model)", fontsize=16, fontweight="bold")
plt.xlabel("Time Steps", fontsize=14)
plt.ylabel("Normalized Energy Consumption", fontsize=14)
plt.legend(fontsize=12, loc="best", title="Legend", title_fontsize=13)
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("hybrid_forecasting_shading.png")
plt.savefig("hybrid_forecasting_shading.pdf")
plt.savefig("hybrid_forecasting_shading.eps")
plt.show()

# ===========================
# 7. Save the Performance Metrics for Later Comparison
# ===========================
np.savez("hybrid_metrics.npz",
         epochs=np.array(metrics_history["epoch"]),
         loss=np.array(metrics_history["loss"]),
         val_loss=np.array(metrics_history["val_loss"]),
         accuracy=np.array(metrics_history["accuracy"]),
         val_accuracy=np.array(metrics_history["val_accuracy"]),
         epoch_times=np.array(epoch_times),
         forecast_pred=np.array(predicted_values),
         actual=np.array(actual_values))
print("Hybrid model metrics saved to hybrid_metrics.npz")
"""

In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from collections import deque
import random
import time

# ===========================
# 1. Data Loading and Preprocessing
# ===========================
# Read the CSV, parse 'Timestamp' as datetimes and order the rows chronologically
data = (
    pd.read_csv('/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv')
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .sort_values('Timestamp')
)

# Only 'Energy_Demand' is modelled; it is min-max normalized before windowing
features = ['Energy_Demand']
scaler = MinMaxScaler()
data_features = scaler.fit_transform(data[features].to_numpy())

sequence_length = 48
prediction_length = 48

# Sliding windows with stride 1: each input covers `sequence_length` steps and
# its label covers the `prediction_length` steps that immediately follow it.
window_count = len(data_features) - sequence_length - prediction_length + 1
sequences = np.array([
    data_features[start:start + sequence_length]
    for start in range(window_count)
])
labels = np.array([
    data_features[start + sequence_length:start + sequence_length + prediction_length]
    for start in range(window_count)
]).astype(np.float32)

# ===========================
# 2. Replay Buffer for Incremental Learning
# ===========================
class ReplayBuffer:
    """Bounded FIFO store of (sequence, label) pairs used for replay.

    Once `max_size` pairs are held, appending a new pair drops the oldest one.
    """

    def __init__(self, max_size=1000):
        # deque(maxlen=...) performs the oldest-first eviction for us
        self.buffer = deque(maxlen=max_size)

    def add(self, sequence, label):
        """Store one (sequence, label) pair at the newest end of the buffer."""
        pair = (sequence, label)
        self.buffer.append(pair)

    def sample(self, batch_size):
        """Return up to `batch_size` stored pairs as a list.

        When at least `batch_size` pairs are stored, a random sample without
        replacement is drawn; otherwise every stored pair is returned in
        insertion order.
        """
        stored = len(self.buffer)
        if stored >= batch_size:
            return random.sample(self.buffer, batch_size)
        return list(self.buffer)

# ===========================
# 3. Define a Deeper Lightweight Model Architecture
# ===========================
def build_lightweight_model(seq_len=sequence_length, pred_len=prediction_length, dropout=0.2, hidden_dim=90):
    """Build and compile the lightweight TCN -> bidirectional GRU forecaster.

    The network stacks two identical convolutional blocks (left zero-padding,
    SeparableConv1D with ReLU, BatchNormalization, Dropout), feeds the result
    through a bidirectional GRU that returns the full sequence, keeps the first
    `pred_len` time steps of that sequence and maps each of them to a single
    value with a linear Dense layer.

    Args:
        seq_len: Number of time steps in every input window.
        pred_len: Number of time steps to emit. The output is sliced from the
            GRU sequence, which has `seq_len` steps, so `pred_len` must lie in
            [1, seq_len].
        dropout: Dropout rate used after each convolutional block and inside
            the GRU.
        hidden_dim: Number of convolution filters and GRU units per direction.

    Returns:
        A compiled `tf.keras.Model` taking inputs of shape
        (batch, seq_len, len(features)) and producing (batch, pred_len, 1).
        It is compiled with Adam (learning rate 0.001), MSE loss and the
        metrics 'mae', 'mse' and 'mape'.

    Raises:
        ValueError: If `pred_len` is smaller than 1 or larger than `seq_len`.
    """
    # Without this check a too-large pred_len silently produces only seq_len
    # output steps, and the mismatch with the labels surfaces much later in fit().
    if not 1 <= pred_len <= seq_len:
        raise ValueError(
            f"pred_len must be between 1 and seq_len ({seq_len}), got {pred_len}"
        )

    layers = tf.keras.layers
    kernel_size = 3
    # Manual causal padding: pad only on the left by (kernel_size - 1) so the
    # "valid" convolution keeps the sequence length and never looks ahead.
    pad_size = kernel_size - 1

    inputs = layers.Input(shape=(seq_len, len(features)))

    # --- TCN Block 1 and TCN Block 2 (identical structure) ---
    x = inputs
    for _ in range(2):
        x = layers.ZeroPadding1D(padding=(pad_size, 0))(x)
        x = layers.SeparableConv1D(filters=hidden_dim, kernel_size=kernel_size,
                                   padding="valid", activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(dropout)(x)

    # --- Bidirectional GRU Layer ---
    x = layers.Bidirectional(
        layers.GRU(hidden_dim, dropout=dropout, return_sequences=True, kernel_initializer='orthogonal')
    )(x)

    # For forecasting, select the first 'pred_len' time steps
    x = x[:, :pred_len, :]
    outputs = layers.Dense(1, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae', 'mse', tf.keras.metrics.MeanAbsolutePercentageError(name='mape')]
    )

    return model

# Build the lightweight model with its default hyperparameters and print the
# layer-by-layer summary.
lightweight_model = build_lightweight_model()
lightweight_model.summary()

# Write an architecture diagram (with tensor shapes and layer names) to disk.
# NOTE(review): the file name still says "tcn_gru_lstm" although the model built
# above contains no LSTM layer; presumably kept so existing artifact names stay
# stable. Confirm before renaming.
tf.keras.utils.plot_model(
    lightweight_model,
    to_file="hybrid_model_tcn_gru_lstm_architecture.png",
    show_shapes=True,
    show_layer_names=True
)

# ===========================
# 4. Incremental Online Training Loop with Per-Epoch Metrics
# ===========================
# Online-training hyperparameters
chunk_batch_size = 20         # new windows consumed per update; also the fit() batch size
epochs_per_chunk = 150        # epochs trained on every chunk
replay_sample_size = 32       # windows requested from the replay buffer per update

# Per-epoch curves accumulated across all chunks
metrics_history = {name: [] for name in ("epoch", "loss", "val_loss", "accuracy", "val_accuracy")}
epoch_times = []

replay_buffer = ReplayBuffer(max_size=5000)
global_epoch = 0

num_chunks = int(np.ceil(len(sequences) / chunk_batch_size))
print(f"Starting online incremental training over {num_chunks} chunks...")

for chunk_idx in range(0, len(sequences), chunk_batch_size):
    chunk_slice = slice(chunk_idx, chunk_idx + chunk_batch_size)
    new_sequences, new_labels = sequences[chunk_slice], labels[chunk_slice]

    # The fresh windows are stored before sampling, so the replay draw below
    # may contain some of them again.
    for fresh_pair in zip(new_sequences, new_labels):
        replay_buffer.add(*fresh_pair)

    replay_sequences, replay_labels = (
        np.array(column) for column in zip(*replay_buffer.sample(replay_sample_size))
    )

    # Training set for this update: fresh windows first, replayed windows after.
    # NOTE(review): Keras documents validation_split as taking the last samples
    # of the arrays, which here would be replayed windows. Confirm this is intended.
    train_sequences = np.vstack((new_sequences, replay_sequences))
    train_labels = np.vstack((new_labels, replay_labels))

    start_time_chunk = time.time()
    history = lightweight_model.fit(
        train_sequences,
        train_labels,
        batch_size=chunk_batch_size,
        epochs=epochs_per_chunk,
        validation_split=0.1,
        verbose=0,
    )
    chunk_time = time.time() - start_time_chunk
    # Only the whole chunk is timed; every epoch is credited with the average
    avg_epoch_time = chunk_time / epochs_per_chunk

    train_curve = history.history['loss']
    val_curve = history.history['val_loss']
    for epoch in range(epochs_per_chunk):
        global_epoch += 1
        metrics_history["epoch"].append(global_epoch)
        metrics_history["loss"].append(train_curve[epoch])
        metrics_history["val_loss"].append(val_curve[epoch])
        # "accuracy" is simply 1 - loss, a plotting convenience rather than a true accuracy
        metrics_history["accuracy"].append(1 - train_curve[epoch])
        metrics_history["val_accuracy"].append(1 - val_curve[epoch])
        epoch_times.append(avg_epoch_time)

    print(f"Chunk {chunk_idx//chunk_batch_size + 1}/{num_chunks}: "
          f"Avg Loss={np.mean(history.history['loss']):.4f}, "
          f"Avg Val Loss={np.mean(history.history['val_loss']):.4f}, "
          f"Time for chunk={chunk_time:.2f}s")

# Persist the trained model
model_save_path = "hybrid_model_tcn_gru_lstm.h5"
lightweight_model.save(model_save_path)
print(f"Model saved to {model_save_path}")

# ===========================
# 5. Visualization of Performance Metrics per Epoch
# ===========================
# --- Loss curves ---
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(metrics_history["epoch"], metrics_history["loss"], label="Training Loss", marker="o")
ax.plot(metrics_history["epoch"], metrics_history["val_loss"], label="Validation Loss", marker="s", linestyle="--")
ax.set_title("Training and Validation Loss per Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
for target in ("hybrid_loss_per_epoch.png", "hybrid_loss_per_epoch.pdf", "hybrid_loss_per_epoch.eps"):
    fig.savefig(target)
plt.show()

# --- "Accuracy" curves (1 - loss, as recorded during training) ---
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(metrics_history["epoch"], metrics_history["accuracy"], label="Training Accuracy", marker="o")
ax.plot(metrics_history["epoch"], metrics_history["val_accuracy"], label="Validation Accuracy", marker="s", linestyle="--")
ax.set_title("Training and Validation Accuracy per Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
for target in ("hybrid_accuracy_per_epoch.png", "hybrid_accuracy_per_epoch.pdf", "hybrid_accuracy_per_epoch.eps"):
    fig.savefig(target)
plt.show()

# --- Average wall-clock time attributed to each epoch ---
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(metrics_history["epoch"], epoch_times, color="blue", alpha=0.7)
ax.set_title("Training Time per Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Time (seconds)")
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
for target in ("hybrid_time_per_epoch.png", "hybrid_time_per_epoch.pdf", "hybrid_time_per_epoch.eps"):
    fig.savefig(target)
plt.show()

# ===========================
# 6. Forecasting Visualization
# ===========================
# Forecast from the last available input window and compare with its label
last_window = sequences[-1][np.newaxis, ...]   # add the batch axis expected by predict()
predicted_values = lightweight_model.predict(last_window).flatten()
actual_values = labels[-1].flatten()

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(range(len(actual_values)), actual_values, label="Actual Energy Consumption", color="black", marker="o")
ax.plot(range(len(predicted_values)), predicted_values, label="Hybrid Model Forecast", color="blue", linestyle="--", marker="s")
# Shade the gap between the two curves
ax.fill_between(range(len(actual_values)), actual_values, predicted_values, color="gray", alpha=0.3, label="Difference")
ax.set_title("Energy Forecasting: Actual vs Predicted (Hybrid Model)", fontsize=16, fontweight="bold")
ax.set_xlabel("Time Steps", fontsize=14)
ax.set_ylabel("Normalized Energy Consumption", fontsize=14)
ax.legend(fontsize=12, loc="best", title="Legend", title_fontsize=13)
ax.grid(True, linestyle="--", alpha=0.6)
fig.tight_layout()
for target in ("hybrid_forecasting_shading.png", "hybrid_forecasting_shading.pdf", "hybrid_forecasting_shading.eps"):
    fig.savefig(target)
plt.show()

# ===========================
# 7. Save the Performance Metrics for Later Comparison
# ===========================
# Bundle the per-epoch curves, timings and the final forecast into one archive
saved_arrays = {
    "epochs": np.array(metrics_history["epoch"]),
    "loss": np.array(metrics_history["loss"]),
    "val_loss": np.array(metrics_history["val_loss"]),
    "accuracy": np.array(metrics_history["accuracy"]),
    "val_accuracy": np.array(metrics_history["val_accuracy"]),
    "epoch_times": np.array(epoch_times),
    "forecast_pred": np.array(predicted_values),
    "actual": np.array(actual_values),
}
np.savez("hybrid_metrics.npz", **saved_arrays)
print("Hybrid model metrics saved to hybrid_metrics.npz")


In [7]:
pip install graphviz


Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install visualkeras


Collecting visualkeras
  Downloading visualkeras-0.1.4-py3-none-any.whl.metadata (11 kB)
Collecting aggdraw>=1.3.11 (from visualkeras)
  Downloading aggdraw-1.3.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (655 bytes)
Downloading visualkeras-0.1.4-py3-none-any.whl (17 kB)
Downloading aggdraw-1.3.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m993.7/993.7 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: aggdraw, visualkeras
Successfully installed aggdraw-1.3.19 visualkeras-0.1.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install pyvis
!pip install Jinja2==2.11.3
!pip install markupsafe==2.0.1


Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Downloading pyvis-0.3.2-py3-none-any.whl (756 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pyvis
Successfully installed pyvis-0.3.2
Collecting Jinja2==2.11.3
  Downloading Jinja2-2.11.3-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading Jinja2-2.11.3-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.7/125.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Jinja2
  Attempting uninstall: Jinja2
    Found existing installation: Jinja2 3.1.4
    Uninstalling Jinja2-3.1.4:
      Successfully uninstalled Jinja2-3.1.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
branca 0.8.1 requir

In [4]:
!pip uninstall pyvis -y
!pip install pyvis


Found existing installation: pyvis 0.3.2
Uninstalling pyvis-0.3.2:
  Successfully uninstalled pyvis-0.3.2
Collecting pyvis
  Using cached pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Using cached pyvis-0.3.2-py3-none-any.whl (756 kB)
Installing collected packages: pyvis
Successfully installed pyvis-0.3.2


In [5]:
from pyvis.network import Network

In [9]:
import graphviz

def visualize_model_diagram_for_double_column():
    """
    Render a static diagram of the hybrid deep online learning pipeline.

    The graph is laid out top-to-bottom with wide node spacing and tight rank
    spacing so that it suits a double-column (two-column) paper without being
    rotated. The same graph is rendered three times, as PNG, EPS and PDF, under
    the base name hybrid_deep_online_model_diagram, and each resulting path is
    printed. Nothing is returned.
    """
    diagram = graphviz.Digraph(comment="Hybrid Deep Online Learning Model", format="png")

    # Layout: larger nodesep widens the drawing, smaller ranksep shortens it.
    diagram.attr(rankdir="TB", nodesep="1.0", ranksep="0.5")

    # White canvas and a shared default look for every node.
    diagram.attr(bgcolor="#FFFFFF")
    diagram.attr("node",
                 shape="box",
                 style="rounded,filled",
                 color="#4B4B4B",
                 fontname="Helvetica",
                 fontsize="10")

    # Nodes: (identifier, label, per-node attribute overrides)
    node_specs = [
        ("Data", "Data Preprocessing\n& Sequences", {"fillcolor": "#D6EAF8"}),
        ("Replay", "Replay Buffer\n(Incremental Storage)", {"fillcolor": "#D5F5E3"}),
        ("TrainingLoop", "Online Incremental\nTraining Loop\n(Chunks, Epochs, etc.)",
         {"shape": "ellipse", "fillcolor": "#FDEBD0"}),
        ("TCN1", "TCN Block 1\n(SeparableConv1D + BN + Dropout)", {"fillcolor": "#FCF3CF"}),
        ("TCN2", "TCN Block 2\n(SeparableConv1D + BN + Dropout)", {"fillcolor": "#FCF3CF"}),
        ("BiGRU", "Bidirectional GRU\n(90 units, dropout=0.2)", {"fillcolor": "#FDEBD0"}),
        ("Dense", "Dense Layer\n(1 neuron, linear)", {"fillcolor": "#EBDEF0"}),
        ("Output", "Forecast Output", {"fillcolor": "#F9EBEA"}),
    ]
    for identifier, caption, overrides in node_specs:
        diagram.node(identifier, caption, **overrides)

    # Edges: (tail, head, optional label); all share the same line style.
    edge_attrs = {
        "color": "#7B7B7B",
        "arrowhead": "normal",
        "penwidth": "1.3"
    }
    edge_specs = [
        ("Data", "Replay", "store sequences"),
        ("Data", "TrainingLoop", "new chunk"),
        ("Replay", "TrainingLoop", "sample batch"),
        ("TrainingLoop", "TCN1", "train/update weights"),
        ("TCN1", "TCN2", None),
        ("TCN2", "BiGRU", None),
        ("BiGRU", "Dense", None),
        ("Dense", "Output", "predictions"),
    ]
    for tail, head, caption in edge_specs:
        diagram.edge(tail, head, label=caption, **edge_attrs)

    # Render the same graph once per output format and report each path.
    for fmt, message in (("png", "PNG saved to:"),
                         ("eps", "EPS saved to:"),
                         ("pdf", "PDF saved to:")):
        rendered_path = diagram.render("hybrid_deep_online_model_diagram",
                                       format=fmt, cleanup=True, view=False)
        print(message, rendered_path)

if __name__ == "__main__":
    visualize_model_diagram_for_double_column()


PNG saved to: hybrid_deep_online_model_diagram.png
EPS saved to: hybrid_deep_online_model_diagram.eps
PDF saved to: hybrid_deep_online_model_diagram.pdf


# 2. CNN Model Incremental Learning

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from collections import deque
import random
import time
import matplotlib.pyplot as plt

# -----------------------------------------------------------------------------
# 1. Data Loading and Preprocessing
# -----------------------------------------------------------------------------
# Load the raw CSV, parse the timestamps and order the rows chronologically
data = pd.read_csv('/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv')
data = data.assign(Timestamp=pd.to_datetime(data['Timestamp'])).sort_values('Timestamp')

# Only 'Energy_Demand' is modelled; it is min-max normalized before windowing
features = ['Energy_Demand']
scaler = MinMaxScaler()
data_features = scaler.fit_transform(data[features].to_numpy())

sequence_length = 48
prediction_length = 48

# Stride-1 sliding windows: an input of `sequence_length` steps is paired with
# the `prediction_length` steps that immediately follow it.
window_starts = range(len(data_features) - sequence_length - prediction_length + 1)
sequences = np.array([data_features[begin:begin + sequence_length] for begin in window_starts])
labels = np.array(
    [data_features[begin + sequence_length:begin + sequence_length + prediction_length]
     for begin in window_starts],
    dtype=np.float32,
)

# -----------------------------------------------------------------------------
# 2. Replay Buffer for Incremental Learning
# -----------------------------------------------------------------------------
class ReplayBuffer:
    """Fixed-capacity memory of (sequence, label) pairs for replay-based updates."""

    def __init__(self, max_size=1000):
        # A bounded deque discards its oldest entry when a new one overflows it
        self.buffer = deque(maxlen=max_size)

    def add(self, sequence, label):
        """Remember one (sequence, label) pair."""
        self.buffer.append((sequence, label))

    def sample(self, batch_size):
        """Return `batch_size` randomly chosen pairs, or every stored pair
        (in insertion order) when fewer than `batch_size` are available."""
        has_enough = not (len(self.buffer) < batch_size)
        return random.sample(self.buffer, batch_size) if has_enough else list(self.buffer)

def incremental_learning_update(model, new_sequences, new_labels, replay_buffer, batch_size=32, epochs=1):
    """Run one replay-augmented training update and return the fit history.

    The new (sequence, label) pairs are first added to `replay_buffer`; then
    `batch_size` pairs are requested from the buffer, stacked behind the new
    data, and `model.fit` is called on the combined arrays with a 10%
    validation split and no console output.

    Args:
        model: Object exposing a Keras-style `fit(x, y, ...)` method.
        new_sequences: Array of newly arrived input windows.
        new_labels: Array of labels aligned with `new_sequences`.
        replay_buffer: Object exposing `add(sequence, label)` and `sample(n)`.
        batch_size: Replay sample size, also passed to `fit` as the batch size.
        epochs: Number of epochs for this update.

    Returns:
        Whatever `model.fit` returns.
    """
    # Store the incoming pairs before sampling, so the draw may include them
    for fresh_pair in zip(new_sequences, new_labels):
        replay_buffer.add(*fresh_pair)

    # Split the sampled pairs back into an inputs array and a labels array
    drawn = replay_buffer.sample(batch_size)
    replay_inputs, replay_targets = (np.array(column) for column in zip(*drawn))

    # New data first, replayed data after
    fit_inputs = np.vstack((new_sequences, replay_inputs))
    fit_targets = np.vstack((new_labels, replay_targets))

    return model.fit(
        fit_inputs,
        fit_targets,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_split=0.1,
    )

# -----------------------------------------------------------------------------
# 3. CNN Model Definition
# -----------------------------------------------------------------------------
def build_cnn_model(seq_len=sequence_length, pred_len=prediction_length, hidden_dim=128):
    """Build and compile a 1-D convolutional multi-step forecaster.

    Args:
        seq_len: Number of time steps in each input window.
        pred_len: Number of values the model emits per input window.
        hidden_dim: Number of filters in each convolutional layer.

    Returns:
        A compiled ``tf.keras.Model`` taking ``(seq_len, len(features))`` inputs
        and producing ``pred_len`` linear outputs, trained with MSE loss.
    """
    keras_layers = tf.keras.layers
    inputs = keras_layers.Input(shape=(seq_len, len(features)))

    # Stack of three identical same-padded convolutions.
    x = inputs
    for _ in range(3):
        x = keras_layers.Conv1D(filters=hidden_dim, kernel_size=3,
                                activation='relu', padding='same')(x)

    # Average over the time axis, then a small dense head.
    x = keras_layers.GlobalAveragePooling1D()(x)
    x = keras_layers.Dense(64, activation='relu')(x)

    # One linear unit per forecast step.
    outputs = keras_layers.Dense(pred_len, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = tf.optimizers.Adam(learning_rate=0.001)
    tracked_metrics = ['mae', 'mse', tf.keras.metrics.MeanAbsolutePercentageError(name='mape')]
    model.compile(optimizer=optimizer, loss='mse', metrics=tracked_metrics)
    return model

# Build and summarize the CNN model (all builder arguments left at their defaults)
cnn_model = build_cnn_model()
cnn_model.summary()

# -----------------------------------------------------------------------------
# 4. Incremental Online Training Loop with Per-Epoch Metrics for CNN Model
# -----------------------------------------------------------------------------
# The windows in `sequences` are consumed in order, chunk_batch_size at a time.
# Each chunk is pushed into the replay buffer, a replay sample is drawn, and the
# model is fitted for epochs_per_chunk epochs on chunk + replay sample.
# Training configuration
chunk_batch_size = 20         # New samples per update (online chunk)
epochs_per_chunk = 150         # Number of epochs per update
replay_sample_size = 32       # Number of samples drawn from the replay buffer per update

# Per-epoch metric lists (one entry per epoch across all chunks) and a parallel
# list of per-epoch training times
cnn_metrics_history = {
    "epoch": [],
    "loss": [],
    "val_loss": [],
    "accuracy": [],
    "val_accuracy": []
}
cnn_epoch_times = []

replay_buffer_cnn = ReplayBuffer(max_size=1000)
global_epoch = 0  # running epoch counter across all chunk updates

# Ceiling division: the final chunk may hold fewer than chunk_batch_size samples
num_chunks = int(np.ceil(len(sequences) / chunk_batch_size))
print(f"Starting online incremental training for CNN model over {num_chunks} chunks...")

for chunk_idx in range(0, len(sequences), chunk_batch_size):
    # Get the new data chunk
    new_sequences = sequences[chunk_idx:chunk_idx + chunk_batch_size]
    new_labels = labels[chunk_idx:chunk_idx + chunk_batch_size]
    
    # Add new data to the replay buffer
    for seq, lbl in zip(new_sequences, new_labels):
        replay_buffer_cnn.add(seq, lbl)
    
    # Sample replay data for this update.
    # The chunk was added just above, so the draw may include samples from it.
    replay_samples = replay_buffer_cnn.sample(replay_sample_size)
    replay_sequences, replay_labels = zip(*replay_samples)
    replay_sequences = np.array(replay_sequences)
    replay_labels = np.array(replay_labels)
    
    # Combine new data with replayed samples to form the training set for this
    # update (new samples first, replayed samples after them)
    train_sequences = np.vstack((new_sequences, replay_sequences))
    train_labels = np.vstack((new_labels, replay_labels))
    
    # Train the model for a fixed number of epochs on the combined data (simulate online update)
    # NOTE(review): per the Keras docs validation_split holds out the trailing
    # rows, which here come from the replay sample - confirm this is intended.
    start_time_chunk = time.time()
    history = cnn_model.fit(
        train_sequences, train_labels,
        batch_size=chunk_batch_size,
        epochs=epochs_per_chunk,
        verbose=0,
        validation_split=0.1
    )
    end_time_chunk = time.time()
    
    # Only the whole chunk is timed; the per-epoch time is the even share of it
    chunk_time = end_time_chunk - start_time_chunk
    avg_epoch_time = chunk_time / epochs_per_chunk
    
    # Record per-epoch metrics for this chunk update
    for epoch in range(epochs_per_chunk):
        global_epoch += 1
        loss = history.history['loss'][epoch]
        val_loss = history.history['val_loss'][epoch]
        # "Accuracy" is defined here as 1 - loss for visualization purposes.
        # It is not a classification accuracy and is negative whenever loss > 1.
        acc = 1 - loss
        val_acc = 1 - val_loss
        cnn_metrics_history["epoch"].append(global_epoch)
        cnn_metrics_history["loss"].append(loss)
        cnn_metrics_history["val_loss"].append(val_loss)
        cnn_metrics_history["accuracy"].append(acc)
        cnn_metrics_history["val_accuracy"].append(val_acc)
        cnn_epoch_times.append(avg_epoch_time)
    
    # One summary line per chunk: losses averaged over the chunk's epochs
    print(f"Chunk {chunk_idx//chunk_batch_size + 1}/{num_chunks}: "
          f"Avg Loss={np.mean(history.history['loss']):.4f}, "
          f"Avg Val Loss={np.mean(history.history['val_loss']):.4f}, "
          f"Time for chunk={chunk_time:.2f}s")

# -----------------------------------------------------------------------------
# 5. Visualization of Performance Metrics per Epoch for CNN Model
# -----------------------------------------------------------------------------
# Figure 1: training vs. validation loss for every recorded epoch (epochs are
# numbered globally across chunk updates). Written as a PNG to the current
# working directory, then displayed.
plt.figure(figsize=(10, 6))
plt.plot(cnn_metrics_history["epoch"], cnn_metrics_history["loss"], label="Training Loss", marker="o")
plt.plot(cnn_metrics_history["epoch"], cnn_metrics_history["val_loss"], label="Validation Loss", marker="s", linestyle="--")
plt.title("CNN Model: Training and Validation Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("cnn_loss_per_epoch.png")
plt.show()

# Figure 2: the "accuracy" series recorded by the training loop, i.e. 1 - loss
# (a visualization proxy, not a classification accuracy).
plt.figure(figsize=(10, 6))
plt.plot(cnn_metrics_history["epoch"], cnn_metrics_history["accuracy"], label="Training Accuracy", marker="o")
plt.plot(cnn_metrics_history["epoch"], cnn_metrics_history["val_accuracy"], label="Validation Accuracy", marker="s", linestyle="--")
plt.title("CNN Model: Training and Validation Accuracy per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("cnn_accuracy_per_epoch.png")
plt.show()

# Figure 3: time per epoch. Values are chunk averages, so every epoch within a
# chunk shows the same bar height.
plt.figure(figsize=(10, 6))
plt.bar(cnn_metrics_history["epoch"], cnn_epoch_times, color="blue", alpha=0.7)
plt.title("CNN Model: Training Time per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Time (seconds)")
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("cnn_time_per_epoch.png")
plt.show()

# -----------------------------------------------------------------------------
# 6. Forecasting Visualization: Compare Actual vs. Predicted Values for CNN Model
# -----------------------------------------------------------------------------
# Select the last sequence from the dataset and its corresponding actual labels
# NOTE(review): the training loop iterates over every window in `sequences`, so
# this window was part of the final training chunk - the plot is an in-sample
# fit check rather than a held-out forecast.
last_sequence = sequences[-1]
actual_values = labels[-1].flatten()

# Generate forecast using the trained CNN model.
# Note: The CNN model output shape is (batch_size, prediction_length). We use the first (and only) sample.
# expand_dims adds the batch axis that predict() expects.
predicted_values = cnn_model.predict(np.expand_dims(last_sequence, axis=0)).flatten()

# Overlay actual and predicted values; the shaded band marks their difference.
plt.figure(figsize=(12, 8))
plt.plot(range(len(actual_values)), actual_values, label="Actual Energy Consumption", color="black", marker="o")
plt.plot(range(len(predicted_values)), predicted_values, label="CNN Model Forecast", color="blue", linestyle="--", marker="s")
plt.fill_between(range(len(actual_values)), actual_values, predicted_values, 
                 color="gray", alpha=0.3, label="Difference")
plt.title("Energy Forecasting: Actual vs Predicted (CNN Model)", fontsize=16, fontweight="bold")
plt.xlabel("Time Steps", fontsize=14)
plt.ylabel("Normalized Energy Consumption", fontsize=14)
plt.legend(fontsize=12, loc="best", title="Legend", title_fontsize=13)
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("cnn_forecasting_actual_vs_predicted.png")
plt.show()

# -----------------------------------------------------------------------------
# 7. Save the Trained CNN Model and Performance Metrics for Later Comparison
# -----------------------------------------------------------------------------
# Write the trained model to the current working directory (.h5 file name).
model_save_path = "cnn_model_incremental.h5"
cnn_model.save(model_save_path)
print(f"CNN model saved to {model_save_path}")

# Save performance metrics to a file for later comparison.
# All per-epoch arrays have one entry per recorded epoch; `accuracy` and
# `val_accuracy` hold the 1 - loss proxy computed in the training loop.
# `forecast_pred` / `actual` are the single forecast window plotted above.
np.savez("cnn_metrics.npz",
         epochs=np.array(cnn_metrics_history["epoch"]),
         loss=np.array(cnn_metrics_history["loss"]),
         val_loss=np.array(cnn_metrics_history["val_loss"]),
         accuracy=np.array(cnn_metrics_history["accuracy"]),
         val_accuracy=np.array(cnn_metrics_history["val_accuracy"]),
         epoch_times=np.array(cnn_epoch_times),
         forecast_pred=np.array(predicted_values),
         actual=np.array(actual_values))
print("CNN model metrics saved to cnn_metrics.npz")


# 3. LSTM Model with Incremental Learning

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from collections import deque
import random
import time
import matplotlib.pyplot as plt

# -----------------------------------------------------------------------------
# 1. Data Loading and Preprocessing
# -----------------------------------------------------------------------------
# Load the CSV, parse timestamps and order rows chronologically so the sliding
# windows built below follow time order.
data = pd.read_csv('/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv')
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data.sort_values('Timestamp', inplace=True)

# Univariate setup: only Energy_Demand is used. The scaler is fitted on the
# whole series, i.e. before any train/validation separation.
features = ['Energy_Demand']
scaler = MinMaxScaler()
data_features = scaler.fit_transform(data[features].values)

sequence_length = 48    # input window length (rows)
prediction_length = 48  # forecast horizon (rows)

# Create sequences and labels using a sliding window approach
# Window i covers rows [i, i + sequence_length); its label is the next
# prediction_length rows. Consecutive windows are shifted by one row.
sequences, labels = [], []
for i in range(len(data_features) - sequence_length - prediction_length + 1):
    seq = data_features[i:i + sequence_length]
    label = data_features[i + sequence_length:i + sequence_length + prediction_length]
    sequences.append(seq)
    labels.append(label)

# Only the labels are cast to float32 here; sequences keep the scaler's dtype.
sequences = np.array(sequences)
labels = np.array(labels).astype(np.float32)

# -----------------------------------------------------------------------------
# 2. Replay Buffer for Incremental Learning
# -----------------------------------------------------------------------------
class ReplayBuffer:
    """Fixed-capacity memory of (sequence, label) pairs for replay training.

    The oldest pair is discarded automatically once ``max_size`` is exceeded.
    """

    def __init__(self, max_size=1000):
        # maxlen makes the deque drop its oldest entry on overflow.
        self.buffer = deque(maxlen=max_size)

    def add(self, sequence, label):
        """Remember one (sequence, label) pair."""
        entry = (sequence, label)
        self.buffer.append(entry)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct stored pairs at random.

        If the buffer currently holds fewer than ``batch_size`` pairs, all of
        them are returned in insertion order rather than a random subset.
        """
        held = self.buffer
        if batch_size > len(held):
            return list(held)
        return random.sample(held, batch_size)

def incremental_learning_update(model, new_sequences, new_labels, replay_buffer,
                                batch_size=32, epochs=1, replay_size=None):
    """Perform one replay-based incremental fit and return the fit history.

    New samples are stored in ``replay_buffer`` first; a sample is then drawn
    from the buffer and concatenated after the new samples to form the training
    set. Since the draw follows the insert, replayed samples may repeat new ones.

    Args:
        model: Object with a Keras-style ``fit`` method accepting ``batch_size``,
            ``epochs``, ``verbose`` and ``validation_split`` keyword arguments.
        new_sequences: Array-like of input windows, sample axis first.
        new_labels: Array-like of targets aligned with ``new_sequences``.
        replay_buffer: Object with ``add(sequence, label)`` and ``sample(k)``.
        batch_size: Mini-batch size forwarded to ``model.fit``.
        epochs: Epoch count forwarded to ``model.fit``.
        replay_size: How many samples to draw from the buffer; ``None`` keeps
            the original behaviour of drawing ``batch_size`` samples.

    Returns:
        The value returned by ``model.fit``.

    Raises:
        ValueError: If the replay buffer yields no samples to train on.
    """
    # Add new samples to the replay buffer.
    for seq, lbl in zip(new_sequences, new_labels):
        replay_buffer.add(seq, lbl)

    # Sample from the replay buffer.
    draw_count = batch_size if replay_size is None else replay_size
    replay_samples = replay_buffer.sample(draw_count)
    if not replay_samples:
        raise ValueError("replay buffer returned no samples; nothing to train on")
    replay_sequences, replay_labels = zip(*replay_samples)

    # Combine new data with the replayed samples along the sample axis.
    # np.concatenate keeps 1-D label arrays 1-D, whereas np.vstack would turn
    # two 1-D label arrays into a 2-row matrix.
    combined_sequences = np.concatenate(
        (np.asarray(new_sequences), np.asarray(replay_sequences)), axis=0)
    combined_labels = np.concatenate(
        (np.asarray(new_labels), np.asarray(replay_labels)), axis=0)

    # Incremental training on the combined data.
    history = model.fit(combined_sequences, combined_labels,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=0,
                        validation_split=0.1)
    return history

# -----------------------------------------------------------------------------
# 3. Define the LSTM Model
# -----------------------------------------------------------------------------
def build_lstm_model(seq_len=sequence_length, pred_len=prediction_length, hidden_dim=128):
    """Build and compile a three-layer stacked LSTM multi-step forecaster.

    Args:
        seq_len: Number of time steps in each input window.
        pred_len: Number of values the model emits per input window.
        hidden_dim: Number of units in each LSTM layer.

    Returns:
        A compiled ``tf.keras.Model`` taking ``(seq_len, len(features))`` inputs
        and producing ``pred_len`` linear outputs, trained with MSE loss.
    """
    keras_layers = tf.keras.layers
    inputs = keras_layers.Input(shape=(seq_len, len(features)))

    # Three LSTM layers with input dropout; only the last one drops the time
    # axis and returns its final state.
    x = inputs
    for keep_time_axis in (True, True, False):
        x = keras_layers.LSTM(hidden_dim, return_sequences=keep_time_axis, dropout=0.3)(x)

    # Dense head: one hidden layer, then one linear unit per forecast step.
    x = keras_layers.Dense(64, activation='relu')(x)
    outputs = keras_layers.Dense(pred_len, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = tf.optimizers.Adam(learning_rate=0.001)
    tracked_metrics = ['mae', 'mse', tf.keras.metrics.MeanAbsolutePercentageError(name='mape')]
    model.compile(optimizer=optimizer, loss='mse', metrics=tracked_metrics)
    return model

# Build and summarize the LSTM model (all builder arguments left at their defaults)
lstm_model = build_lstm_model()
lstm_model.summary()

# -----------------------------------------------------------------------------
# 4. Incremental Online Training Loop with Per-Epoch Metrics for the LSTM Model
# -----------------------------------------------------------------------------
# The windows in `sequences` are consumed in order, chunk_batch_size at a time.
# Each chunk is pushed into the replay buffer once, a replay sample is drawn,
# and the model is fitted for epochs_per_chunk epochs on chunk + replay sample.
# Training configuration
chunk_batch_size = 20        # Number of new samples per update (online chunk)
epochs_per_chunk = 150       # Number of epochs per update
replay_sample_size = 32      # Number of samples drawn from the replay buffer per update

# Per-epoch metric lists (one entry per epoch across all chunks) and a parallel
# list of per-epoch training times
lstm_metrics_history = {
    "epoch": [],
    "loss": [],
    "val_loss": [],
    "accuracy": [],
    "val_accuracy": []
}
lstm_epoch_times = []

replay_buffer_lstm = ReplayBuffer(max_size=1000)
global_epoch = 0  # running epoch counter across all chunk updates

# Ceiling division: the final chunk may hold fewer than chunk_batch_size samples
num_chunks = int(np.ceil(len(sequences) / chunk_batch_size))
print(f"Starting online incremental training for LSTM model over {num_chunks} chunks...")

for chunk_idx in range(0, len(sequences), chunk_batch_size):
    # Get the new data chunk
    new_sequences = sequences[chunk_idx:chunk_idx + chunk_batch_size]
    new_labels = labels[chunk_idx:chunk_idx + chunk_batch_size]

    # Add new data to the replay buffer (exactly once per chunk)
    for seq, lbl in zip(new_sequences, new_labels):
        replay_buffer_lstm.add(seq, lbl)

    # Sample replay data for this update
    replay_samples = replay_buffer_lstm.sample(replay_sample_size)
    replay_sequences, replay_labels = zip(*replay_samples)
    replay_sequences = np.array(replay_sequences)
    replay_labels = np.array(replay_labels)

    # Combine new data with replayed samples to form the training set for this update
    train_sequences = np.vstack((new_sequences, replay_sequences))
    train_labels = np.vstack((new_labels, replay_labels))

    # Train the model for a fixed number of epochs on the combined data (simulate online update).
    # Fit directly on the arrays built above, as the CNN cell does. Calling
    # incremental_learning_update here would push the chunk into the buffer a
    # second time and ignore train_sequences / replay_sample_size.
    start_time_chunk = time.time()
    history = lstm_model.fit(
        train_sequences, train_labels,
        batch_size=chunk_batch_size,
        epochs=epochs_per_chunk,
        verbose=0,
        validation_split=0.1
    )
    end_time_chunk = time.time()

    # Only the whole chunk is timed; the per-epoch time is the even share of it
    chunk_time = end_time_chunk - start_time_chunk
    avg_epoch_time = chunk_time / epochs_per_chunk

    # Record per-epoch metrics for this chunk update
    for epoch in range(epochs_per_chunk):
        global_epoch += 1
        loss = history.history['loss'][epoch]
        val_loss = history.history['val_loss'][epoch]
        # "Accuracy" is defined here as 1 - loss for visualization purposes.
        acc = 1 - loss
        val_acc = 1 - val_loss
        lstm_metrics_history["epoch"].append(global_epoch)
        lstm_metrics_history["loss"].append(loss)
        lstm_metrics_history["val_loss"].append(val_loss)
        lstm_metrics_history["accuracy"].append(acc)
        lstm_metrics_history["val_accuracy"].append(val_acc)
        lstm_epoch_times.append(avg_epoch_time)

    # One summary line per chunk: losses averaged over the chunk's epochs
    print(f"Chunk {chunk_idx//chunk_batch_size + 1}/{num_chunks}: "
          f"Avg Loss={np.mean(history.history['loss']):.4f}, "
          f"Avg Val Loss={np.mean(history.history['val_loss']):.4f}, "
          f"Time for chunk={chunk_time:.2f}s")

# -----------------------------------------------------------------------------
# 5. Visualization of Performance Metrics per Epoch for the LSTM Model
# -----------------------------------------------------------------------------
# Figure 1: training vs. validation loss for every recorded epoch (epochs are
# numbered globally across chunk updates). Written as a PNG to the current
# working directory, then displayed.
plt.figure(figsize=(10, 6))
plt.plot(lstm_metrics_history["epoch"], lstm_metrics_history["loss"], label="Training Loss", marker="o")
plt.plot(lstm_metrics_history["epoch"], lstm_metrics_history["val_loss"], label="Validation Loss", marker="s", linestyle="--")
plt.title("LSTM Model: Training and Validation Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("lstm_loss_per_epoch.png")
plt.show()

# Figure 2: the "accuracy" series recorded by the training loop, i.e. 1 - loss
# (a visualization proxy, not a classification accuracy).
plt.figure(figsize=(10, 6))
plt.plot(lstm_metrics_history["epoch"], lstm_metrics_history["accuracy"], label="Training Accuracy", marker="o")
plt.plot(lstm_metrics_history["epoch"], lstm_metrics_history["val_accuracy"], label="Validation Accuracy", marker="s", linestyle="--")
plt.title("LSTM Model: Training and Validation Accuracy per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("lstm_accuracy_per_epoch.png")
plt.show()

# Figure 3: time per epoch. Values are chunk averages, so every epoch within a
# chunk shows the same bar height.
plt.figure(figsize=(10, 6))
plt.bar(lstm_metrics_history["epoch"], lstm_epoch_times, color="blue", alpha=0.7)
plt.title("LSTM Model: Training Time per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Time (seconds)")
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("lstm_time_per_epoch.png")
plt.show()

# -----------------------------------------------------------------------------
# 6. Forecasting Visualization: Compare Actual vs. Predicted Values for LSTM Model
# -----------------------------------------------------------------------------
# Select the last sequence from the dataset and its corresponding actual labels
# NOTE(review): the training loop iterates over every window in `sequences`, so
# this window was part of the final training chunk - the plot is an in-sample
# fit check rather than a held-out forecast.
last_sequence = sequences[-1]
actual_values = labels[-1].flatten()

# Generate forecast using the trained LSTM model.
# Note: The model output shape is (batch_size, prediction_length). We use the first sample.
# expand_dims adds the batch axis that predict() expects.
predicted_values = lstm_model.predict(np.expand_dims(last_sequence, axis=0)).flatten()

# Overlay actual and predicted values (both in the MinMaxScaler-scaled units
# used for training); the shaded band marks their difference.
plt.figure(figsize=(12, 8))
plt.plot(range(len(actual_values)), actual_values, label="Actual Energy Consumption", color="black", marker="o")
plt.plot(range(len(predicted_values)), predicted_values, label="LSTM Model Forecast", color="blue", linestyle="--", marker="s")
plt.fill_between(range(len(actual_values)), actual_values, predicted_values, 
                 color="gray", alpha=0.3, label="Difference")
plt.title("Energy Forecasting: Actual vs Predicted (LSTM Model)", fontsize=16, fontweight="bold")
plt.xlabel("Time Steps", fontsize=14)
plt.ylabel("Normalized Energy Consumption", fontsize=14)
plt.legend(fontsize=12, loc="best", title="Legend", title_fontsize=13)
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("lstm_forecasting_actual_vs_predicted.png")
plt.show()

# -----------------------------------------------------------------------------
# 7. Save the Trained LSTM Model and Performance Metrics for Later Comparison
# -----------------------------------------------------------------------------
# Write the trained model to the current working directory (.h5 file name).
lstm_model_save_path = "lstm_model_incremental.h5"
lstm_model.save(lstm_model_save_path)
print(f"LSTM model saved to {lstm_model_save_path}")

# Save performance metrics to a file for later comparison.
# All per-epoch arrays have one entry per recorded epoch; `accuracy` and
# `val_accuracy` hold the 1 - loss proxy computed in the training loop.
# `forecast_pred` / `actual` are the single forecast window plotted above.
np.savez("lstm_metrics.npz",
         epochs=np.array(lstm_metrics_history["epoch"]),
         loss=np.array(lstm_metrics_history["loss"]),
         val_loss=np.array(lstm_metrics_history["val_loss"]),
         accuracy=np.array(lstm_metrics_history["accuracy"]),
         val_accuracy=np.array(lstm_metrics_history["val_accuracy"]),
         epoch_times=np.array(lstm_epoch_times),
         forecast_pred=np.array(predicted_values),
         actual=np.array(actual_values))
print("LSTM model metrics saved to lstm_metrics.npz")


# 4. LSTM Model with Non-Incremental (Batch) Training

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# -----------------------------------------------------------------------------
# 1. Data Loading and Preprocessing
# -----------------------------------------------------------------------------
# Load the full dataset from the Kaggle input directory.
data = pd.read_csv('/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv')

# Convert Timestamp to datetime and sort chronologically so the sliding windows
# and the train/test split below follow time order
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data.sort_values('Timestamp', inplace=True)

# Use 'Energy_Demand' as the feature for prediction and normalize.
# The scaler is fitted on the whole series, i.e. before the train/test split.
features = ['Energy_Demand']
scaler = MinMaxScaler()
data_features = scaler.fit_transform(data[features].values)

sequence_length = 48    # input window length (rows)
prediction_length = 48  # forecast horizon (rows)

# Create sequences and labels
# Window i covers rows [i, i + sequence_length); its label is the next
# prediction_length rows. Consecutive windows are shifted by one row.
sequences, labels = [], []
for i in range(len(data_features) - sequence_length - prediction_length + 1):
    seq = data_features[i:i + sequence_length]
    label = data_features[i + sequence_length:i + sequence_length + prediction_length]
    sequences.append(seq)
    labels.append(label)

sequences = np.array(sequences).astype(np.float32)
labels = np.array(labels).astype(np.float32)

# Split into training and testing sets: first 80% of the windows (in time
# order) for training, the remaining 20% for testing. No shuffling is applied.
split_idx = int(len(sequences) * 0.8)
x_train, x_test = sequences[:split_idx], sequences[split_idx:]
y_train, y_test = labels[:split_idx], labels[split_idx:]

print(f"Training sequences shape: {x_train.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Testing sequences shape: {x_test.shape}")
print(f"Testing labels shape: {y_test.shape}")

# -----------------------------------------------------------------------------
# 2. Define the LSTM Model
# -----------------------------------------------------------------------------
def build_lstm_model(seq_len, pred_len, hidden_dim=128, feature_dim=1):
    """Build and compile a three-layer stacked LSTM multi-step forecaster.

    Args:
        seq_len: Number of time steps in each input window.
        pred_len: Number of values the model emits per input window.
        hidden_dim: Number of units in each LSTM layer.
        feature_dim: Number of features per time step.

    Returns:
        A compiled ``tf.keras.Model`` taking ``(seq_len, feature_dim)`` inputs
        and producing ``pred_len`` linear outputs, trained with MSE loss.
    """
    keras_layers = tf.keras.layers
    inputs = keras_layers.Input(shape=(seq_len, feature_dim))

    # Three LSTM layers with input dropout; only the last one drops the time
    # axis and returns its final state.
    x = inputs
    for keep_time_axis in (True, True, False):
        x = keras_layers.LSTM(hidden_dim, return_sequences=keep_time_axis, dropout=0.3)(x)

    # Dense head: one hidden layer, then one linear unit per forecast step.
    x = keras_layers.Dense(64, activation='relu')(x)
    outputs = keras_layers.Dense(pred_len, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = tf.optimizers.Adam(learning_rate=0.001)
    tracked_metrics = ['mae', 'mse', tf.keras.metrics.MeanAbsolutePercentageError(name='mape')]
    model.compile(optimizer=optimizer, loss='mse', metrics=tracked_metrics)
    return model

# Build the model
lstm_model_nonil = build_lstm_model(seq_len=sequence_length, pred_len=prediction_length, feature_dim=1)
lstm_model_nonil.summary()

# -----------------------------------------------------------------------------
# 3. Initialize Metrics Storage
# -----------------------------------------------------------------------------
# One entry is appended to each list per training epoch.
training_times = []
losses = []
val_losses = []
avg_accuracy = []    # Using MAE as a proxy for "accuracy" (lower MAE is better)
val_accuracy = []    # Using validation MAE as well

# -----------------------------------------------------------------------------
# 4. Training Loop and Metrics Collection (Non-Incremental)
# -----------------------------------------------------------------------------
# fit() is called once per epoch on the full training set so each epoch can be
# timed individually; the model keeps its weights between calls.
batch_size = 32
epochs = 3000
for epoch in range(epochs):
    print(f"Starting epoch {epoch + 1}/{epochs}")
    epoch_start_time = time.time()
    history = lstm_model_nonil.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=1,  # Train one epoch at a time to capture metrics
        validation_split=0.2,
        shuffle=True,
        verbose=1
    )
    epoch_end_time = time.time()

    # Record training time for this epoch (wall time of the whole fit() call)
    training_times.append(epoch_end_time - epoch_start_time)

    # Record loss and validation loss (index 0: the single epoch just run)
    losses.append(history.history['loss'][0])
    val_losses.append(history.history['val_loss'][0])

    # Record average accuracy and validation accuracy (using MAE as proxy)
    avg_accuracy.append(history.history['mae'][0])
    val_accuracy.append(history.history['val_mae'][0])

# -----------------------------------------------------------------------------
# 5. Save the Trained LSTM Model (Non-Incremental)
# -----------------------------------------------------------------------------
# Write the trained model to the current working directory (.h5 file name).
lstm_model_nonil_save_path = "lstm_model_nonil.h5"
lstm_model_nonil.save(lstm_model_nonil_save_path)
print(f"Model saved to {lstm_model_nonil_save_path}")

# -----------------------------------------------------------------------------
# 6. Forecasting Visualization: Generate Final Forecast on Test Data
# -----------------------------------------------------------------------------
# For forecasting, we select the last sample from the test set.
# expand_dims adds the batch axis that predict() expects; flatten() turns the
# single prediction and its label into 1-D arrays for plotting.
forecast_pred = lstm_model_nonil.predict(np.expand_dims(x_test[-1], axis=0)).flatten()
actual = y_test[-1].flatten()

# Plot the forecast against the actual values (both in the MinMaxScaler-scaled
# units used for training); the shaded band marks their difference.
plt.figure(figsize=(12, 8))
plt.plot(range(len(actual)), actual, label="Actual Energy Demand", color="black", marker="o")
plt.plot(range(len(forecast_pred)), forecast_pred, label="LSTM Forecast (Non-Incremental)", color="blue", linestyle="--", marker="s")
plt.fill_between(range(len(actual)), actual, forecast_pred, color="gray", alpha=0.3, label="Difference")
plt.title("Energy Forecasting: Actual vs Predicted (LSTM Non-Incremental Model)", fontsize=16, fontweight="bold")
plt.xlabel("Time Steps", fontsize=14)
plt.ylabel("Normalized Energy Consumption", fontsize=14)
plt.legend(fontsize=12, loc="best", title="Legend", title_fontsize=13)
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("lstm_nonil_forecasting.png")
plt.show()

# -----------------------------------------------------------------------------
# 7. Save Performance Metrics for Later Comparison
# -----------------------------------------------------------------------------
# Create an array for epoch numbers (1 to epochs)
epoch_numbers = np.arange(1, epochs + 1)

# NOTE: in this file `accuracy` / `val_accuracy` store MAE values (lower is
# better), as collected in the training loop above.
np.savez("lstm_nonil_metrics.npz",
         epochs=epoch_numbers,
         loss=np.array(losses),
         val_loss=np.array(val_losses),
         accuracy=np.array(avg_accuracy),
         val_accuracy=np.array(val_accuracy),
         epoch_times=np.array(training_times),
         forecast_pred=forecast_pred,
         actual=actual)
print("LSTM Non-Incremental model metrics saved to lstm_nonil_metrics.npz")


**xLSTM Setup: Cloning the Repository and Importing Modules**

In [None]:
# Clone the XLSTM repository
!git clone https://github.com/NX-AI/xlstm.git

# Navigate to the xlstm directory
%cd xlstm




In [None]:
# Install PyTorch
!pip install torch torchvision torchaudio

# Install additional dependencies
!pip install numpy pandas scikit-learn matplotlib

# Install the xLSTM package
!pip install xlstm


In [None]:
#!pip install seaborn


In [None]:
from xlstm.xlstm.blocks.mlstm.layer import mLSTMLayerConfig
from xlstm.xlstm.blocks.slstm.layer import sLSTMLayerConfig
from xlstm.xlstm.xlstm_block_stack import xLSTMBlockStack, xLSTMBlockStackConfig
from xlstm.xlstm.blocks.mlstm.block import mLSTMBlockConfig
from xlstm.xlstm.blocks.slstm.block import sLSTMBlockConfig
from xlstm.xlstm.components.feedforward import FeedForwardConfig

In [None]:
!apt-get install ninja-build -y


In [None]:
!ninja --version


In [None]:
!nvcc --version


In [None]:
#!rm -rf /root/.cache/torch_extensions/


In [None]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())


In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121


In [None]:
!pip install torchinfo


In [None]:
try:
    from torchinfo import summary  # For model summary
except ImportError:
    !pip install torchinfo
    from torchinfo import summary


# 5. xLSTM Incremental Learning Model

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Subset
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import time
from collections import deque
import random
import matplotlib.pyplot as plt
from torchinfo import summary

# Import XLSTM components
from xlstm.xlstm.blocks.slstm.layer import sLSTMLayerConfig
from xlstm.xlstm.blocks.slstm.block import sLSTMBlockConfig
from xlstm.xlstm.blocks.mlstm.layer import mLSTMLayerConfig
from xlstm.xlstm.blocks.mlstm.block import mLSTMBlockConfig
from xlstm.xlstm.xlstm_block_stack import xLSTMBlockStack, xLSTMBlockStackConfig

# -----------------------------------------------------------------------------
# 1. Data Loading and Preprocessing
# -----------------------------------------------------------------------------
def load_energy_dataset(file_path, sequence_length=48, prediction_length=48):
    """Load the energy CSV and build sliding-window tensors for Energy_Demand.

    The rows are ordered by timestamp, resampled to hourly means (gaps are
    forward-filled), and the Energy_Demand column is scaled with a
    ``MinMaxScaler`` fitted on the whole hourly series. Each sample pairs
    ``sequence_length`` consecutive hourly values with the following
    ``prediction_length`` values.

    Args:
        file_path: Path of the CSV file. It must contain a ``Timestamp`` column
            and the seven numeric columns listed in the function body.
        sequence_length: Number of hourly steps in each input window.
        prediction_length: Number of hourly steps in each target window.

    Returns:
        A tuple ``(loader, scaler, data_hourly)``: a shuffling ``DataLoader``
        (batch size 32) over ``X`` of shape ``(num_samples, sequence_length, 1)``
        and ``y`` of shape ``(num_samples, prediction_length)``, the fitted
        scaler, and the hourly-resampled DataFrame.

    Raises:
        ValueError: If the hourly series is too short to form a single window.
    """
    data = pd.read_csv(file_path)
    data['Datetime'] = pd.to_datetime(data['Timestamp'])
    data = data.sort_values(by='Datetime').reset_index(drop=True)
    numeric_columns = ['Energy_Demand', 'Energy_Supply', 'Temperature', 'Grid_Load',
                       'Renewable_Source_Output', 'NonRenewable_Source_Output', 'Energy_Price']
    # Selecting the needed columns makes an explicit drop of the raw timestamp
    # and weather columns unnecessary, and avoids a KeyError when a file does
    # not contain the weather columns.
    data = data[['Datetime'] + numeric_columns]
    data_hourly = data.set_index('Datetime').resample('h').mean().ffill()

    # Scale the 'Energy_Demand' column
    energy_demand = data_hourly['Energy_Demand'].values.reshape(-1, 1)
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(energy_demand)

    window_span = sequence_length + prediction_length
    num_samples = len(data_scaled) - window_span + 1
    if num_samples <= 0:
        raise ValueError(
            f"Need at least {window_span} hourly rows to build one sample, "
            f"got {len(data_scaled)}"
        )

    # Convert once to a float32 tensor and slice it, instead of building a
    # tensor from a Python list of NumPy arrays (slow, and PyTorch warns).
    series = torch.as_tensor(data_scaled, dtype=torch.float32)  # (num_rows, 1)
    X = torch.stack([series[i : i + sequence_length] for i in range(num_samples)])
    y = torch.stack([series[i + sequence_length : i + window_span] for i in range(num_samples)])
    # Squeeze last dimension so that y has shape (num_samples, prediction_length)
    y = y.squeeze(-1)
    return DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True), scaler, data_hourly

# -----------------------------------------------------------------------------
# 2. Split Data into Chunks for Incremental Training
# -----------------------------------------------------------------------------
def split_data_into_chunks(data_loader, chunk_size, batch_size=32):
    """Split a loader's underlying tensors into consecutive chunk loaders.

    The split follows the stored sample order of ``data_loader.dataset.tensors``
    (any shuffling done by ``data_loader`` itself is ignored). Every chunk is
    wrapped in its own shuffling ``DataLoader``.

    Args:
        data_loader: ``DataLoader`` whose dataset exposes a ``tensors`` tuple
            (e.g. a ``TensorDataset``).
        chunk_size: Maximum number of samples per chunk; must be positive.
        batch_size: Batch size of each chunk loader (default 32, the value that
            was previously hard-coded).

    Returns:
        A list of ``DataLoader`` objects, one per chunk; the last chunk may be
        smaller than ``chunk_size``. The list is empty for an empty dataset.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    tensors = data_loader.dataset.tensors
    num_samples = len(tensors[0])
    # Every start index is below num_samples, so no chunk can be empty.
    return [
        DataLoader(
            TensorDataset(*(t[start : start + chunk_size] for t in tensors)),
            batch_size=batch_size,
            shuffle=True,
        )
        for start in range(0, num_samples, chunk_size)
    ]

# -----------------------------------------------------------------------------
# 3. Define XLSTM Model
# -----------------------------------------------------------------------------
class XLSTMModel(nn.Module):
    """Sequence-to-vector forecaster built from sLSTM and mLSTM block stacks.

    The input is linearly embedded, passed through `slstm_blocks` single-block
    sLSTM stacks followed by `mlstm_blocks` single-block mLSTM stacks, and the
    representation of the final time step is projected to `output_size` values.
    """

    def __init__(self, input_size, hidden_size, output_size, slstm_blocks=1, mlstm_blocks=1,
                 context_length=48, slstm_backend=None):
        """
        output_size is set equal to prediction_length (e.g., 48).

        Args:
            input_size: Number of features per time step.
            hidden_size: Embedding width used by every block.
            output_size: Number of values predicted per sample.
            slstm_blocks: How many sLSTM block stacks to add (added first).
            mlstm_blocks: How many mLSTM block stacks to add (added second).
            context_length: Passed to every xLSTMBlockStackConfig; defaults to
                the previously hard-coded 48. Presumably this should match the
                input sequence length - confirm against the xlstm docs.
            slstm_backend: sLSTM kernel backend. ``None`` (default) selects
                'cuda' when CUDA is available and 'vanilla' otherwise, so the
                model can also be built on CPU-only machines.
        """
        super(XLSTMModel, self).__init__()
        if slstm_backend is None:
            # The original always requested the CUDA kernels, which is
            # inconsistent with scripts that fall back to a CPU device.
            slstm_backend = 'cuda' if torch.cuda.is_available() else 'vanilla'
        self.input_embedding = nn.Linear(input_size, hidden_size)
        self.blocks = nn.ModuleList()
        block_idx = 0
        # Add sLSTM blocks first
        for _ in range(slstm_blocks):
            slstm_layer_config = sLSTMLayerConfig(
                hidden_size=hidden_size,
                num_heads=4,
                num_states=4,
                backend=slstm_backend,
                function='slstm',
                dropout=0.0
            )
            slstm_block_config = sLSTMBlockConfig(
                slstm=slstm_layer_config,
                _num_blocks=1,
                _block_idx=block_idx
            )
            self.blocks.append(
                xLSTMBlockStack(config=xLSTMBlockStackConfig(
                    slstm_block=slstm_block_config,
                    num_blocks=1,
                    context_length=context_length,
                    embedding_dim=hidden_size,
                    dropout=0.0,
                    add_post_blocks_norm=False
                ))
            )
            block_idx += 1
        # Then add mLSTM blocks
        for _ in range(mlstm_blocks):
            mlstm_layer_config = mLSTMLayerConfig(
                num_heads=4,
                embedding_dim=hidden_size,
                dropout=0.0
            )
            mlstm_block_config = mLSTMBlockConfig(
                mlstm=mlstm_layer_config,
                _num_blocks=1,
                _block_idx=block_idx
            )
            self.blocks.append(
                xLSTMBlockStack(config=xLSTMBlockStackConfig(
                    mlstm_block=mlstm_block_config,
                    num_blocks=1,
                    context_length=context_length,
                    embedding_dim=hidden_size,
                    dropout=0.0,
                    add_post_blocks_norm=False
                ))
            )
            block_idx += 1
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map x of shape (batch_size, context_length, input_size) to (batch_size, output_size)."""
        x = self.input_embedding(x)  # (batch_size, context_length, hidden_size)
        for block in self.blocks:
            x = block(x)
        # Only the representation of the final time step feeds the output head.
        x = self.fc(x[:, -1, :])
        return x

# -----------------------------------------------------------------------------
# 4. Incremental Training (with Training & Validation split per chunk)
# -----------------------------------------------------------------------------
def train_one_epoch(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over `train_loader`.

    Args:
        model: Module to train; switched to train mode.
        train_loader: Sized iterable of (inputs, targets) batches.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss callable taking (outputs, targets).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Mean of the per-batch losses (unweighted by batch size), or 0 when the
        loader yields no batches - mirroring `evaluate` instead of raising
        ZeroDivisionError.
    """
    model.train()
    total_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Guard the empty-loader case the same way evaluate() does.
    return total_loss / len(train_loader) if len(train_loader) > 0 else 0

def evaluate(model, val_loader, criterion, device):
    """Compute the mean per-batch loss over `val_loader` without gradients.

    The model is put into eval mode. Returns 0 when the loader has no batches.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_losses.append(criterion(model(inputs), targets).item())
    num_batches = len(val_loader)
    if num_batches == 0:
        return 0
    return sum(batch_losses) / num_batches

def split_train_val(chunk_loader, val_ratio=0.1):
    """Randomly split a chunk's dataset into training and validation loaders.

    Indices are shuffled with the global `random` module. Whenever the chunk
    has more than one sample, at least one sample goes to validation. If the
    split would leave no training samples, both loaders cover the full chunk.

    Returns:
        (train_loader, val_loader): batch_size=32 loaders; only the training
        loader shuffles.
    """
    dataset = chunk_loader.dataset
    n_total = len(dataset)
    n_val = int(val_ratio * n_total)
    if n_total > 1:
        # Guarantee a non-empty validation set when there is data to spare.
        n_val = max(n_val, 1)

    shuffled = list(range(n_total))
    random.shuffle(shuffled)
    val_indices, train_indices = shuffled[:n_val], shuffled[n_val:]
    if not train_indices:
        # Degenerate split: reuse every sample for both roles.
        train_indices = val_indices = shuffled

    return (
        DataLoader(Subset(dataset, train_indices), batch_size=32, shuffle=True),
        DataLoader(Subset(dataset, val_indices), batch_size=32, shuffle=False),
    )

# -----------------------------------------------------------------------------
# 5. Main Incremental Training Script for XLSTM Model
# -----------------------------------------------------------------------------
file_path = '/kaggle/input/integrated-energy-management-and-forecasting/Integrated Energy Management and Forecasting Dataset.csv'

# Seed every RNG this script relies on (index shuffling in split_train_val,
# weight initialisation, DataLoader shuffling) so a Restart & Run All
# reproduces the same curves.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Window sizes are defined once and reused for the dataset, the model head and
# the summary call, instead of repeating the literal 48.
sequence_length = 48
prediction_length = 48
data_loader, scaler, hourly_data = load_energy_dataset(
    file_path, sequence_length=sequence_length, prediction_length=prediction_length
)
chunk_size = 20  # same as in other models
data_chunks = split_data_into_chunks(data_loader, chunk_size=chunk_size)

# Model configuration
input_size = 1
hidden_size = 64
output_size = prediction_length
slstm_blocks = 1
mlstm_blocks = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
modelxlstm = XLSTMModel(input_size, hidden_size, output_size,
                         slstm_blocks=slstm_blocks, mlstm_blocks=mlstm_blocks).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(modelxlstm.parameters(), lr=0.001)

# Print model summary (using dummy input of one batch of 32 windows)
print(summary(modelxlstm, input_size=(32, sequence_length, input_size)))

# Training configuration: epochs per chunk
chunk_epochs = 150

# Per-epoch metrics, indexed by a global epoch counter that keeps increasing
# across chunks.
xlstm_metrics_history = {
    "epoch": [],
    "loss": [],
    "val_loss": [],
    "accuracy": [],
    "val_accuracy": []
}
xlstm_epoch_times = []
global_epoch = 0
chunk_training_times = []

print("Starting incremental training on XLSTM model...")
for i, chunk_loader in enumerate(data_chunks):
    print(f"Training on chunk {i + 1}/{len(data_chunks)}...")
    # Split current chunk into training and validation sets (90/10 split)
    train_loader, val_loader = split_train_val(chunk_loader, val_ratio=0.1)
    chunk_start = time.time()
    # The same model and optimizer are carried over from chunk to chunk.
    for epoch in range(chunk_epochs):
        start_epoch = time.time()
        train_loss = train_one_epoch(modelxlstm, train_loader, optimizer, criterion, device)
        val_loss = evaluate(modelxlstm, val_loader, criterion, device)
        end_epoch = time.time()
        global_epoch += 1
        xlstm_metrics_history["epoch"].append(global_epoch)
        xlstm_metrics_history["loss"].append(train_loss)
        xlstm_metrics_history["val_loss"].append(val_loss)
        # "accuracy" is only a visualization proxy defined as (1 - MSE loss);
        # it is not a classification accuracy.
        xlstm_metrics_history["accuracy"].append(1 - train_loss)
        xlstm_metrics_history["val_accuracy"].append(1 - val_loss)
        xlstm_epoch_times.append(end_epoch - start_epoch)
        if (epoch + 1) % 25 == 0:
            print(f"  Global Epoch {global_epoch}, Chunk Epoch {epoch + 1}/{chunk_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    chunk_end = time.time()
    chunk_training_times.append(chunk_end - chunk_start)

    # Evaluate on the entire chunk. NOTE: this includes the samples the model
    # was just trained on, so these figures are not held-out metrics.
    modelxlstm.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for batch_X, batch_y in chunk_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            preds = modelxlstm(batch_X).cpu().numpy()
            all_preds.extend(preds)
            all_targets.extend(batch_y.cpu().numpy())
    mae = mean_absolute_error(all_targets, all_preds)
    mse = mean_squared_error(all_targets, all_preds)
    r2 = r2_score(all_targets, all_preds)
    print(f"Chunk {i + 1} Evaluation - MAE: {mae:.4f}, MSE: {mse:.4f}, R²: {r2:.4f}, Chunk Time: {chunk_training_times[-1]:.2f}s")

print("Incremental training complete.\n")


# Import before first use so this section does not depend on an earlier cell
# having imported os.
import os

# All figures and metric files below are written relative to this directory.
os.chdir('/kaggle/working/')
print(os.getcwd())

# -----------------------------------------------------------------------------
# 6. Visualization: Plot Training & Validation Loss and Accuracy per Epoch, and Training Time
# -----------------------------------------------------------------------------
epochs = xlstm_metrics_history["epoch"]

# The loss and accuracy figures differ only in the plotted series and their
# captions, so both are drawn from one specification table.
curve_figures = [
    ("loss", "val_loss", "Training Loss", "Validation Loss",
     "XLSTM Model: Training and Validation Loss per Epoch", "Loss",
     "xlstm_loss_per_epoch.png"),
    ("accuracy", "val_accuracy", "Training Accuracy", "Validation Accuracy",
     "XLSTM Model: Training and Validation Accuracy per Epoch", "Accuracy (1 - Loss)",
     "xlstm_accuracy_per_epoch.png"),
]
for train_key, val_key, train_label, val_label, figure_title, y_label, out_name in curve_figures:
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, xlstm_metrics_history[train_key], label=train_label, marker="o")
    plt.plot(epochs, xlstm_metrics_history[val_key], label=val_label, marker="s", linestyle="--")
    plt.title(figure_title)
    plt.xlabel("Global Epoch")
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    plt.savefig(out_name)
    plt.show()

# Wall-clock duration of each epoch (training pass plus validation pass).
plt.figure(figsize=(10, 6))
plt.bar(epochs, xlstm_epoch_times, color="blue", alpha=0.7)
plt.title("XLSTM Model: Training Time per Epoch")
plt.xlabel("Global Epoch")
plt.ylabel("Time (seconds)")
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.savefig("xlstm_time_per_epoch.png")
plt.show()

# -----------------------------------------------------------------------------
# 7. Forecasting Visualization: Compare Actual vs. Predicted Sequences
# -----------------------------------------------------------------------------
# Use the last available sequence from the full dataset for forecasting.
# Take the final input window of the full dataset together with its target.
X_all, y_all = data_loader.dataset.tensors
last_sequence = X_all[-1]
actual_values = y_all[-1].flatten().numpy()

# Single forward pass in eval mode; a batch dimension is added for the model.
modelxlstm.eval()
with torch.no_grad():
    predicted_values = (
        modelxlstm(last_sequence.unsqueeze(0).to(device))
        .cpu()
        .numpy()
        .flatten()
    )

# Map both series back through the scaler (prediction first, then target).
predicted_values_orig, actual_values_orig = (
    scaler.inverse_transform(series.reshape(-1, 1)).flatten()
    for series in (predicted_values, actual_values)
)

plt.figure(figsize=(12, 8))
plt.plot(
    range(len(actual_values_orig)),
    actual_values_orig,
    color="black",
    marker="o",
    label="Actual Energy Demand",
)
plt.plot(
    range(len(predicted_values_orig)),
    predicted_values_orig,
    color="blue",
    marker="s",
    linestyle="--",
    label="XLSTM Forecast",
)
# Shade the gap between the two curves to make the forecast error visible.
plt.fill_between(
    range(len(actual_values_orig)),
    actual_values_orig,
    predicted_values_orig,
    alpha=0.3,
    color="gray",
    label="Difference",
)
plt.title("XLSTM Model: Actual vs. Predicted Energy Demand", fontsize=16, fontweight="bold")
plt.xlabel("Time Steps", fontsize=14)
plt.ylabel("Energy Demand", fontsize=14)
plt.legend(loc="best", fontsize=12, title="Legend", title_fontsize=13)
plt.grid(True, alpha=0.6, linestyle="--")
plt.tight_layout()
plt.savefig("xlstm_forecasting_actual_vs_predicted.png")
plt.show()

# -----------------------------------------------------------------------------
# 8. Save the Trained XLSTM Model and Performance Metrics for Later Comparison
# -----------------------------------------------------------------------------
# Persist the trained weights (state dict only).
model_save_path = "xlstm_model_incremental.pt"
torch.save(modelxlstm.state_dict(), model_save_path)
print(f"XLSTM model saved to {model_save_path}")

# Bundle the training history and the last forecast into one .npz archive.
# Note: forecast_pred / actual hold predicted_values / actual_values, i.e. the
# values before scaler.inverse_transform (the *_orig arrays are not saved).
np.savez(
    "xlstm_metrics.npz",
    **{
        "epochs": np.array(xlstm_metrics_history["epoch"]),
        "loss": np.array(xlstm_metrics_history["loss"]),
        "val_loss": np.array(xlstm_metrics_history["val_loss"]),
        "accuracy": np.array(xlstm_metrics_history["accuracy"]),
        "val_accuracy": np.array(xlstm_metrics_history["val_accuracy"]),
        "epoch_times": np.array(xlstm_epoch_times),
        "forecast_pred": np.array(predicted_values),
        "actual": np.array(actual_values),
    },
)
print("XLSTM model metrics saved to xlstm_metrics.npz")


# Comparison of all models

In [None]:
#os.chdir('/kaggle/working/')


In [None]:
# Show the current working directory. The comparison cell that follows loads
# the *_metrics.npz files through relative paths, so they are resolved here.
print(os.getcwd())


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------
# Load Metrics for Each Model
# ---------------------------
hybrid_data = np.load("hybrid_metrics.npz")
cnn_data    = np.load("cnn_metrics.npz")
lstm_data   = np.load("lstm_metrics.npz")
xlstm_data  = np.load("xlstm_metrics.npz")

# Every archive is read with the same keys, in this order.
metric_keys = ("epochs", "loss", "val_loss", "accuracy", "val_accuracy",
               "epoch_times", "forecast_pred", "actual")

# Per-model arrays are kept as module-level names for later cells.
(hybrid_epochs, hybrid_loss, hybrid_val_loss, hybrid_accuracy,
 hybrid_val_accuracy, hybrid_epoch_times, hybrid_forecast, hybrid_actual) = (
    hybrid_data[key] for key in metric_keys)

(cnn_epochs, cnn_loss, cnn_val_loss, cnn_accuracy,
 cnn_val_accuracy, cnn_epoch_times, cnn_forecast, cnn_actual) = (
    cnn_data[key] for key in metric_keys)

(lstm_epochs, lstm_loss, lstm_val_loss, lstm_accuracy,
 lstm_val_accuracy, lstm_epoch_times, lstm_forecast, lstm_actual) = (
    lstm_data[key] for key in metric_keys)

(xlstm_epochs, xlstm_loss, xlstm_val_loss, xlstm_accuracy,
 xlstm_val_accuracy, xlstm_epoch_times, xlstm_forecast, xlstm_actual) = (
    xlstm_data[key] for key in metric_keys)

# (legend label, colour, metrics archive); list order fixes the draw/legend order.
model_runs = [
    ("Hybrid", "blue", hybrid_data),
    ("CNN", "green", cnn_data),
    ("LSTM", "red", lstm_data),
    ("XLSTM", "purple", xlstm_data),
]

# ---------------------------
# Create Subplots for Comparison
# ---------------------------
fig, axs = plt.subplots(2, 2, figsize=(16, 14))
loss_ax, accuracy_ax = axs[0, 0], axs[0, 1]
time_ax, forecast_ax = axs[1, 0], axs[1, 1]

# The forecast horizon is taken from the data rather than hard-coded to 48.
# The Hybrid archive's "actual" series is used as the shared reference; this
# assumes all models were evaluated on the same target window.
forecast_horizon = len(hybrid_actual)
time_steps = np.arange(forecast_horizon)
forecast_ax.plot(time_steps, hybrid_actual, label='Actual', color='black', marker='o')

for run_label, run_color, run_data in model_runs:
    run_epochs = run_data["epochs"]
    # Solid line = training series, dashed line = validation series.
    loss_ax.plot(run_epochs, run_data["loss"], label=f'{run_label} Train Loss', color=run_color)
    loss_ax.plot(run_epochs, run_data["val_loss"], label=f'{run_label} Val Loss', color=run_color, linestyle='--')
    accuracy_ax.plot(run_epochs, run_data["accuracy"], label=f'{run_label} Train Accuracy', color=run_color)
    accuracy_ax.plot(run_epochs, run_data["val_accuracy"], label=f'{run_label} Val Accuracy', color=run_color, linestyle='--')
    time_ax.plot(run_epochs, run_data["epoch_times"], label=run_label, color=run_color)
    forecast_ax.plot(time_steps, run_data["forecast_pred"], label=f'{run_label} Forecast',
                     color=run_color, linestyle='--', marker='s')

# (axes, title, x-label, y-label) for the four panels.
panel_captions = [
    (loss_ax, 'Training & Validation Loss', 'Global Epoch', 'Loss'),
    (accuracy_ax, 'Training & Validation Accuracy', 'Global Epoch', 'Accuracy (1 - Loss)'),
    (time_ax, 'Training Time per Epoch', 'Global Epoch', 'Time (seconds)'),
    (forecast_ax, f'{forecast_horizon}-step Forecasting Comparison', 'Time Step', 'Energy Demand'),
]
for panel_ax, panel_title, x_label, y_label in panel_captions:
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel(y_label)
    panel_ax.legend()
    panel_ax.grid(True, linestyle='--', alpha=0.6)

plt.tight_layout()
plt.savefig("model_comparison.png")
plt.show()


The results above show that, over time, the incrementally trained models keep learning while batch-based learning stops improving: after a while its curves flatten out, which suggests the model has no remaining capacity to learn from new data points. The streaming (incremental) models, in contrast, retain what they learned earlier while continuing to learn from new data points, which more closely resembles a natural learning process.

# Comparison of 5 Models, including LSTM Non-Incremental

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------
# Load Metrics for Each Model
# -----------------------------
hybrid_data    = np.load("hybrid_metrics.npz")
cnn_data       = np.load("cnn_metrics.npz")
lstm_data      = np.load("lstm_metrics.npz")
lstm_nonil_data= np.load("lstm_nonil_metrics.npz")
xlstm_data     = np.load("xlstm_metrics.npz")

# Every archive is read with the same keys, in this order.
metric_keys = ("epochs", "loss", "val_loss", "accuracy", "val_accuracy",
               "epoch_times", "forecast_pred", "actual")

# Per-model arrays are kept as module-level names for later cells.
# Hybrid TCN-GRU-LSTM model
(hybrid_epochs, hybrid_loss, hybrid_val_loss, hybrid_accuracy,
 hybrid_val_accuracy, hybrid_epoch_times, hybrid_forecast, hybrid_actual) = (
    hybrid_data[key] for key in metric_keys)

# CNN model
(cnn_epochs, cnn_loss, cnn_val_loss, cnn_accuracy,
 cnn_val_accuracy, cnn_epoch_times, cnn_forecast, cnn_actual) = (
    cnn_data[key] for key in metric_keys)

# Incremental LSTM model
(lstm_epochs, lstm_loss, lstm_val_loss, lstm_accuracy,
 lstm_val_accuracy, lstm_epoch_times, lstm_forecast, lstm_actual) = (
    lstm_data[key] for key in metric_keys)

# Non-Incremental LSTM model
(lstm_nonil_epochs, lstm_nonil_loss, lstm_nonil_val_loss, lstm_nonil_accuracy,
 lstm_nonil_val_accuracy, lstm_nonil_epoch_times, lstm_nonil_forecast, lstm_nonil_actual) = (
    lstm_nonil_data[key] for key in metric_keys)

# XLSTM model
(xlstm_epochs, xlstm_loss, xlstm_val_loss, xlstm_accuracy,
 xlstm_val_accuracy, xlstm_epoch_times, xlstm_forecast, xlstm_actual) = (
    xlstm_data[key] for key in metric_keys)

# For forecasting, we assume that the actual target is the same across models.
# (If not, you can choose one of them. Here, we choose the Hybrid model's actual.)
actual_forecast = hybrid_actual
# The horizon is derived from the data instead of being hard-coded to 48.
forecast_horizon = len(actual_forecast)

# (legend label, colour, metrics archive); list order fixes the draw/legend order.
model_runs = [
    ("Hybrid", "blue", hybrid_data),
    ("CNN", "green", cnn_data),
    ("LSTM Incremental", "red", lstm_data),
    ("LSTM Non-Incremental", "orange", lstm_nonil_data),
    ("XLSTM", "purple", xlstm_data),
]

# -----------------------------
# Create Subplots for Comparison (3 rows x 2 columns)
# -----------------------------
fig, axs = plt.subplots(3, 2, figsize=(20, 18))

# Subplots 1-5 are plotted against the global epoch:
# (axes, metric key, legend suffix, line style, title, y-label)
epoch_panels = [
    (axs[0, 0], "loss", " Train Loss", "-", 'Training Loss vs. Epoch', 'Loss'),
    (axs[0, 1], "val_loss", " Val Loss", "--", 'Validation Loss vs. Epoch', 'Loss'),
    (axs[1, 0], "accuracy", " Train Acc", "-", 'Training Accuracy (1 - Loss) vs. Epoch', 'Accuracy'),
    (axs[1, 1], "val_accuracy", " Val Acc", "--", 'Validation Accuracy (1 - Loss) vs. Epoch', 'Accuracy'),
    (axs[2, 0], "epoch_times", "", "-", 'Training Time per Epoch', 'Time (seconds)'),
]
for panel_ax, metric_key, label_suffix, line_style, panel_title, y_label in epoch_panels:
    for run_label, run_color, run_data in model_runs:
        panel_ax.plot(run_data["epochs"], run_data[metric_key],
                      label=f'{run_label}{label_suffix}', color=run_color, linestyle=line_style)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Global Epoch')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend(fontsize=9)
    panel_ax.grid(True, linestyle='--', alpha=0.6)

# Subplot 6: Forecasting Comparison (actual series first, then each model)
forecast_ax = axs[2, 1]
time_steps = np.arange(forecast_horizon)
forecast_ax.plot(time_steps, actual_forecast, label='Actual', color='black', marker='o')
for run_label, run_color, run_data in model_runs:
    forecast_ax.plot(time_steps, run_data["forecast_pred"], label=f'{run_label} Forecast',
                     color=run_color, linestyle='--', marker='s')
forecast_ax.set_title(f'{forecast_horizon}-step Forecasting Comparison')
forecast_ax.set_xlabel('Time Step')
forecast_ax.set_ylabel('Energy Demand')
forecast_ax.legend(fontsize=9)
forecast_ax.grid(True, linestyle='--', alpha=0.6)

plt.tight_layout()
# Same figure exported as raster (PNG) and vector (EPS, PDF) files.
for out_name in ("model_comparison_all.png", "model_comparison_all.eps", "model_comparison_all.pdf"):
    plt.savefig(out_name)
plt.show()


In [None]:
import os
import zipfile
import numpy as np
import matplotlib.pyplot as plt

# Create the output directory if it doesn't exist (race-free, unlike an
# exists() check followed by makedirs()).
output_dir = "Comparsion_plotting"
os.makedirs(output_dir, exist_ok=True)

# -----------------------------
# Load Metrics for Each Model
# -----------------------------
hybrid_data     = np.load("hybrid_metrics.npz")
cnn_data        = np.load("cnn_metrics.npz")
lstm_data       = np.load("lstm_metrics.npz")
lstm_nonil_data = np.load("lstm_nonil_metrics.npz")
xlstm_data      = np.load("xlstm_metrics.npz")

# Model name -> metrics archive
models = {
    "Hybrid": hybrid_data,
    "CNN": cnn_data,
    "LSTM_Incremental": lstm_data,
    "LSTM_Non_Incremental": lstm_nonil_data,
    "XLSTM": xlstm_data
}

metrics = ["epochs", "loss", "val_loss", "accuracy", "val_accuracy", "epoch_times", "forecast_pred", "actual"]

# Materialise every array once: model name -> {metric name -> array}
model_data = {
    name: {metric: data[metric] for metric in metrics}
    for name, data in models.items()
}

# Forecast horizon derived from the reference series instead of a literal 48.
forecast_horizon = len(model_data["Hybrid"]["actual"])

# (file stem, title, metric key, y-axis label) for each figure saved individually
plots = [
    ("training_loss_vs_epoch", "Training Loss vs. Epoch", "loss", "Loss"),
    ("validation_loss_vs_epoch", "Validation Loss vs. Epoch", "val_loss", "Loss"),
    ("training_accuracy_vs_epoch", "Training Accuracy vs. Epoch", "accuracy", "Accuracy"),
    ("validation_accuracy_vs_epoch", "Validation Accuracy vs. Epoch", "val_accuracy", "Accuracy"),
    ("training_time_per_epoch", "Training Time per Epoch", "epoch_times", "Time (seconds)"),
    ("forecasting_comparison_all_models", f"{forecast_horizon}-step Forecasting Comparison", "forecast_pred", "Energy Demand")
]

# Define fixed color scheme
color_scheme = {
    "Hybrid": "blue",
    "CNN": "green",
    "LSTM_Incremental": "red",
    "LSTM_Non_Incremental": "orange",
    "XLSTM": "purple",
    "Actual": "black"
}

# Forecast-plot overrides: Hybrid and XLSTM are drawn on top of the other
# models, and XLSTM uses a thinner line. Models not listed use the defaults.
default_forecast_style = {"zorder": 2, "linewidth": 2.0}
forecast_style = {
    "Hybrid": {"zorder": 3, "linewidth": 2.0},
    "XLSTM": {"zorder": 4, "linewidth": 1.0},
}


def save_comparison_figure(fig, stem):
    """Save `fig` as PNG, PDF and EPS (dpi=300) under `output_dir`, then close it."""
    for ext in ("png", "pdf", "eps"):
        fig.savefig(os.path.join(output_dir, f"{stem}.{ext}"), format=ext, dpi=300)
    plt.close(fig)


# Loop through each plot type
for filename, title, metric, ylabel in plots:
    fig, ax = plt.subplots(figsize=(10, 6))

    if metric == "forecast_pred":
        # Forecast comparison: x-axis is the forecast step, not the epoch.
        x_range = np.arange(forecast_horizon)

        # 1) Plot Actual first (zorder=1) so it appears behind other curves
        ax.plot(x_range,
                model_data["Hybrid"]["actual"],
                label="Actual",
                color=color_scheme["Actual"],
                marker="o",
                markersize=5,
                zorder=1)

        # 2) Plot the model forecasts in a fixed legend order
        model_order = ["Hybrid", "XLSTM", "CNN", "LSTM_Incremental", "LSTM_Non_Incremental"]
        for model_name in model_order:
            style = forecast_style.get(model_name, default_forecast_style)
            ax.plot(x_range,
                    model_data[model_name]["forecast_pred"],
                    label=f"{model_name} Forecast",
                    linestyle="--",
                    marker="s",
                    markersize=5,
                    color=color_scheme[model_name],
                    linewidth=style["linewidth"],
                    zorder=style["zorder"])

        ax.set_xlabel("Time Step")
    else:
        # Per-epoch metric: one curve per model against its own epoch axis.
        for model_name, data in model_data.items():
            ax.plot(data["epochs"],
                    data[metric],
                    label=model_name,
                    color=color_scheme[model_name])

        ax.set_xlabel("Epoch")

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.legend(fontsize=9)
    ax.grid(True, linestyle='--', alpha=0.6)

    save_comparison_figure(fig, filename)

print("All subplots saved individually in PNG, PDF, and EPS formats in the 'Comparsion_plotting' directory.")

# Create a zip file of the output directory
zip_filename = "Comparsion_plotting.zip"
with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
    for foldername, subfolders, filenames in os.walk(output_dir):
        for file in filenames:
            # Named plot_path (not file_path) so the notebook-level file_path
            # holding the dataset CSV location is not overwritten.
            plot_path = os.path.join(foldername, file)
            # Store entries relative to output_dir so the archive has no
            # leading directory component.
            arcname = os.path.relpath(plot_path, output_dir)
            zipf.write(plot_path, arcname=arcname)

print(f"Zip file '{zip_filename}' created containing all plots.")

# If you're running this in a Jupyter Notebook, display a download link
try:
    from IPython.display import FileLink, display
    display(FileLink(zip_filename))
except ImportError:
    print("Run to display a download link.")


In [None]:
!pip install dataframe_image

In [None]:
!pip install nest_asyncio

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------------
# Load Metrics for Each Model (using absolute paths)
# -----------------------------
def _load_metrics(npz_path):
    """Read every array stored in an ``.npz`` archive into a plain dict.

    ``np.load`` on an ``.npz`` file returns a lazy ``NpzFile`` that keeps the
    underlying file open until it is closed. Reading the arrays inside a
    ``with`` block releases the handle right away; the returned dict still
    supports the ``data["key"]`` lookups used below.

    Parameters
    ----------
    npz_path : str
        Path of the ``.npz`` file to read.

    Returns
    -------
    dict
        Maps each name listed in the archive to its loaded array.
    """
    with np.load(npz_path) as archive:
        return {key: archive[key] for key in archive.files}


hybrid_data     = _load_metrics("/kaggle/working/hybrid_metrics.npz")
cnn_data        = _load_metrics("/kaggle/working/cnn_metrics.npz")
lstm_data       = _load_metrics("/kaggle/working/lstm_metrics.npz")
lstm_nonil_data = _load_metrics("/kaggle/working/lstm_nonil_metrics.npz")
xlstm_data      = _load_metrics("/kaggle/working/xlstm_metrics.npz")

# Each archive is read with the same eight keys: per-epoch training history
# ("epochs", "loss", "val_loss", "accuracy", "val_accuracy", "epoch_times")
# plus the forecast and its target ("forecast_pred", "actual").

# Extract variables for the Hybrid TCN-GRU-LSTM model
hybrid_epochs       = hybrid_data["epochs"]
hybrid_loss         = hybrid_data["loss"]
hybrid_val_loss     = hybrid_data["val_loss"]
hybrid_accuracy     = hybrid_data["accuracy"]
hybrid_val_accuracy = hybrid_data["val_accuracy"]
hybrid_epoch_times  = hybrid_data["epoch_times"]
hybrid_forecast     = hybrid_data["forecast_pred"]
hybrid_actual       = hybrid_data["actual"]

# Extract variables for the CNN model
cnn_epochs          = cnn_data["epochs"]
cnn_loss            = cnn_data["loss"]
cnn_val_loss        = cnn_data["val_loss"]
cnn_accuracy        = cnn_data["accuracy"]
cnn_val_accuracy    = cnn_data["val_accuracy"]
cnn_epoch_times     = cnn_data["epoch_times"]
cnn_forecast        = cnn_data["forecast_pred"]
cnn_actual          = cnn_data["actual"]

# Extract variables for the Incremental LSTM model
lstm_epochs         = lstm_data["epochs"]
lstm_loss           = lstm_data["loss"]
lstm_val_loss       = lstm_data["val_loss"]
lstm_accuracy       = lstm_data["accuracy"]
lstm_val_accuracy   = lstm_data["val_accuracy"]
lstm_epoch_times    = lstm_data["epoch_times"]
lstm_forecast       = lstm_data["forecast_pred"]
lstm_actual         = lstm_data["actual"]

# Extract variables for the Non-Incremental LSTM model
lstm_nonil_epochs         = lstm_nonil_data["epochs"]
lstm_nonil_loss           = lstm_nonil_data["loss"]
lstm_nonil_val_loss       = lstm_nonil_data["val_loss"]
lstm_nonil_accuracy       = lstm_nonil_data["accuracy"]
lstm_nonil_val_accuracy   = lstm_nonil_data["val_accuracy"]
lstm_nonil_epoch_times    = lstm_nonil_data["epoch_times"]
lstm_nonil_forecast       = lstm_nonil_data["forecast_pred"]
lstm_nonil_actual         = lstm_nonil_data["actual"]

# Extract variables for the XLSTM model
xlstm_epochs       = xlstm_data["epochs"]
xlstm_loss         = xlstm_data["loss"]
xlstm_val_loss     = xlstm_data["val_loss"]
xlstm_accuracy     = xlstm_data["accuracy"]
xlstm_val_accuracy = xlstm_data["val_accuracy"]
xlstm_epoch_times  = xlstm_data["epoch_times"]
xlstm_forecast     = xlstm_data["forecast_pred"]
xlstm_actual       = xlstm_data["actual"]

# For forecasting, we assume that the actual target is identical across models.
# (If not, adjust accordingly; here we use Hybrid's actual.)
# NOTE(review): the original comment gave the shape as (48,) -- TODO confirm
# against the code that writes the .npz files.
actual_forecast = hybrid_actual

# -----------------------------
# Compute Mean Metrics for Each Model
# -----------------------------
def forecast_mae(forecast, actual):
    """Return the mean absolute error between a forecast and its target.

    Parameters
    ----------
    forecast, actual : array-like
        Predicted and true values. Lists are accepted as well as arrays.

    Returns
    -------
    numpy floating scalar
        ``mean(|forecast - actual|)``.

    Notes
    -----
    If the two inputs differ only by singleton axes, for example an
    ``(n, 1)`` prediction against an ``(n,)`` target, plain subtraction would
    broadcast to an ``(n, n)`` matrix and average every pairwise difference.
    Singleton axes are therefore squeezed away whenever that makes the shapes
    agree. In every other case the inputs are left alone and normal NumPy
    broadcasting rules (or errors) apply.
    """
    forecast = np.asarray(forecast)
    actual = np.asarray(actual)

    squeezed_forecast = np.squeeze(forecast)
    squeezed_actual = np.squeeze(actual)
    if squeezed_forecast.shape == squeezed_actual.shape:
        forecast, actual = squeezed_forecast, squeezed_actual

    return np.mean(np.abs(forecast - actual))

# Each per-epoch history is reduced to its mean over all recorded epochs, in
# the order: train loss, val loss, train accuracy, val accuracy, epoch time.
# The forecast error is computed against each model's own stored target.

# Hybrid TCN-GRU-LSTM
(hybrid_mean_train_loss,
 hybrid_mean_val_loss,
 hybrid_mean_train_acc,
 hybrid_mean_val_acc,
 hybrid_mean_time) = map(np.mean, (hybrid_loss, hybrid_val_loss,
                                   hybrid_accuracy, hybrid_val_accuracy,
                                   hybrid_epoch_times))
hybrid_forecast_mae = forecast_mae(hybrid_forecast, hybrid_actual)

# CNN
(cnn_mean_train_loss,
 cnn_mean_val_loss,
 cnn_mean_train_acc,
 cnn_mean_val_acc,
 cnn_mean_time) = map(np.mean, (cnn_loss, cnn_val_loss,
                                cnn_accuracy, cnn_val_accuracy,
                                cnn_epoch_times))
cnn_forecast_mae = forecast_mae(cnn_forecast, cnn_actual)

# Incremental LSTM
(lstm_mean_train_loss,
 lstm_mean_val_loss,
 lstm_mean_train_acc,
 lstm_mean_val_acc,
 lstm_mean_time) = map(np.mean, (lstm_loss, lstm_val_loss,
                                 lstm_accuracy, lstm_val_accuracy,
                                 lstm_epoch_times))
lstm_forecast_mae = forecast_mae(lstm_forecast, lstm_actual)

# Non-Incremental LSTM
(lstm_nonil_mean_train_loss,
 lstm_nonil_mean_val_loss,
 lstm_nonil_mean_train_acc,
 lstm_nonil_mean_val_acc,
 lstm_nonil_mean_time) = map(np.mean, (lstm_nonil_loss, lstm_nonil_val_loss,
                                       lstm_nonil_accuracy, lstm_nonil_val_accuracy,
                                       lstm_nonil_epoch_times))
lstm_nonil_forecast_mae = forecast_mae(lstm_nonil_forecast, lstm_nonil_actual)

# XLSTM
(xlstm_mean_train_loss,
 xlstm_mean_val_loss,
 xlstm_mean_train_acc,
 xlstm_mean_val_acc,
 xlstm_mean_time) = map(np.mean, (xlstm_loss, xlstm_val_loss,
                                  xlstm_accuracy, xlstm_val_accuracy,
                                  xlstm_epoch_times))
xlstm_forecast_mae = forecast_mae(xlstm_forecast, xlstm_actual)

# -----------------------------
# Create a Comparison DataFrame
# -----------------------------
# One row per model; the values follow the column order listed below.
comparison_columns = ["Model", "Train Loss", "Val Loss", "Train Acc",
                      "Val Acc", "Epoch Time (s)", "Forecast MAE"]
comparison_rows = [
    ("proposed iTGB-Net", hybrid_mean_train_loss, hybrid_mean_val_loss,
     hybrid_mean_train_acc, hybrid_mean_val_acc, hybrid_mean_time, hybrid_forecast_mae),
    ("CNN", cnn_mean_train_loss, cnn_mean_val_loss,
     cnn_mean_train_acc, cnn_mean_val_acc, cnn_mean_time, cnn_forecast_mae),
    ("LSTM Incremental", lstm_mean_train_loss, lstm_mean_val_loss,
     lstm_mean_train_acc, lstm_mean_val_acc, lstm_mean_time, lstm_forecast_mae),
    ("LSTM Non-Incremental", lstm_nonil_mean_train_loss, lstm_nonil_mean_val_loss,
     lstm_nonil_mean_train_acc, lstm_nonil_mean_val_acc, lstm_nonil_mean_time,
     lstm_nonil_forecast_mae),
    ("XLSTM", xlstm_mean_train_loss, xlstm_mean_val_loss,
     xlstm_mean_train_acc, xlstm_mean_val_acc, xlstm_mean_time, xlstm_forecast_mae),
]
df = pd.DataFrame(comparison_rows, columns=comparison_columns)

# -----------------------------
# Highlight Best Values Using LaTeX Bold Formatting
# -----------------------------
# For columns where lower is better: Train Loss, Val Loss, Epoch Time (s), Forecast MAE
# For columns where higher is better: Train Acc, Val Acc
def format_cell(val, best, higher_is_better, decimals=6):
    """Format a metric as fixed-point text, bolding it when it is the best.

    Parameters
    ----------
    val : float
        Value to format.
    best : float
        Best value of the column. The caller decides whether that is the
        column maximum or minimum.
    higher_is_better : bool
        Kept for interface compatibility. The comparison against ``best`` is
        the same in both directions, so this flag does not change the result.
    decimals : int, default 6
        Number of digits after the decimal point.

    Returns
    -------
    str
        ``val`` with ``decimals`` decimals, wrapped in ``$\\mathbf{...}$``
        when it lies within an absolute tolerance of ``1e-6`` of ``best``.
    """
    fmt_val = f"{val:.{decimals}f}"
    # rtol=0 makes the tolerance purely absolute. NumPy's default relative
    # tolerance (1e-5) would also bold larger values, such as epoch times,
    # that differ from the best in the printed digits.
    if np.isclose(val, best, rtol=0.0, atol=1e-6):
        return r"$\mathbf{" + fmt_val + "}$"
    return fmt_val

# Work on a copy so the numeric frame stays available for plain printing.
df_formatted = df.copy()
for col in (name for name in df.columns if name != "Model"):
    # Accuracy columns are maximised; every other metric column is minimised.
    higher_wins = col in ("Train Acc", "Val Acc")
    best = df[col].max() if higher_wins else df[col].min()
    df_formatted[col] = df[col].apply(format_cell, args=(best,),
                                      higher_is_better=higher_wins)

# Header row first, then one list per model row
table_data = [df_formatted.columns.tolist(), *df_formatted.values.tolist()]

# -----------------------------
# Create a Matplotlib Table and Save as PNG and EPS
# -----------------------------
fig, ax = plt.subplots(figsize=(12, 3))
ax.axis('tight')
ax.axis('off')  # only the table is drawn; the axes themselves are hidden

# The header is already the first row of table_data, so no colLabels are passed.
the_table = ax.table(cellText=table_data, colLabels=None, loc='center', cellLoc='center')
the_table.auto_set_font_size(False)  # keep the fixed size set on the next line
the_table.set_fontsize(10)
fig.tight_layout()

# (target path, savefig keyword arguments) for each output file
table_outputs = (
    ("/kaggle/working/model_comparison_table.png", {"dpi": 300}),
    ("/kaggle/working/model_comparison_table.eps", {"format": 'eps'}),
)
for out_path, save_kwargs in table_outputs:
    plt.savefig(out_path, **save_kwargs)
plt.show()

# Also print the plain DataFrame (without LaTeX formatting) for reference
print(df)
