## Data loading

In [1]:
# Load data and packages
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

directory_path = os.getcwd() + "\\Data\\sorting_event_volumes_2023.csv"

df = pd.read_csv(directory_path)

## Data cleaning

In [2]:
# Data cleaning
print("Number of rows original dataset is: " + str(df.shape[0]))

df = df.loc[df["event_type"] == "LAJ", :]
df.drop(['event_location', 'input_belt', 'position'], axis=1, inplace = True)
df.dropna(inplace = True)
df['output_belt'] = df['output_belt'].astype(int)
df = df.groupby(['sorting_center_name', 'scanning_date', 'output_belt'], as_index = False)['no_of_events'].sum()
df['scanning_date'] = pd.to_datetime(df['scanning_date'])

print("Number of rows cleaned dataset is: " + str(df.shape[0]))

Number of rows original dataset is: 8949721
Number of rows cleaned dataset is: 188628


## Data preparation

In [3]:
# Data preparation
df['day'] = df['scanning_date'].dt.day
df['month'] = df['scanning_date'].dt.month
df['weekday'] = df['scanning_date'].dt.day_of_week + 1
df['week'] = df['scanning_date'].dt.day_of_year // 7 + 1
df['yearday'] = df['scanning_date'].dt.day_of_year
df['yearday_sin'] = np.sin(df['yearday'] / 7 * 2 * np.pi)
df['yearday_cos'] = np.cos(df['yearday'] / 7 * 2 * np.pi)

sorting_center_names = df["sorting_center_name"].unique()
df["sorting_center_name"].value_counts()

VANTAA       44006
TAMPERE      41481
LIETO        35434
OULU         31037
KUOPIO       27888
SEINÄJOKI     8782
Name: sorting_center_name, dtype: int64

In [4]:
#aggregating data
adf = df.groupby(['scanning_date','sorting_center_name'])['no_of_events'].sum().reset_index()
adf['day'] = adf['scanning_date'].dt.day
adf['month'] = adf['scanning_date'].dt.month
adf['weekday'] = adf['scanning_date'].dt.day_of_week + 1
adf['week'] = adf['scanning_date'].dt.day_of_year // 7 + 1
adf['yearday'] = adf['scanning_date'].dt.day_of_year
adf['yearday_sin'] = np.sin(adf['yearday'] / 7 * 2 * np.pi)
adf['yearday_cos'] = np.cos(adf['yearday'] / 7 * 2 * np.pi)
adf

Unnamed: 0,scanning_date,sorting_center_name,no_of_events,day,month,weekday,week,yearday,yearday_sin,yearday_cos
0,2023-01-01,LIETO,3650,1,1,7,1,1,7.818315e-01,0.623490
1,2023-01-01,OULU,1441,1,1,7,1,1,7.818315e-01,0.623490
2,2023-01-01,TAMPERE,1458,1,1,7,1,1,7.818315e-01,0.623490
3,2023-01-02,KUOPIO,23812,2,1,1,1,2,9.749279e-01,-0.222521
4,2023-01-02,LIETO,44598,2,1,1,1,2,9.749279e-01,-0.222521
...,...,...,...,...,...,...,...,...,...,...
1876,2023-12-29,OULU,19831,29,12,5,52,363,-7.818315e-01,0.623490
1877,2023-12-29,SEINÄJOKI,15100,29,12,5,52,363,-7.818315e-01,0.623490
1878,2023-12-29,TAMPERE,34499,29,12,5,52,363,-7.818315e-01,0.623490
1879,2023-12-29,VANTAA,129349,29,12,5,52,363,-7.818315e-01,0.623490


In [5]:
import torch
print(torch.cuda.device_count())  # 查看有多少个 CUDA 设备
for i in range(torch.cuda.device_count()):
    print(f"Device {i}: {torch.cuda.get_device_name(i)}")



1
Device 0: NVIDIA GeForce RTX 4070


In [6]:
# Plot should be difference between predicted and reality
# I'm not sure if scaling on the number of events is good, because you are trying to predict that
# Do LSTMs work per row or not?
# Split the data by date, don't use random split

In [7]:
#In this project, we used Optuna to automatically find the best set of hyperparameters that optimize the performance of our LSTM model. 
#Hyperparameters such as the hidden size, number of LSTM layers, learning rate, and sequence length have a significant impact on how well the model performs.
#Instead of manually tuning these parameters, which is time-consuming and often inefficient, Optuna allows us to automate this process and find the optimal combination.
#Optuna works by conducting multiple trials where it suggests different hyperparameter values and evaluates the model's performance based on a target metric—in our case, the root mean square error (RMSE). 
#The process begins with defining an objective function, which includes training the model using the hyperparameters suggested by Optuna. 
#After training, the function computes the RMSE on the validation set, which is then passed back to Optuna. Optuna uses this feedback to refine its suggestions, gradually improving its search for the best parameters.
#It utilizes a technique called Tree-structured Parzen Estimator (TPE), which is more efficient than traditional methods like grid search or random search because it focuses on the most promising areas of the hyperparameter space. 
#After running multiple trials, Optuna identifies the best-performing set of hyperparameters, which we can then use to retrain the model for final evaluation. This method saves time, improves model performance, and removes much of the guesswork from manual tuning, making it a powerful tool for optimizing machine learning models.

In [9]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.cuda.amp import GradScaler
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, RobustScaler
import pywt
from torch.optim.lr_scheduler import CosineAnnealingLR

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
torch.backends.cudnn.benchmark = True

# Define hyperparameters
sequence_length = 15
num_blocks = 3
hidden_channels = 32
kernel_size = 3
batch_size = 256
num_epochs = 50
learning_rate = 0.01
weight_decay = 1e-4
momentum = 0.9
dropout_rate = 0.3

# Create GRU input sequences
def create_sequences(data, sequence_length):
    X, Y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        Y.append(data[i + sequence_length])
    return np.array(X), np.array(Y)

# Wavelet-based decomposition function (GPU-friendly version)
def decompose_signal_wavelet(signal, wavelet='db1', level=None):
    if level is None:
        level = min(3, int(np.log2(len(signal))))
    coeffs = pywt.wavedec(signal, wavelet=wavelet, level=level)
    coeffs = [torch.tensor(c, dtype=torch.float32).to(device) for c in coeffs]
    return coeffs

# ResNet-like block for time series
class ResNetBlock1D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dropout_rate=0.3):
        super(ResNetBlock1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
        if in_channels != out_channels:
            self.skip = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.skip = None

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        if self.skip is not None:
            identity = self.skip(identity)
        out += identity
        out = self.relu(out)
        return out

# Full ResNet model for time series
class ResNet1D(nn.Module):
    def __init__(self, input_channels, num_blocks, hidden_channels, kernel_size, output_size):
        super(ResNet1D, self).__init__()
        layers = []
        in_channels = input_channels
        for _ in range(num_blocks):
            layers.append(ResNetBlock1D(in_channels, hidden_channels, kernel_size))
            in_channels = hidden_channels
        self.resnet_blocks = nn.Sequential(*layers)
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(hidden_channels, output_size)

    def forward(self, x):
        out = self.resnet_blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Data augmentation for sparse data
def augment_data(signal, num_augments=10, noise_level=0.02):
    augmented_signals = [signal]
    for _ in range(num_augments):
        noise = noise_level * np.random.randn(len(signal))
        shifted_signal = np.roll(signal, shift=np.random.randint(-5, 5))
        augmented_signal = shifted_signal + noise
        augmented_signals.append(augmented_signal)
    return np.array(augmented_signals)

# Define function to map belt ID to sorting center
# Updated to ensure sorting center names are consistent with actual data
def get_sorting_center(belt_id):
    sorting_center_mapping = {
        0: "VANTAA",
        1: "TAMPERE",
        2: "LIETO",
        3: "OULU",
        4: "KUOPIO",
        5: "SEINÄJOKI"
    }
    return sorting_center_mapping.get(belt_id % len(sorting_center_mapping), "UNKNOWN")

# Initialize model weights
def init_weights(m):
    if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

# Load and preprocess dataset
df['scanning_date'] = pd.to_datetime(df['scanning_date'])
df = df.sort_values(by='scanning_date').reset_index(drop=True)

# Train/Test split
train_size = int(0.75 * len(df))
test_size = int(0.05 * len(df))
train_df = df[:train_size]
test_df = df[train_size:train_size + test_size]

# Data preprocessing
feature_columns = ['day', 'month', 'weekday', 'week', 'yearday_sin', 'yearday_cos']

# Fill missing values with median values for robustness
train_df.loc[:, feature_columns] = train_df[feature_columns].fillna(train_df[feature_columns].median(numeric_only=True))
test_df.loc[:, feature_columns] = test_df[feature_columns].fillna(test_df[feature_columns].median(numeric_only=True))

# Fit scalers with valid data only
scaler_features = RobustScaler()
train_features = scaler_features.fit_transform(train_df[feature_columns])
test_features = scaler_features.transform(test_df[feature_columns])

scaler_target = StandardScaler()
train_targets = scaler_target.fit_transform(train_df['no_of_events'].values.reshape(-1, 1))
test_targets = scaler_target.transform(test_df['no_of_events'].values.reshape(-1, 1))

# Decompose each output belt's signal using Wavelet-based method
output_belt_ids = train_df['output_belt'].unique()
imfs_dict = {}
for belt in output_belt_ids:
    belt_signal = train_df[train_df['output_belt'] == belt]['no_of_events'].values
    if len(belt_signal) == 0:
        continue
    augmented_signals = augment_data(belt_signal)
    imfs = []
    for augmented_signal in augmented_signals:
        imfs.extend(decompose_signal_wavelet(augmented_signal))
    imfs_dict[belt] = imfs

# Assign sorting centers to belts and create models for each sorting center
sorting_center_models = {}
sorting_center_belts = {}
for belt in output_belt_ids:
    sorting_center = get_sorting_center(belt)
    if sorting_center not in sorting_center_belts:
        sorting_center_belts[sorting_center] = []
    sorting_center_belts[sorting_center].append(belt)
    
    # Create a model for each sorting center
    if sorting_center not in sorting_center_models:
        model = ResNet1D(input_channels=1, num_blocks=num_blocks, hidden_channels=hidden_channels, kernel_size=kernel_size, output_size=1).to(device)
        model.apply(init_weights)
        sorting_center_models[sorting_center] = model

# Training loop for each sorting center model
belt_predictions_actuals = {}
for sorting_center, belts in sorting_center_belts.items():
    model = sorting_center_models[sorting_center]
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)  # Changed to SGD with momentum
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)  # Cosine annealing scheduler to dynamically adjust the learning rate
    
    all_X_train, all_Y_train = [], []
    belt_losses = {}
    for belt in belts:
        imfs = imfs_dict[belt]
        belt_losses[belt] = []  # Ensure initialization of belt_losses before using it
        for imf in imfs:
            X_train, Y_train = create_sequences(imf.cpu().numpy(), sequence_length=sequence_length)
            if len(X_train) > 0:
                all_X_train.append(X_train)
                all_Y_train.append(Y_train)
    
    if len(all_X_train) > 0:
        X_train_tensor = torch.tensor(np.concatenate(all_X_train), dtype=torch.float32).unsqueeze(1)
        Y_train_tensor = torch.tensor(np.concatenate(all_Y_train), dtype=torch.float32).unsqueeze(1)
        train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        
        # Training loop
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for batch_X, batch_Y in train_loader:
                batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = nn.MSELoss()(outputs.view(-1), batch_Y.view(-1))
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                running_loss += loss.item()
                
                # Track loss per belt
                for belt in belts:
                    belt_losses[belt].append(loss.item())

            # Step scheduler
            scheduler.step()

            print(f"Sorting Center: {sorting_center}, Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

            # Early stopping condition could be added here if required

        # Store predictions and actuals for each belt
        model.eval()
        with torch.no_grad():
            for belt in belts:
                imfs = imfs_dict[belt]
                belt_predictions = []
                belt_actuals = []

                for imf in imfs:
                    X_test, Y_test = create_sequences(imf.cpu().numpy(), sequence_length=sequence_length)
                    if len(X_test) > 0:
                        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1).to(device)
                        Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).unsqueeze(1)
                        
                        predictions = model(X_test_tensor).cpu().numpy().flatten()
                        actuals = Y_test_tensor.cpu().numpy().flatten()
                        
                        belt_predictions.extend(predictions)
                        belt_actuals.extend(actuals)

                # Store for KPI calculation
                belt_predictions_actuals[belt] = {'predictions': belt_predictions, 'actuals': belt_actuals}

# Calculate KPIs for each sorting center
sorting_center_metrics = {}
for sorting_center, belts in sorting_center_belts.items():
    daily_errors = {}
    for belt in belts:
        predictions = belt_predictions_actuals[belt]['predictions']
        actuals = belt_predictions_actuals[belt]['actuals']
        scanning_dates = test_df['scanning_date'][sequence_length:sequence_length + len(predictions)]

        # Calculate squared deviation for each day
        for date, actual, predicted in zip(scanning_dates, actuals, predictions):
            squared_deviation = (actual - predicted) ** 2
            if date not in daily_errors:
                daily_errors[date] = []
            daily_errors[date].append(squared_deviation)

    # Calculate daily average squared error across belts
    daily_mse = {date: np.mean(errors) for date, errors in daily_errors.items() if len(errors) > 0}
    mse = np.mean(list(daily_mse.values())) if len(daily_mse) > 0 else np.nan
    vse = np.var(list(daily_mse.values()), ddof=1) if len(daily_mse) > 1 else np.nan

    sorting_center_metrics[sorting_center] = {'MSE': mse, 'VSE': vse}

# Print the results
print("\nOverall Metrics for Each Sorting Center:")
for sorting_center, metrics in sorting_center_metrics.items():
    mse = metrics['MSE']
    vse = metrics['VSE']
    if not np.isnan(mse) and not np.isnan(vse):
        print(f"Sorting Center {sorting_center}: MSE = {mse:.4f}, VSE = {vse:.4f}")
    else:
        print(f"Sorting Center {sorting_center}: No valid predictions available.")


Using device: cuda


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.loc[:, feature_columns] = train_df[feature_columns].fillna(train_df[feature_columns].median(numeric_only=True))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.loc[:, feature_columns] = test_df[feature_columns].fillna(test_df[feature_columns].median(numeric_only=True))


Sorting Center: VANTAA, Epoch [1/50], Loss: 247705.0548
Sorting Center: VANTAA, Epoch [2/50], Loss: 170393.1472
Sorting Center: VANTAA, Epoch [3/50], Loss: 167241.2654
Sorting Center: VANTAA, Epoch [4/50], Loss: 166170.9721
Sorting Center: VANTAA, Epoch [5/50], Loss: 165510.0107
Sorting Center: VANTAA, Epoch [6/50], Loss: 164100.8055
Sorting Center: VANTAA, Epoch [7/50], Loss: 162548.3946
Sorting Center: VANTAA, Epoch [8/50], Loss: 161976.5683
Sorting Center: VANTAA, Epoch [9/50], Loss: 161027.6242
Sorting Center: VANTAA, Epoch [10/50], Loss: 160752.4679
Sorting Center: VANTAA, Epoch [11/50], Loss: 159360.5493
Sorting Center: VANTAA, Epoch [12/50], Loss: 159761.1351
Sorting Center: VANTAA, Epoch [13/50], Loss: 158576.2099
Sorting Center: VANTAA, Epoch [14/50], Loss: 157945.4815
Sorting Center: VANTAA, Epoch [15/50], Loss: 157428.3375
Sorting Center: VANTAA, Epoch [16/50], Loss: 157204.5810
Sorting Center: VANTAA, Epoch [17/50], Loss: 156943.7621
Sorting Center: VANTAA, Epoch [18/50], L

In [13]:
from PyEMD import EMD
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, RobustScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast
import pywt
# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
torch.backends.cudnn.benchmark = True

# Define hyperparameters
sequence_length = 15
num_blocks = 2  # 减少网络的深度
hidden_channels = 16  # 减少隐藏层通道数
kernel_size = 3
batch_size = 256
num_epochs = 20
learning_rate = 0.001
weight_decay = 1e-4
momentum = 0.9
dropout_rate = 0.3

# Create GRU input sequences
def create_sequences(data, sequence_length):
    X, Y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        Y.append(data[i + sequence_length])
    return np.array(X), np.array(Y)

# Wavelet-based decomposition function (GPU-friendly)


def decompose_signal_wavelet(signal, wavelets=['db1', 'haar', 'sym5'], level=None):
    if level is None:
        level = min(1, int(np.log2(len(signal))))  # 动态选择合适的分解级别
    coeffs_list = []
    for wavelet in wavelets:
        coeffs = pywt.wavedec(signal, wavelet=wavelet, level=level)
        coeffs = [torch.tensor(c, dtype=torch.float32).to(device) for c in coeffs]
        coeffs_list.extend(coeffs)
    return coeffs_list




# Empirical Mode Decomposition (EMD) based decomposition
def decompose_signal_emd(signal):
    emd = EMD()  # 初始化 EMD 类
    imfs = emd.emd(signal)  # 对信号进行 EMD 分解
    imfs = [torch.tensor(imf, dtype=torch.float32).to(device) for imf in imfs]
    return imfs

# ResNet-like block for time series (Reduced complexity)
class ResNetBlock1D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dropout_rate=0.3):
        super(ResNetBlock1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
        if in_channels != out_channels:
            self.skip = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.skip = None

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.conv2(out)
        if self.skip is not None:
            identity = self.skip(identity)
        out += identity
        out = self.relu(out)
        return out

# Full ResNet model for time series (Reduced complexity)
class ResNet1D(nn.Module):
    def __init__(self, input_channels, num_blocks, hidden_channels, kernel_size, output_size):
        super(ResNet1D, self).__init__()
        layers = []
        in_channels = input_channels
        for _ in range(num_blocks):
            layers.append(ResNetBlock1D(in_channels, hidden_channels, kernel_size))
            in_channels = hidden_channels
        self.resnet_blocks = nn.Sequential(*layers)
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(hidden_channels, output_size)

    def forward(self, x):
        out = self.resnet_blocks(x)
        out = self.global_avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Data augmentation for sparse data
def augment_data(signal, num_augments=10, noise_level=0.02):
    augmented_signals = [signal]
    for _ in range(num_augments):
        noise = noise_level * np.random.randn(len(signal))
        shifted_signal = np.roll(signal, shift=np.random.randint(-5, 5))
        augmented_signal = shifted_signal + noise
        augmented_signals.append(augmented_signal)
    return np.array(augmented_signals)

# Initialize model weights
def init_weights(m):
    if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)


# Load and preprocess dataset
df['scanning_date'] = pd.to_datetime(df['scanning_date'])
df = df.sort_values(by='scanning_date').reset_index(drop=True)

# Train/Test split
train_size = int(0.75 * len(df))
test_size = int(0.05 * len(df))
train_df = df[:train_size]
test_df = df[train_size:train_size + test_size]

# Data preprocessing
feature_columns = ['day', 'month', 'weekday', 'week', 'yearday_sin', 'yearday_cos']

# Fill missing values with median values for robustness



train_df = train_df.copy()
test_df = test_df.copy()

train_df[feature_columns] = train_df[feature_columns].fillna(train_df[feature_columns].median(numeric_only=True))
test_df[feature_columns] = test_df[feature_columns].fillna(test_df[feature_columns].median(numeric_only=True))



# Fit scalers with valid data only
scaler_features = RobustScaler()
train_features = scaler_features.fit_transform(train_df[feature_columns])
test_features = scaler_features.transform(test_df[feature_columns])

scaler_target = StandardScaler()
train_targets = scaler_target.fit_transform(train_df['no_of_events'].values.reshape(-1, 1))
test_targets = scaler_target.transform(test_df['no_of_events'].values.reshape(-1, 1))

# Decompose each output belt's signal using Wavelet and EMD-based method
output_belt_ids = train_df['output_belt'].unique()
imfs_dict = {}
for belt in output_belt_ids:
    belt_signal = train_df[train_df['output_belt'] == belt]['no_of_events'].values
    if len(belt_signal) == 0:
        continue
    augmented_signals = augment_data(belt_signal)
    imfs = []
    for augmented_signal in augmented_signals:
        # Apply both Wavelet and EMD decomposition
        wavelet_coeffs = decompose_signal_wavelet(augmented_signal)
        emd_imfs = decompose_signal_emd(augmented_signal)
        imfs.extend(wavelet_coeffs)
        imfs.extend(emd_imfs)
    imfs_dict[belt] = imfs






# Assign sorting centers to belts and create models for each sorting center
sorting_center_models = {}
sorting_center_belts = train_df.groupby('sorting_center_name')['output_belt'].unique().to_dict()

# Create models for each sorting center
for sorting_center, belts in sorting_center_belts.items():
    if sorting_center not in sorting_center_models:
        model = ResNet1D(input_channels=1, num_blocks=num_blocks, hidden_channels=hidden_channels, kernel_size=kernel_size, output_size=1).to(device)
        model.apply(init_weights)
        sorting_center_models[sorting_center] = model

# Training loop for each sorting center model
belt_predictions_actuals = {}
for sorting_center, belts in sorting_center_belts.items():
    model = sorting_center_models[sorting_center]
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

    all_X_train, all_Y_train = [], []
    belt_losses = {}
    for belt in belts:
        imfs = imfs_dict.get(belt, [])
        belt_losses[belt] = []  # Ensure initialization of belt_losses before using it
        for imf in imfs:
            X_train, Y_train = create_sequences(imf.cpu().numpy(), sequence_length=sequence_length)
            if len(X_train) > 0:
                all_X_train.append(X_train)
                all_Y_train.append(Y_train)

    if len(all_X_train) > 0:
        X_train_tensor = torch.tensor(np.concatenate(all_X_train), dtype=torch.float32).unsqueeze(1)
        Y_train_tensor = torch.tensor(np.concatenate(all_Y_train), dtype=torch.float32).unsqueeze(1)
        train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for batch_X, batch_Y in train_loader:
                batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = nn.MSELoss()(outputs.view(-1), batch_Y.view(-1))
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                running_loss += loss.item()

                # Track loss per belt
                for belt in belts:
                    belt_losses[belt].append(loss.item())

            # Step scheduler
            scheduler.step()

            print(f"Sorting Center: {sorting_center}, Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

           

        # Store predictions and actuals for each belt
        model.eval()
        with torch.no_grad():
            for belt in belts:
                imfs = imfs_dict.get(belt, [])
                belt_predictions = []
                belt_actuals = []

                for imf in imfs:
                    X_test, Y_test = create_sequences(imf.cpu().numpy(), sequence_length=sequence_length)
                    if len(X_test) > 0:
                        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1).to(device)
                        Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).unsqueeze(1)

                        predictions = model(X_test_tensor).cpu().numpy().flatten()
                        actuals = Y_test_tensor.cpu().numpy().flatten()

                        belt_predictions.extend(predictions)
                        belt_actuals.extend(actuals)

                # Store for KPI calculation
                belt_predictions_actuals[belt] = {'predictions': belt_predictions, 'actuals': belt_actuals}

# Calculate KPIs for each sorting center
sorting_center_metrics = {}
for sorting_center, belts in sorting_center_belts.items():
    daily_errors = {}
    for belt in belts:
        if belt not in belt_predictions_actuals:
            continue
        predictions = belt_predictions_actuals[belt]['predictions']
        actuals = belt_predictions_actuals[belt]['actuals']
        scanning_dates = test_df['scanning_date'][sequence_length:sequence_length + len(predictions)]

        # Calculate squared deviation for each day
        for date, actual, predicted in zip(scanning_dates, actuals, predictions):
            squared_deviation = (actual - predicted) ** 2
            if date not in daily_errors:
                daily_errors[date] = []
            daily_errors[date].append(squared_deviation)

    # Calculate daily average squared error across belts
    daily_mse = {date: np.mean(errors) for date, errors in daily_errors.items() if len(errors) > 0}
    mse = np.mean(list(daily_mse.values())) if len(daily_mse) > 0 else np.nan
    vse = np.var(list(daily_mse.values()), ddof=1) if len(daily_mse) > 1 else np.nan

    sorting_center_metrics[sorting_center] = {'MSE': mse, 'VSE': vse}

# Print the results
print("\nOverall Metrics for Each Sorting Center:")
for sorting_center, metrics in sorting_center_metrics.items():
    mse = metrics['MSE']
    vse = metrics['VSE']
    if not np.isnan(mse) and not np.isnan(vse):
        print(f"Sorting Center {sorting_center}: MSE = {mse:.4f}, VSE = {vse:.4f}")
    else:
        print(f"Sorting Center {sorting_center}: No valid predictions available.")


Using device: cuda
Sorting Center: KUOPIO, Epoch [1/20], Loss: 26198.2852
Sorting Center: KUOPIO, Epoch [2/20], Loss: 25478.2021
Sorting Center: KUOPIO, Epoch [3/20], Loss: 25144.9563
Sorting Center: KUOPIO, Epoch [4/20], Loss: 24909.9939
Sorting Center: KUOPIO, Epoch [5/20], Loss: 24692.6867
Sorting Center: KUOPIO, Epoch [6/20], Loss: 24459.6826
Sorting Center: KUOPIO, Epoch [7/20], Loss: 24269.4436
Sorting Center: KUOPIO, Epoch [8/20], Loss: 24105.4111
Sorting Center: KUOPIO, Epoch [9/20], Loss: 23957.1353
Sorting Center: KUOPIO, Epoch [10/20], Loss: 23893.0572
Sorting Center: KUOPIO, Epoch [11/20], Loss: 23808.8379
Sorting Center: KUOPIO, Epoch [12/20], Loss: 23700.2117
Sorting Center: KUOPIO, Epoch [13/20], Loss: 23645.4987
Sorting Center: KUOPIO, Epoch [14/20], Loss: 23599.2139
Sorting Center: KUOPIO, Epoch [15/20], Loss: 23579.5194
Sorting Center: KUOPIO, Epoch [16/20], Loss: 23555.6343
Sorting Center: KUOPIO, Epoch [17/20], Loss: 23527.7475
Sorting Center: KUOPIO, Epoch [18/20],