In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import os

import joblib
import json
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv1D, Dropout, Flatten, Dense, BatchNormalization, Activation, GlobalAveragePooling1D, Add, LSTM, Bidirectional
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score, hamming_loss
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import itertools



class ModelFactory:

    class DataPreparation:
        
        def __init__(self):
            """
            Description: 
                Initializes the class and prepares a dictionary to store fitted encoders.

            Parameters: 
                None
            
            Returns: 
                None
            """
            self.encoders={}
            pass

        def _sort(self, df):
            """
            Description: 
                Cleans column names, converts data types, removes the Simtime column if it exists, and sorts the DataFrame by Timestamp and Machine.

            Parameters:
                df (pd.DataFrame): Raw input data.

            Returns:
                df_sorted (pd.DataFrame): Cleaned and sorted DataFrame.

            Shape change: 
                input shape: 9
                output shape: 8
                Index is reset.
            """
            df = self._clean_columns(df)
            df = self._clean_datatypes(df)
            if 'Simtime' in df.columns:
                df = df.drop(columns=['Simtime'])
            return df.sort_values(by=['Timestamp', 'Machine']).reset_index(drop=True)

        def _clean_columns(self, df):
            """
            Description: 
                Strips whitespace and capitalizes column names.

            Parameters:
                df (pd.DataFrame): Input DataFrame.

            Returns:
                df (pd.DataFrame): DataFrame with standardized column names.

            Shape change: 
                None
            """
            df.columns = df.columns.str.strip().str.capitalize()
            return df
        
        def _clean_datatypes(self, df):
            """
            Description: 
                Applies helper functions to convert date, boolean, and numeric fields to proper types.

            Parameters:
                df (pd.DataFrame): Input DataFrame.

            Returns:
                df (pd.DataFrame): Type-corrected DataFrame.
            
            Shape change: 
                None
            """
            df = self._date_data(df)
            df = self._boolean_data(df)
            df = self._numeric_data(df)
            return df
        
        def _date_data(self, df):
            """
            Description: 
                Converts Timestamp column to datetime.

            Parameters:
                df (pd.DataFrame): DataFrame with a Timestamp column.

            Returns:
                df (pd.DataFrame): Updated DataFrame with Timestamp as datetime.

            Shape change: 
            None
            """
            df["Timestamp"] = pd.to_datetime(df["Timestamp"])
            return df
        
        def _boolean_data(self,df,target_columns=['Tool_warning', 'Tool_replaced']):
            """
            Description: 
                Converts specified boolean columns to integers (0 or 1).

            Parameters:
                - df (pd.DataFrame): Input DataFrame.

                - target_columns (list): List of column names to convert (default: ['Tool_warning', 'Tool_replaced']).

            Returns:
                df (pd.DataFrame): DataFrame with converted boolean fields.

            Shape change:
                None
            """
            for col in target_columns:
                if col in df.columns:
                    df[col] = df[col].astype(int)
            return df
        
        def _numeric_data(self,df,target_columns=['Processed_parts', 'Scrapped_parts', 'Quality_counter']):
            """
            Description: 
                Converts specified numeric columns to numeric dtype, coercing errors.

            Parameters:
                - df (pd.DataFrame): Input DataFrame.

                - target_columns (list): List of column names to convert (default: ['Processed_parts', 'Scrapped_parts', 'Quality_counter']).

            Returns:
                df (pd.DataFrame): Updated DataFrame with numeric values.

            Shape change:
                None
            """
            for col in target_columns:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            return df
        
        def transform_state_columns(self, df):
            """
            Description: 
                Transforms previously fitted state columns using stored OneHotEncoder objects.

            Parameters:
                df (pd.DataFrame): DataFrame with encoded categorical columns (e.g., 'MC1_State').

            Returns:
                df (pd.DataFrame): DataFrame with categorical columns replaced by one-hot encoded columns.

            Shape change: 
                Adds one-hot columns and drops originals.

            """
            df = df.copy()
            state_cols = [col for col in df.columns if col in self.encoders]
        
            for col in state_cols:
                encoder = self.encoders[col]
                encoded = encoder.transform(df[[col]])
        
                categories = encoder.categories_[0]
                encoded_col_names = [f"{col}_{cat}" for cat in categories]
        
                encoded_df = pd.DataFrame(encoded, columns=encoded_col_names, index=df.index)
        
                df = pd.concat([df.drop(columns=[col]), encoded_df], axis=1)
        
            return df

        def encode_state_columns(self, df):
            df = df.copy()
            state_cols = [col for col in df.columns if col.endswith('_State')]
        
            for col in state_cols:
                encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
                encoded = encoder.fit_transform(df[[col]])
        
                categories = encoder.categories_[0]
                encoded_col_names = [f"{col}_{cat}" for cat in categories]
        
                encoded_df = pd.DataFrame(encoded, columns=encoded_col_names, index=df.index)
        
                df = pd.concat([df.drop(columns=[col]), encoded_df], axis=1)
        
                self.encoders[col] = encoder
        
            return df
        
        def pivot_features(self, df, index_col='Timestamp', machine_col='Machine'):
            df = df.copy()
    
            # Remove columns that shouldn't be pivoted
            non_feature_cols = [index_col, machine_col]
            feature_cols = [col for col in df.columns if col not in non_feature_cols]
        
            # Pivot to [timestamp × (machine_feature)]
            pivot_df = df.pivot(index=index_col, columns=machine_col, values=feature_cols)
        
            # Flatten multi-index columns: (feature, machine) → 'Machine_Feature'
            pivot_df.columns = [f"{machine}_{feature}" for feature, machine in pivot_df.columns]
        
            # Ensure Timestamp is a column again (optional)
            pivot_df = pivot_df.reset_index()
        
            return pivot_df
        
        def _generate_labels(self, df):
            df = df.copy()
            df['Tool_change_label'] = 0
            df['Quality_label'] = 0
            for machine in df['Machine'].unique():
                machine_df = df[df['Machine'] == machine]
                tool_replaced_indices = machine_df.index[machine_df['Tool_replaced'] == 1]

                for idx in tool_replaced_indices:
                    prev_idx = machine_df.index[machine_df.index < idx].max()
                    if pd.notna(prev_idx):
                        df.at[prev_idx, 'Tool_change_label'] = 1

            df.loc[df['Quality_counter'] == 1, 'Quality_label'] = 1

            columns_to_drop = ['Scrapped_parts', 'Quality_counter', 'Tool_replaced']
            df.drop(columns=[col for col in columns_to_drop if col in df.columns], inplace=True)

            return df
        
        def get_train_data(self, data):
            """
            Prepares training data by sorting, labeling, pivoting, and encoding the input dataset.
        
            Processing steps include:
                - Sorting the data chronologically and by machine.
                - Generating labels based on task-specific logic.
                - Pivoting features into a time-aligned format.
                - Encoding machine state-related columns.
        
            Parameters:
                data (pd.DataFrame): Raw input data containing timestamped machine signals.
        
            Returns:
                features (pd.DataFrame): Feature set with encoded values, excluding label columns.
                labels (pd.DataFrame): Label set containing the timestamp and task-related labels.
                label_names (list): List of column names corresponding to the labels.
            """
            data = self._sort(data)
            data = self._generate_labels(data)
            data = self.pivot_features(data)
            data = self.encode_state_columns(data)
            
            selected_columns = []
            for col in data.columns:
                if col == 'Timestamp' or 'label' in col.lower():
                    selected_columns.append(col)

            labels = data[selected_columns].copy()
            features = data.drop(columns=[col for col in selected_columns if col != 'Timestamp'])
            
            features = features.reset_index(drop=True)
            labels = labels.reset_index(drop=True)
            label_names = labels.columns.tolist()

            return features, labels, label_names
    
        def get_predict_row(self, df):
            data = self.sort(df)
            columns_to_drop = ['Scrapped_parts', 'Quality_counter', 'Tool_replaced']
            data.drop(columns=[col for col in columns_to_drop if col in df.columns], inplace=True)
            data = self.pivot_features(data)
            data = self.transform_state_columns(data)
            return data.to_numpy().flatten()

        def split_data(self, df, train_size,validate_size):
            """
            Splits the preprocessed dataset into training, validation, and test sets.

            Parameters:
                df (pd.DataFrame): The input dataframe containing preprocessed data.
                train_size (float): Proportion of the dataset to include in the training set (0 < train_size < 1).
                validate_size (float): Proportion of the dataset to include in the validation set (0 < validate_size < 1).
                    The remaining data is used for the test set.

            Returns:
                train (dict): Dictionary with keys 'X' and 'y' for training features and labels prepared.

                validate (dict): Dictionary with keys 'X' and 'y' for validation features and labels prepared.

                test (dict): Dictionary with keys 'X' and 'y' for test features and labels prepared.
                
                label_names (list): List of label column names prepared.
            """
            features, labels ,label_names= self.get_train_data(df)  # preprocess first
            n = len(features)
        
            train_end = int(train_size * n)
            validate_end = int((train_size + validate_size) * n)
        
            X_train = features[:train_end].reset_index(drop=True)
            y_train = labels[:train_end].reset_index(drop=True)
            train = {'X': X_train, 'y': y_train}
        
            X_validate = features[train_end:validate_end].reset_index(drop=True)
            y_validate = labels[train_end:validate_end].reset_index(drop=True)
            validate = {'X': X_validate, 'y': y_validate}
        
            X_test = features[validate_end:].reset_index(drop=True)
            y_test = labels[validate_end:].reset_index(drop=True)
            test = {'X': X_test, 'y': y_test}
        
            return train, validate, test,label_names
        
        def split_for_continued_training(self, df):
            """
            Splits data into 90% training and 10% test set for continued training purposes.
        
            This function is designed for scenarios where a model has already been trained
            and is being updated with new data. It performs preprocessing using the same 
            pipeline as in initial training (label generation, encoding, pivoting, etc.).
        
            Parameters:
                df (pd.DataFrame): Raw input DataFrame containing machine signal data.
        
            Returns:
                train (dict): Dictionary with 'X' and 'y' for training data (90%).
                test (dict): Dictionary with 'X' and 'y' for test data (10%).
                label_names (list): List of label column names used in training.
            """
            features, labels, label_names = self.get_train_data(df)
            n = len(features)
            split_idx = int(n * 0.9)
        
            X_train = features[:split_idx].reset_index(drop=True)
            y_train = labels[:split_idx].reset_index(drop=True)
            X_test = features[split_idx:].reset_index(drop=True)
            y_test = labels[split_idx:].reset_index(drop=True)
        
            train = {'X': X_train, 'y': y_train}
            test = {'X': X_test, 'y': y_test}
        
            return train, test, label_names

        def split_yearly_data(self, df, time_column='Timestamp'):
            """
            Splits a full-year DataFrame into:
            - 3 months training data
            - 9 months test data split into 5d, 10d, and 30d batches
        
            Returns:
                {
                    'train_3mo': DataFrame,
                    'test_5d': list of DataFrames,
                    'test_10d': list of DataFrames,
                    'test_30d': list of DataFrames
                }
            """
            df = df.copy()
            df[time_column] = pd.to_datetime(df[time_column])
            df = df.sort_values(by=time_column).reset_index(drop=True)
        
            start_time = df[time_column].min()
            train_end_time = start_time + pd.DateOffset(months=3)
        
            train_df = df[df[time_column] < train_end_time]
            test_df = df[df[time_column] >= train_end_time]
        
            def split_batches(df, days):
                batches = []
                current_start = df[time_column].min()
                current_end = current_start + pd.Timedelta(days=days)
                while current_start < df[time_column].max():
                    batch = df[(df[time_column] >= current_start) & (df[time_column] < current_end)]
                    if not batch.empty:
                        batches.append(batch)
                    current_start = current_end
                    current_end = current_start + pd.Timedelta(days=days)
                return batches
        
            return {
                'train': train_df,
                '1d':split_batches(test_df,1),
                '5d': split_batches(test_df, 5),
                '10d': split_batches(test_df, 10),
                '30d': split_batches(test_df, 30)
            }
    
        def compute_label_weights(self, y_train, smoothing=1.0):
            """
            Computes inverse frequency weights for multi-label classification.

            Args:
                y_train (pd.DataFrame or np.ndarray): Binary label matrix.
                smoothing (float): Added to avoid division by zero.

            Returns:
                np.ndarray: Array of label weights (higher for rare labels).
            """
            if isinstance(y_train, pd.DataFrame):
                y_train = y_train.to_numpy()

            label_freq = y_train.sum(axis=0)  # Number of positives per label
            label_weights = 1.0 / (label_freq + smoothing)  # Inverse freq
            label_weights = label_weights / label_weights.sum() * len(label_weights)  # Normalize to mean ~1

            scale_tool = 7.0
            scale_quality = 0.001

            scaled_weights = label_weights.copy()
            scaled_weights[:6] *= scale_tool
            scaled_weights[6:] *= scale_quality

            return scaled_weights

    class BaseModel:
        def __init__(self):
            pass

        def generate_sequence(self, X, y):
            X = X.reset_index(drop=True)
            y = y.reset_index(drop=True)

             # Drop Timestamp if present
            if 'Timestamp' in y.columns:
                y = y.drop(columns=['Timestamp'])

            X_seq, y_seq = [], []
            number_of_sequences = len(X) - self.window_size - self.forecast_horizon  + 1 
            for i in range(number_of_sequences):
                X_window = X.iloc[i:i+self.window_size].to_numpy(dtype=np.float32)
                y_window = y.iloc[i+self.window_size + self.forecast_horizon -1].to_numpy(dtype=np.float32)

                X_seq.append(X_window)
                y_seq.append(y_window)
            return np.array(X_seq), np.array(y_seq)
        
        @staticmethod
        def safe_get(config_dict, key, default):
            value = config_dict.get(key, default)
            return default if value is None else value

        def save(self, path_prefix):
            if self.model:
                self.model.save(f"{path_prefix}_model.h5")

            # Save encoders if available
            if self.data_preparation and hasattr(self.data_preparation, 'encoders'):
                joblib.dump(self.data_preparation.encoders, f"{path_prefix}_encoders.pkl")

            # Save model configuration
            config = {
                "window_size": self.window_size,
                "forecast_horizon": self.forecast_horizon,
                "kernel_size": self.kernel_size,
                "num_filters": self.num_filters,
                "dropout_rate": self.dropout_rate,
                "num_stacks": self.num_stacks,
                "dilations": self.dilations,
                "padding": self.padding,
                "use_skip_connections": self.use_skip_connections,
                "activation": self.activation,
                "optimizer": self.optimizer,
                "loss": self.loss,
                "batch_size": self.batch_size,
                "epochs": self.epochs
            }

            with open(f"{path_prefix}_model_info.json", "w") as f:
                json.dump(config, f, indent=4)
        
        def load(self, path_prefix, data_preparation):
            """
            Restores the Keras model and the fitted encoders written by save().

            Parameters:
                path_prefix (str): Prefix that was passed to save().
                data_preparation: Object that receives the loaded encoders in its
                    `encoders` attribute and is then attached to this model.

            Returns:
                None
            """
            model_path = f"{path_prefix}_model.h5"
            encoder_path = f"{path_prefix}_encoders.pkl"

            self.model = load_model(model_path)

            # joblib files are pickle-based: only load files from a trusted source.
            data_preparation.encoders = joblib.load(encoder_path)
            self.data_preparation = data_preparation

        def predict(self, x):
            self.history_buffer.append(x)
            if len(self.history_buffer) > self.window_size:
                self.history_buffer.pop(0)
            if len(self.history_buffer) == self.window_size:
                X_input = np.stack(self.history_buffer)[np.newaxis, ...]
                prediction = self.model.predict(X_input, verbose=0)[0]
                return prediction
            return None

    class LSTMModel(BaseModel):
        def __init__(self, config=None, **kwargs):
            config = config or {}
            config.update(kwargs)

            self.window_size = self.safe_get(config, 'window_size', 30)
            self.forecast_horizon = self.safe_get(config, 'forecast_horizon', 1)
            self.num_units = self.safe_get(config, 'num_units', 64)
            self.dropout_rate = self.safe_get(config, 'dropout_rate', 0.2)
            self.num_layers = self.safe_get(config, 'num_layers', 1)
            self.return_sequences = self.safe_get(config, 'return_sequences', False)
            self.bidirectional = self.safe_get(config, 'bidirectional', False)
            self.version = self.safe_get(config, 'version', 'V1')

            self.optimizer = self.safe_get(config, 'optimizer', 'adam')
            self.loss = self.safe_get(config, 'loss', 'binary_crossentropy')
            self.batch_size = self.safe_get(config, 'batch_size', 32)
            self.epochs = self.safe_get(config, 'epochs', 20)
            self.lstm_activation = self.safe_get(config, 'lstm_activation', 'tanh')  
            self.output_activation = self.safe_get(config, 'output_activation', 'sigmoid')  
            self.label_weights = self.safe_get(config, 'label_weights', None)
            self.return_state = self.safe_get(config, 'return_state', False)

            self.model = None
            self.history_buffer = []

        def build(self, features, labels):
            X_seq, y_seq = self.generate_sequence(features, labels)
            input_shape = X_seq.shape[1:]
            num_labels = y_seq.shape[1]
            self.V1(input_shape, num_labels)

        def V1(self, input_shape, num_labels):
            """
            Builds and compiles a single-layer (optionally bidirectional) LSTM classifier
            and stores it in self.model.

            Parameters:
                input_shape (tuple): Shape of one sample, (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            inputs = Input(shape=input_shape)

            lstm = LSTM(
                units=self.num_units,
                return_sequences=self.return_sequences,
                return_state=self.return_state,
                dropout=self.dropout_rate,
                activation=self.lstm_activation,
                recurrent_activation='sigmoid'
            )

            if self.bidirectional:
                lstm = Bidirectional(lstm)

            outputs = lstm(inputs)

            if self.return_state:
                # With return_state the layer returns the output followed by the states;
                # the final hidden state(s) summarize the whole window.
                if self.bidirectional:
                    _, forward_h, _, backward_h, _ = outputs
                    # A Concatenate layer (instead of tf.concat) keeps the graph made of
                    # Keras layers, which symbolic tensors require in newer Keras versions.
                    x = tf.keras.layers.Concatenate(axis=-1)([forward_h, backward_h])
                else:
                    _, h, _ = outputs
                    x = h
            elif self.return_sequences:
                # The layer output is (batch, window, units) here. Collapse the time axis
                # so the Dense head yields (batch, num_labels) like the training targets;
                # previously the 3-D tensor was passed on unchanged.
                x = GlobalAveragePooling1D()(outputs)
            else:
                x = outputs

            x = Dropout(self.dropout_rate)(x)
            x = Dense(num_labels, activation=self.output_activation)(x)

            model = Model(inputs, x)
            model.compile(
                optimizer=self.optimizer,
                loss=self.loss,
                metrics=['accuracy']
            )

            self.model = model


        def predict(self, x):
            self.history_buffer.append(x)
            if len(self.history_buffer) > self.window_size:
                self.history_buffer.pop(0)
        
            if len(self.history_buffer) == self.window_size:
                X_input = np.stack(self.history_buffer)[np.newaxis, ...]
                y_prob = self.model.predict(X_input, verbose=0)[0]
                return (y_prob > 0.5).astype(int)  # Return binary predictions
            return None


    class TCNModel(BaseModel):
        
        def __init__(self, config=None, **kwargs):

            config = config or {}
            config.update(kwargs)


            self.window_size = self.safe_get(kwargs, 'window_size', 30)
            self.forecast_horizon = self.safe_get(kwargs, 'forecast_horizon', 144)
            self.kernel_size = self.safe_get(kwargs, 'kernel_size', 3)
            self.num_filters = self.safe_get(kwargs, 'num_filters', 64)
            self.dropout_rate = self.safe_get(kwargs, 'dropout_rate', 0.2)
            self.num_stacks = self.safe_get(kwargs, 'num_stacks', 1)
            self.dilations = self.safe_get(kwargs, 'dilations', [1, 2, 4, 8])
            self.padding = self.safe_get(kwargs, 'padding', 'causal')
            self.use_skip_connections = self.safe_get(kwargs, 'use_skip_connections', True)
            # Non-model config
            self.verbose = self.safe_get(kwargs, 'verbose', True)
            self.history_buffer = []
            # Hyperparameters
            self.activation= self.safe_get(kwargs, 'activation', 'relu')
            self.return_sequences=self.safe_get(kwargs, 'return_sequences',False)
            self.optimizer= self.safe_get(kwargs,'optimizer', 'adam')
            self.loss=self.safe_get(kwargs,'loss','binary_crossentropy')
            self.batch_size=self.safe_get(kwargs,'batch_size',32)
            self.epochs=self.safe_get(kwargs,'epochs',20)
            self.label_weights =self.safe_get(kwargs,'label_weights',None) 
#
            self.model = None  # Will be built in .build_model()
            self.history_buffer = []

        def build(self, features, labels):
            X_seq, y_seq  = self.generate_sequence(features,labels)
            input_shape = X_seq.shape[1:]
            print("TCN input_shape:", input_shape)
            num_labels = y_seq.shape[1]
            return self.V3(input_shape, num_labels)
        
        def V1(self, input_shape, num_labels):
            """
            Builds and compiles a two-layer dilated Conv1D classifier (dilation 1, then 2)
            with a flattened sigmoid head and stores it in self.model.

            Parameters:
                input_shape (tuple): Shape of one sample, (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            inputs = Input(shape=input_shape)

            x = Conv1D(self.num_filters, kernel_size=self.kernel_size, padding=self.padding, dilation_rate=1, activation=self.activation)(inputs)
            x = Dropout(self.dropout_rate)(x)

            x = Conv1D(self.num_filters, kernel_size=self.kernel_size, padding=self.padding, dilation_rate=2, activation=self.activation)(x)
            x = Dropout(self.dropout_rate)(x)

            x = Flatten()(x)
            outputs = Dense(num_labels, activation='sigmoid')(x)

            model = Model(inputs, outputs)
            # NOTE(review): unlike V2/V3 this version always uses a default Adam optimizer
            # and ignores self.optimizer.
            model.compile(optimizer=Adam(),
                          loss=self.weighted_binary_crossentropy if self.label_weights is not None else self.loss,
                          metrics=['accuracy'])

            self.model = model

        def weighted_binary_crossentropy(self, y_true, y_pred):
            """
            Binary cross-entropy where each label's contribution is scaled by the
            matching entry of self.label_weights.

            V1, V2 and V3 reference this method when label_weights is set; it was not
            defined on this class or on BaseModel, so that path raised AttributeError.

            Parameters:
                y_true: Ground-truth labels, shape (batch, num_labels).
                y_pred: Predicted probabilities, shape (batch, num_labels).

            Returns:
                Tensor of shape (batch,) with the weighted loss per sample.
            """
            weights = tf.cast(tf.constant(np.asarray(self.label_weights)), y_pred.dtype)
            y_true = tf.cast(y_true, y_pred.dtype)

            # Keep probabilities away from 0 and 1 so the logarithms stay finite.
            eps = 1e-7
            y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

            per_label = -(y_true * tf.math.log(y_pred)
                          + (1.0 - y_true) * tf.math.log(1.0 - y_pred))
            return tf.reduce_mean(per_label * weights, axis=-1)

        def V2(self, input_shape, num_labels):
            """
            Builds and compiles a stack of dilated Conv1D blocks (convolution, batch
            normalization, activation, dropout per dilation) with global average
            pooling and a sigmoid head, and stores it in self.model.

            Parameters:
                input_shape (tuple): Shape of one sample, (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            inputs = Input(shape=input_shape)

            hidden = inputs
            for rate in self.dilations:
                hidden = Conv1D(self.num_filters,
                                kernel_size=self.kernel_size,
                                padding=self.padding,
                                dilation_rate=rate)(hidden)
                hidden = BatchNormalization()(hidden)
                hidden = Activation(self.activation)(hidden)
                hidden = Dropout(self.dropout_rate)(hidden)

            # Average over time so the head does not depend on the window length.
            pooled = GlobalAveragePooling1D()(hidden)
            outputs = Dense(num_labels, activation='sigmoid')(pooled)

            # Use the label-weighted loss only when weights were configured.
            if self.label_weights is not None:
                loss_fn = self.weighted_binary_crossentropy
            else:
                loss_fn = self.loss

            network = Model(inputs, outputs)
            network.compile(optimizer=self.optimizer,
                            loss=loss_fn,
                            metrics=['binary_accuracy'])

            self.model = network
    
        def V3(self, input_shape, num_labels):
            """
            Builds and compiles a stack of dilated Conv1D blocks (convolution, dropout,
            batch normalization per dilation) whose block outputs are summed when skip
            connections are enabled, followed by global average pooling and a sigmoid
            head. The result is stored in self.model.

            Parameters:
                input_shape (tuple): Shape of one sample, (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            inputs = Input(shape=input_shape)

            hidden = inputs
            block_outputs = []
            for rate in self.dilations:
                hidden = Conv1D(filters=self.num_filters,
                                kernel_size=self.kernel_size,
                                padding=self.padding,
                                dilation_rate=rate,
                                activation=self.activation)(hidden)
                hidden = Dropout(self.dropout_rate)(hidden)
                hidden = BatchNormalization()(hidden)
                block_outputs.append(hidden)

            # Without skip connections only the last block feeds the head.
            if self.use_skip_connections and block_outputs:
                hidden = Add()(block_outputs)

            pooled = GlobalAveragePooling1D()(hidden)
            outputs = Dense(num_labels, activation='sigmoid')(pooled)

            # Use the label-weighted loss only when weights were configured.
            if self.label_weights is not None:
                loss_fn = self.weighted_binary_crossentropy
            else:
                loss_fn = self.loss

            network = Model(inputs, outputs)
            network.compile(optimizer=self.optimizer,
                            loss=loss_fn,
                            metrics=['accuracy'])

            self.model = network

        def V4(self, input_shape, num_labels):
            """
            Builds and compiles a residual stack of dilated Conv1D blocks (convolution,
            batch normalization, activation, dropout per dilation). A block adds its
            input to its output when the channel counts match. With skip connections
            enabled the block outputs are summed before global average pooling and the
            sigmoid head. The result is stored in self.model.

            This version always compiles with self.loss.

            Parameters:
                input_shape (tuple): Shape of one sample, (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            inputs = Input(shape=input_shape)

            hidden = inputs
            block_outputs = []
            for rate in self.dilations:
                block_input = hidden

                hidden = Conv1D(filters=self.num_filters,
                                kernel_size=self.kernel_size,
                                padding=self.padding,
                                dilation_rate=rate)(hidden)
                hidden = BatchNormalization()(hidden)
                hidden = Activation(self.activation)(hidden)
                hidden = Dropout(self.dropout_rate)(hidden)

                # The residual is only added when input and output have the same channel count.
                same_width = block_input.shape[-1] == hidden.shape[-1]
                if same_width:
                    hidden = Add()([hidden, block_input])

                block_outputs.append(hidden)

            if self.use_skip_connections:
                hidden = Add()(block_outputs)

            pooled = GlobalAveragePooling1D()(hidden)
            outputs = Dense(num_labels, activation='sigmoid')(pooled)

            self.model = Model(inputs, outputs)
            self.model.compile(optimizer=self.optimizer,
                               loss=self.loss,
                               metrics=['accuracy'])

        def show(self):
            conf = ""
            conf += f"window_size: {self.window_size} "
            conf += f"forecast_horizon: {self.forecast_horizon} "
            conf += f"kernel_size: {self.kernel_size} "
            conf += f"num_filters: {self.num_filters} "
            conf += f"dropout_rate: {self.dropout_rate} "
            conf += f"num_stacks: {self.num_stacks} "
            conf += f"dilations: {self.dilations} "
            conf += f"padding: {self.padding} "
            conf += f"use_skip_connections: {self.use_skip_connections} "
            conf += f"verbose : {self.verbose }"
            conf += f"activation: {self.activation}"
            conf += f"return_sequences: {self.return_sequences}"
            conf += f"optimizer: {self.optimizer}"
            conf += f"loss: {self.loss}"
            conf += f"batch_size: {self.batch_size}"
            conf += f"epochs: {self.epochs}"
            conf += f"label_weights: {self.label_weights}"
            print(conf)
    
    class CNNModel(BaseModel):

        def __init__(self, config=None, **kwargs):
            """
            Description:
                Reads the CNN hyperparameters from `config` (keyword arguments take
                precedence) and stores them as attributes. No network is created here;
                `self.model` stays None until `build` is called.

            Parameters:
                config (dict | None): Hyperparameter dictionary. It is copied before the
                    keyword overrides are merged in, so the caller's dict is not modified.
                **kwargs: Individual hyperparameters that override entries of `config`.

            Returns:
                None
            """
            # Merge into a fresh dict. Calling `config.update(kwargs)` on the caller's
            # object would write the keyword overrides back into a dictionary that may
            # be reused elsewhere (e.g. a shared base configuration).
            config = {**(config or {}), **kwargs}

            # NOTE(review): `safe_get` is inherited from BaseModel; presumably it returns
            # the configured value or the given default - confirm against BaseModel.

            # Sequence parameters
            self.window_size = self.safe_get(config, 'window_size', 30)
            self.forecast_horizon = self.safe_get(config, 'forecast_horizon', 144)

            # CNN architecture parameters
            self.num_conv_layers = self.safe_get(config, 'num_conv_layers', 2)
            self.num_filters = self.safe_get(config, 'num_filters', 64)
            self.kernel_size = self.safe_get(config, 'kernel_size', 3)
            self.dropout_rate = self.safe_get(config, 'dropout_rate', 0.3)
            self.activation = self.safe_get(config, 'activation', 'relu')
            self.use_batch_norm = self.safe_get(config, 'use_batch_norm', True)
            self.use_global_pooling = self.safe_get(config, 'use_global_pooling', True)
            self.version = self.safe_get(config, 'version', 'V1')  # selects V1 or V2 in build()

            # Training parameters
            self.optimizer = self.safe_get(config, 'optimizer', 'adam')
            self.loss = self.safe_get(config, 'loss', 'binary_crossentropy')
            self.batch_size = self.safe_get(config, 'batch_size', 32)
            self.epochs = self.safe_get(config, 'epochs', 20)
            self.label_weights = self.safe_get(config, 'label_weights', None)

            # Runtime config
            self.verbose = self.safe_get(config, 'verbose', True)
            self.model = None
            self.history_buffer = []

        def build(self, features, labels):
            """
            Description:
                Converts the features/labels into sequences, builds the CNN variant
                selected by `self.version`, and trains it on those sequences.

                A new network is created on every call (V1/V2 overwrite `self.model`),
                so weights from an earlier `build` are not carried over.

            Parameters:
                features: Feature data passed to `generate_sequence`.
                labels: Label data passed to `generate_sequence`.

            Returns:
                None

            Raises:
                ValueError: If `self.version` is neither 'V1' nor 'V2'.
            """
            X_seq, y_seq = self.generate_sequence(features, labels)
            input_shape = X_seq.shape[1:]   # drop the sample axis
            num_labels = y_seq.shape[1]

            builders = {'V1': self.V1, 'V2': self.V2}
            if self.version not in builders:
                raise ValueError(f"Unsupported CNN version: {self.version}")

            if self.version == 'V2':
                print("V2 is training")
            builders[self.version](input_shape, num_labels)

            self.model.fit(
                X_seq,
                y_seq,
                batch_size=self.batch_size,
                epochs=self.epochs,
                verbose=self.verbose,
            )

        def V1(self, input_shape, num_labels):
            """
            Description:
                Creates and compiles the V1 network and stores it in `self.model`:
                `num_conv_layers` blocks of Conv1D -> optional BatchNormalization ->
                Dropout, followed by global average pooling (or Flatten) and a sigmoid
                Dense layer with one unit per label.

            Parameters:
                input_shape (tuple): Shape of one input sample (without the batch axis).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            conv_settings = dict(
                filters=self.num_filters,
                kernel_size=self.kernel_size,
                activation=self.activation,
                padding='same',
            )

            net_input = Input(shape=input_shape)
            hidden = net_input
            for _block in range(self.num_conv_layers):
                hidden = Conv1D(**conv_settings)(hidden)
                if self.use_batch_norm:
                    hidden = BatchNormalization()(hidden)
                hidden = Dropout(self.dropout_rate)(hidden)

            # Collapse the time axis either by averaging or by flattening.
            reducer = GlobalAveragePooling1D() if self.use_global_pooling else Flatten()
            hidden = reducer(hidden)

            net_output = Dense(num_labels, activation='sigmoid')(hidden)

            network = Model(net_input, net_output)
            network.compile(optimizer=self.optimizer, loss=self.loss, metrics=['accuracy'])
            self.model = network


        def V2(self, input_shape, num_labels):
            """
            Description:
                Creates and compiles the V2 network and stores it in `self.model`.

                V2 currently has exactly the same architecture as V1, so it delegates
                to `V1` instead of repeating the layer stack. Replace this body once
                the two variants are meant to differ.

            Parameters:
                input_shape (tuple): Shape of one input sample, i.e.
                    (window_size, num_features).
                num_labels (int): Number of output units.

            Returns:
                None
            """
            self.V1(input_shape, num_labels)

    class Evaluating:
        
        def __init__(self):
            pass

        def evaluate_multilabel_metrics(self, y_true, y_pred, threshold=0.5):
            """
            Description:
                Binarizes the predictions at `threshold` and prints exact-match accuracy,
                micro- and macro-averaged precision/recall/F1, and the full
                classification report.

            Parameters:
                y_true (pd.DataFrame | array-like): Ground-truth labels, one column per
                    label. If it has a `columns` attribute, those names label the report.
                y_pred (pd.DataFrame | array-like): Scores or binary predictions laid out
                    like `y_true`.
                threshold (float): Values strictly greater than this become 1.

            Returns:
                None. The metrics are only printed.
            """
            label_names = None
            if hasattr(y_true, 'columns'):
                label_names = [str(col) for col in y_true.columns]

            # Compare plain arrays position by position. Wrapping the predictions in a
            # DataFrame keyed by the true label names re-indexes them by column label
            # whenever y_pred is itself a DataFrame with other column names, which
            # silently turns the predictions into NaN.
            y_true = np.asarray(y_true)
            y_pred_binary = (np.asarray(y_pred) > threshold).astype(int)

            print("\n📊 Evaluation Metrics Summary")
            print("=" * 40)

            print(f"✅ Exact Match Accuracy : {accuracy_score(y_true, y_pred_binary):.4f}\n")

            print("🔹 Micro-Averaged Scores")
            print(f"   Precision            : {precision_score(y_true, y_pred_binary, average='micro', zero_division=0):.4f}")
            print(f"   Recall               : {recall_score(y_true, y_pred_binary, average='micro', zero_division=0):.4f}")
            print(f"   F1 Score             : {f1_score(y_true, y_pred_binary, average='micro', zero_division=0):.4f}\n")

            print("🔹 Macro-Averaged Scores")
            print(f"   Precision            : {precision_score(y_true, y_pred_binary, average='macro', zero_division=0):.4f}")
            print(f"   Recall               : {recall_score(y_true, y_pred_binary, average='macro', zero_division=0):.4f}")
            print(f"   F1 Score             : {f1_score(y_true, y_pred_binary, average='macro', zero_division=0):.4f}")

            print("\n📋 Full Classification Report")
            print("-" * 40)
            print(classification_report(y_true, y_pred_binary, target_names=label_names, zero_division=0))

        def remove_low_metric_rows(self, df, columns, threshold=2):
            """
            Description:
                Drops every row that has `threshold` or more zeros among the given
                columns. Column names that are not present in `df` are ignored.

            Parameters:
                df (pd.DataFrame): Input DataFrame.
                columns (list): Column names to inspect for zeros.
                threshold (int): A row is removed once its zero count reaches this value.

            Returns:
                pd.DataFrame: The surviving rows with a fresh 0..n-1 index.

            Raises:
                ValueError: If none of `columns` exists in `df`.
            """
            present = [name for name in columns if name in df.columns]
            if len(present) == 0:
                raise ValueError("None of the specified columns exist in the DataFrame.")

            zeros_per_row = df[present].eq(0).sum(axis=1)
            keep = zeros_per_row < threshold
            return df.loc[keep].reset_index(drop=True)

        def select_best_models(self, df, filter_by: dict = None, group_by: list = None, sort_by: list = None, sort_ascending: list = None, top_n: int = 1):
            """
            Select best-performing models based on filters, groupings, and sort criteria.

            The input frame is copied first, so the caller's DataFrame is never modified.

            Parameters:
                df (pd.DataFrame): DataFrame with model evaluation results.
                filter_by (dict): Optional. Column-value filters, e.g., {'forecast_horizon': [90, 144]}.
                    A list/tuple/set value keeps rows matching any of its entries.
                group_by (list): Optional. Columns to group by, e.g., ['version', 'forecast_horizon'].
                sort_by (list): Columns to sort by within each group.
                    Defaults to ['macro_f1', 'micro_f1', 'hamming_score'].
                sort_ascending (list): Sort direction per column in sort_by. If None, defaults to
                    all False, i.e. the highest scores come first.
                top_n (int): Number of top models to return per group.

            Returns:
                pd.DataFrame: Best model(s) per group.
            """
            df = df.copy()

            # Fix decimal formatting (decimal comma -> decimal point). Only real strings
            # are rewritten; `.str.replace` would turn every non-string cell of an
            # object column into NaN.
            # NOTE(review): this also rewrites commas inside list-like strings such as
            # "[1, 2, 4, 8]"; kept because downstream code may rely on that format.
            for col in df.columns:
                column = df[col]
                if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
                    df[col] = column.map(
                        lambda cell: cell.replace(',', '.') if isinstance(cell, str) else cell
                    )

            # Convert numeric columns
            numeric_cols = [
                "forecast_horizon", "macro_f1", "micro_f1", "macro_precision",
                "micro_precision", "macro_recall", "micro_recall", "window_size",
                "num_filters", "kernel_size", "dropout_rate", "batch_size", "epochs", "hamming_score"
            ]
            for col in numeric_cols:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')

            # Apply filters
            if filter_by:
                for key, value in filter_by.items():
                    if isinstance(value, (list, tuple, set)):
                        df = df[df[key].isin(list(value))]
                    else:
                        df = df[df[key] == value]

            # Resolve the sort specification. All sort columns are "higher is better"
            # scores, so the default has to be descending: an ascending default would
            # make head(top_n) return the worst models instead of the best ones.
            if sort_by is None:
                sort_by = ['macro_f1', 'micro_f1', 'hamming_score']
            else:
                sort_by = list(sort_by)
            if sort_ascending is None:
                sort_ascending = [False] * len(sort_by)

            # Sort and select best per group
            df_sorted = df.sort_values(by=sort_by, ascending=sort_ascending)
            if group_by:
                best_df = df_sorted.groupby(group_by).head(top_n).reset_index(drop=True)
            else:
                best_df = df_sorted.head(top_n).reset_index(drop=True)

            return best_df

    
    class Ploting:
        
        def __init__(self):
            pass
        
        def plot_confusion_matrices(self, y_true, y_pred, labels=None, figsize=(20, 8)):
            """
            Plots one confusion matrix per label on a two-row grid: labels whose name
            contains "tool" go to the top row, all others to the bottom row. With the
            usual 12 labels this is a 2 x 6 grid; more labels widen the grid.

            Args:
                y_true: Ground truth (DataFrame or array-like), shape (n_samples, n_labels)
                y_pred: Predicted binary labels (0/1) with the same layout. Scores must
                    be thresholded by the caller.
                labels: Optional list of label names (in order); ignored when y_true is
                    a DataFrame, whose column names are used instead
                figsize: Size of the entire figure
            """
            if hasattr(y_true, 'columns'):
                label_names = y_true.columns.tolist()
            elif labels is not None:
                label_names = list(labels)
            else:
                label_names = None

            # Convert before touching `.shape` so plain lists are accepted as well.
            y_true = np.asarray(y_true)
            y_pred = np.asarray(y_pred)
            if label_names is None:
                label_names = [f"Label {i}" for i in range(y_true.shape[1])]

            n_labels = min(len(label_names), y_true.shape[1])

            # Column index is i // 2, so ceil(n_labels / 2) columns are needed.
            # Never go below the original six columns.
            n_cols = max(6, -(-n_labels // 2))
            fig, axes = plt.subplots(2, n_cols, figsize=figsize)
            fig.suptitle("Confusion Matrices for Tool Change (Top) and Quality Check (Bottom)", fontsize=16)

            used_cells = set()
            for i in range(n_labels):
                label = str(label_names[i])
                true_col = y_true[:, i]
                pred_col = y_pred[:, i]

                row = 0 if "tool" in label.lower() else 1
                col = i // 2  # assuming label order is [MC1_tool, MC1_quality, MC2_tool, MC2_quality, ...]
                used_cells.add((row, col))

                # Nothing positive in either vector: show a placeholder instead.
                if true_col.sum() == 0 and pred_col.sum() == 0:
                    axes[row, col].axis("off")
                    axes[row, col].text(0.5, 0.5, f"{label}\n(no data)", ha='center', va='center')
                    continue

                cm = confusion_matrix(true_col, pred_col, labels=[0, 1])
                disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
                disp.plot(ax=axes[row, col], values_format='d', colorbar=False)
                axes[row, col].set_title(label, fontsize=10)
                axes[row, col].grid(False)

            # Hide grid cells that received no label so no empty frames are drawn.
            for row in range(2):
                for col in range(n_cols):
                    if (row, col) not in used_cells:
                        axes[row, col].axis("off")

            plt.tight_layout(rect=[0, 0, 1, 0.95])
            plt.show()


        def plot_training_history(self, history):
            """
            Plots the training loss and, if present, the first recorded metric.

            Parameters:
                history (dict | object): Either a mapping such as
                    {'loss': [...], 'val_loss': [...], 'accuracy': [...]} or an object
                    exposing that mapping as `.history` (as Keras' `fit` returns).

            Returns:
                None. Prints a message and returns early when there is no loss to plot.
            """
            # The data comes from the argument: this plotting helper has neither a
            # `history` nor a `metrics` attribute of its own.
            history = getattr(history, 'history', history)
            if not history or 'loss' not in history:
                print("No training history found.")
                return

            plt.figure(figsize=(12, 4))

            plt.subplot(1, 2, 1)
            plt.plot(history['loss'], label='loss')
            if 'val_loss' in history:
                plt.plot(history['val_loss'], label='val_loss')
            plt.title("Loss")
            plt.legend()

            # Every key that is neither the loss nor a validation series is a metric.
            metric_names = [
                key for key in history
                if key != 'loss' and not str(key).startswith('val_')
            ]
            if metric_names:
                plt.subplot(1, 2, 2)
                metric = metric_names[0]
                plt.plot(history[metric], label=metric)
                if f'val_{metric}' in history:
                    plt.plot(history[f'val_{metric}'], label=f'val_{metric}')
                plt.title(str(metric).capitalize())
                plt.legend()

            plt.tight_layout()
            plt.show()

        def cl_test_result_model(self, results, model, exclude_keys=None):
            """
            Plots every metric of a continual-learning run as a line over the run index
            and saves the figure to 'plots/{model}performContinuousLearning.pdf'.

            Parameters:
                results (list of dict): One dictionary per run, metric name -> value.
                model (str): Model name (e.g., 'TCN', 'LSTM') for plot titles and filenames.
                exclude_keys (list of str): Optional. Additional keys to exclude from plotting.

            Returns:
                None
            """
            # Always exclude these
            default_exclude = ['train_duration', 'predict_duration', 'run']
            exclude_keys = default_exclude + list(exclude_keys or [])

            df = pd.DataFrame(results)

            # Drop excluded columns
            df = df.drop(columns=[k for k in exclude_keys if k in df.columns], errors='ignore')

            # Long format for Seaborn: one row per (run index, metric)
            df_melted = df.reset_index().melt(id_vars='index', var_name='Metric', value_name='Value')

            plt.figure(figsize=(12, 6))
            sns.lineplot(data=df_melted, x='index', y='Value', hue='Metric', marker='o')
            plt.xlabel('Run Index')
            plt.ylabel('Metric Value')
            plt.title(f'{model} Model Performance Trends During Continuous Learning')
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.tight_layout()

            # savefig does not create missing directories.
            os.makedirs('plots', exist_ok=True)
            plt.savefig(f'plots/{model}performContinuousLearning.pdf')
            plt.show()

        def plot_binary_prediction_distribution(self, predictions, model_names=None):
            """
            Plots side-by-side bar charts showing binary prediction counts (0s and 1s) for
            each model and saves the figure to 'plots/models_prediction_distribution.pdf'.

            Parameters:
                predictions (list of np.ndarray): One prediction array per model. Values
                    are rounded with `np.round` before counting, so scores are accepted
                    too (note that `np.round` rounds exact halves to the even value,
                    i.e. 0.5 counts as 0).
                model_names (list of str): Names corresponding to each model. Defaults to
                    "Model 1", "Model 2", ...

            Returns:
                None
            """
            if model_names is None:
                model_names = [f"Model {i+1}" for i in range(len(predictions))]

            counts = []
            for pred in predictions:
                flat = np.round(np.array(pred).flatten())
                zeros = np.sum(flat == 0)
                ones = np.sum(flat == 1)
                counts.append([zeros, ones])

            counts = np.array(counts)
            x = np.arange(len(model_names))
            width = 0.35

            fig, ax = plt.subplots(figsize=(8, 5))
            ax.bar(x - width/2, counts[:, 0], width, label='Predicted 0')
            ax.bar(x + width/2, counts[:, 1], width, label='Predicted 1')

            ax.set_xticks(x)
            ax.set_xticklabels(model_names)
            ax.set_ylabel("Count")
            ax.set_title("Binary Prediction Distribution by Model")
            ax.legend()
            ax.grid(True, axis='y', linestyle='--', alpha=0.5)
            fig.tight_layout()

            # savefig does not create missing directories.
            os.makedirs('plots', exist_ok=True)
            fig.savefig('plots/models_prediction_distribution.pdf')
            plt.show()

        def plot_hyperparameter_distribution(self, df, model):
            """
            Plots a stacked bar chart of hyperparameter value distributions,
            and prints only the hyperparameters with more than one unique value.
            The figure is saved to 'plots/{model}paramdistro.pdf'.

            Parameters:
                df (pd.DataFrame): One row per configuration, one column per
                    hyperparameter. The frame is copied, not modified.
                model (str): Model name used in the title and the file name.

            Returns:
                None. Returns early (without plotting) when no column varies.
            """
            df = df.copy()

            # Convert lists/arrays to strings so they can be counted
            for col in df.columns:
                if df[col].apply(lambda x: isinstance(x, (list, tuple)) or hasattr(x, '__array__')).any():
                    df[col] = df[col].apply(lambda x: str(x))

            # Filter to columns with multiple values
            variable_columns = [col for col in df.columns if df[col].nunique() > 1]

            if not variable_columns:
                print("ℹ️ All hyperparameters have a single unique value — nothing to plot.")
                return

            print(f"📊 Hyperparameters with >1 value ({len(variable_columns)}):")
            for col in variable_columns:
                unique_vals = df[col].unique()
                print(f" - {col}: {list(unique_vals)}")

            # Plot: one stacked bar per hyperparameter, one segment per value
            palette = sns.color_palette("muted", 10)
            fig_width = max(1.8 * len(variable_columns), 12)
            fig, ax = plt.subplots(figsize=(fig_width, 6), constrained_layout=True)

            bar_width = 0.8
            x_positions = range(len(variable_columns))

            for idx, col in enumerate(variable_columns):
                counts = df[col].value_counts()
                bottom = 0
                for i, (val, count) in enumerate(counts.items()):
                    ax.bar(idx, count, bottom=bottom, color=palette[i % len(palette)], width=bar_width)
                    ax.text(idx, bottom + count / 2, f'{val} ({count})',
                            ha='center', va='center', fontsize=10, color='black', fontweight='bold')
                    bottom += count

            ax.set_xticks(x_positions)
            ax.set_xticklabels(variable_columns, rotation=45, ha='right')
            ax.set_ylabel("Count")
            ax.set_title(f"Distribution of Hyperparameter Values for {model}")

            # savefig does not create missing directories.
            os.makedirs('plots', exist_ok=True)
            fig.savefig(f'plots/{model}paramdistro.pdf')

            plt.show()

        @staticmethod
        def calculate_metrics(models, predictions, y_true, threshold=0.5, show_plot=True):
            """
            Calculate and compare model performance metrics.

            Declared as a static method because it takes no `self`: without the
            decorator a call through an instance would bind the instance to `models`
            and shift every other argument by one position.

            Parameters:
                models (list of str): Names of models
                predictions (list of np.ndarray): Predictions for each model (probabilities or binary)
                y_true (np.ndarray): Ground truth labels
                threshold (float): Predictions strictly greater than this become 1
                show_plot (bool): Whether to display a bar chart of the results. The chart
                    is also saved to 'plots/metricsbarchart.pdf'.

            Returns:
                pd.DataFrame: DataFrame with metrics for each model

            Usage:
                model_names = ['TCN', 'LSTM', 'CNN']
                preds = [tcn_y_pred, lsmt_y_pred, cnn_y_pred]

                df_metrics = model.plot.calculate_metrics(model_names, preds, y_true)
            """
            results = []

            for name, pred in zip(models, predictions):
                pred_bin = (np.array(pred) > threshold).astype(int)

                metrics = {
                    "Model": name,
                    "Macro F1": f1_score(y_true, pred_bin, average='macro', zero_division=0),
                    "Micro F1": f1_score(y_true, pred_bin, average='micro', zero_division=0),
                    "Macro Precision": precision_score(y_true, pred_bin, average='macro', zero_division=0),
                    "Micro Precision": precision_score(y_true, pred_bin, average='micro', zero_division=0),
                    "Macro Recall": recall_score(y_true, pred_bin, average='macro', zero_division=0),
                    "Micro Recall": recall_score(y_true, pred_bin, average='micro', zero_division=0),
                    "Hamming Score": 1 - hamming_loss(y_true, pred_bin)
                }
                results.append(metrics)

            df = pd.DataFrame(results)

            if show_plot:
                df_melted = df.melt(id_vars="Model", var_name="Metric", value_name="Score")
                plt.figure(figsize=(12, 6))
                sns.barplot(data=df_melted, x="Metric", y="Score", hue="Model")
                plt.title("Model Performance Comparison")
                plt.xticks(rotation=30, ha='right')
                plt.tight_layout()

                # savefig does not create missing directories.
                os.makedirs('plots', exist_ok=True)
                plt.savefig('plots/metricsbarchart.pdf')
                plt.show()

            return df
        

        def calculate_per_label_f1(self, models, predictions, y_true, threshold=0.5, label_names=None):
            """
            Computes the F1 score of every label for every model, plots the scores as a
            grouped bar chart and saves it to 'plots/f1scoreovemachineandmodel.pdf'.

            Parameters:
                models (list of str): List of model names
                predictions (list of np.ndarray): List of prediction arrays, shape (n_samples, n_labels)
                y_true (np.ndarray | pd.DataFrame): Ground truth, shape (n_samples, n_labels)
                threshold (float): Predictions strictly greater than this become 1
                label_names (list of str): Optional. Names of the label columns, in
                    order. Defaults to the twelve MC1..MC6 tool/quality labels. Only as
                    many labels as `y_true` has columns are evaluated.

            Returns:
                pd.DataFrame: Per-label F1 scores for each model (columns 'Model',
                'Label', 'F1 Score')

            Usage:
                model_names = ['TCN', 'LSTM', 'CNN']
                preds = [tcn_y_pred, lsmt_y_pred, cnn_y_pred]

                df_label_f1 = model.plot.calculate_per_label_f1(model_names, preds, y_true)
            """
            if label_names is None:
                label_names = [
                    "MC1_tool", "MC1_quality", "MC2_tool", "MC2_quality",
                    "MC3_tool", "MC3_quality", "MC4_tool", "MC4_quality",
                    "MC5_tool", "MC5_quality", "MC6_tool", "MC6_quality"
                ]

            # Positional slicing below needs a plain array (a DataFrame would fail on
            # y_true[:, i]).
            y_true = np.asarray(y_true)
            # Never index past the columns that actually exist.
            n_labels = min(len(label_names), y_true.shape[1])

            results = []
            for name, pred in zip(models, predictions):
                pred_bin = (np.asarray(pred) > threshold).astype(int)
                for i in range(n_labels):
                    score = f1_score(y_true[:, i], pred_bin[:, i], zero_division=0)
                    results.append({
                        'Model': name,
                        'Label': label_names[i],
                        'F1 Score': score
                    })

            df = pd.DataFrame(results)

            # Plot
            plt.figure(figsize=(14, 6))
            sns.barplot(data=df, x='Label', y='F1 Score', hue='Model')
            plt.title("Per-Label F1 Score Comparison Across Models")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()

            # savefig does not create missing directories.
            os.makedirs('plots', exist_ok=True)
            plt.savefig('plots/f1scoreovemachineandmodel.pdf')
            plt.show()

            return df


    def __init__(self, config=None):
        """
        Description:
            Creates the helper objects (data preparation, plotting, evaluation) and the
            model wrapper selected by config['arch'].

        Parameters:
            config (dict | None): Configuration passed on to the model wrapper. The
                key 'arch' selects the architecture ('tcn', 'lstm' or 'cnn',
                case-insensitive) and defaults to 'tcn'.

        Returns:
            None

        Raises:
            ValueError: If the requested architecture is not supported.
        """
        config = config or {}
        self.arch = config.get("arch", "tcn").lower()

        self.data_preparation = self.DataPreparation()
        self.plot = self.Ploting()
        self.evaluating = self.Evaluating()

        if self.arch == "tcn":
            self.model = self.TCNModel(config=config)
        elif self.arch == "lstm":
            self.model = self.LSTMModel(config=config)
        elif self.arch == "cnn":
            self.model = self.CNNModel(config=config)
        else:
            # Use the attribute: there is no local `arch`, so referencing it here
            # raised NameError instead of the intended ValueError.
            raise ValueError(f"Unsupported architecture: {self.arch}")

    def fit(self, data):
        """
        Description:
            Derives training features and labels from raw data and trains the model
            on them.

        Parameters:
            data: Raw data handed to `data_preparation.get_train_data`.

        Returns:
            None
        """
        prepared = self.data_preparation.get_train_data(data)
        train_features, train_labels = prepared
        self.model.build(train_features, train_labels)

    def fit_prepared(self, data):
        """
        Description:
            Trains the model on data that is already split into features and labels.

        Parameters:
            data (dict): Mapping with the features under 'X' and the labels under 'y'.

        Returns:
            None
        """
        features, labels = data['X'], data['y']
        self.model.build(features, labels)
      

    def predict(self, x):
        """
        Description:
            Runs the trained network on `x` and returns its raw output. No data
            preparation is applied here; `x` is passed to the network unchanged.

        Parameters:
            x: Input accepted by the underlying network's `predict`.

        Returns:
            Whatever the underlying network's `predict` returns.
        """
        network = self.model.model
        return network.predict(x)
    

    def train_validate(self, df, train_size=0.7, validate_size=0.15):
        """
        Description:
            Splits `df` into train/validate/test parts, trains on the train part and
            predicts the validate and test parts.

        Parameters:
            df: Data handed to `data_preparation.split_data`.
            train_size (float): Passed through to `split_data`.
            validate_size (float): Passed through to `split_data`.

        Returns:
            dict: {'validate': {...}, 'test': {...}}; each entry holds 'y_true',
            'y_pred' and 'label_names'.
        """
        train, validate, test, label_names = self.data_preparation.split_data(
            df, train_size, validate_size
        )

        self.model.build(train['X'], train['y'])

        def score(split):
            # NOTE(review): this calls `predict` on the model wrapper, whereas
            # `train_validate_prepared` calls it on the wrapped network
            # (`self.model.model`) - confirm the wrapper's `predict` is intended.
            sequences, targets = self.model.generate_sequence(split['X'], split['y'])
            return {
                'y_true': targets,
                'y_pred': self.model.predict(sequences),
                'label_names': label_names,
            }

        outcome = {}
        outcome['validate'] = score(validate)
        outcome['test'] = score(test)
        return outcome
    
    def train_validate_prepared(self, train, validate, label_names):
        """
        Description:
            Trains on an already prepared training set and predicts an already
            prepared validation set.

        Parameters:
            train (dict): Training data with keys 'X' and 'y'.
            validate (dict): Validation data with keys 'X' and 'y'.
            label_names (list): Label names, returned unchanged in the result.

        Returns:
            dict: {'validate': {'y_true': ..., 'y_pred': ..., 'label_names': ...}}
        """
        self.model.build(train['X'], train['y'])

        val_sequences, val_targets = self.model.generate_sequence(validate['X'], validate['y'])
        val_scores = self.model.model.predict(val_sequences)

        validation_result = {
            'y_true': val_targets,
            'y_pred': val_scores,
            'label_names': label_names,
        }
        return {'validate': validation_result}

    def confusion_matrices(self, y_true, y_pred, label_names, threshold=0.5):
        """
        Description:
            Prints the multi-label metrics and plots one confusion matrix per label.

        Parameters:
            y_true: Ground-truth labels, shape (n_samples, n_labels).
            y_pred: Scores or binary predictions with the same layout.
            label_names (list): Label names used for the plot titles.
            threshold (float): Scores strictly greater than this count as 1.

        Returns:
            None
        """
        self.evaluating.evaluate_multilabel_metrics(y_true=y_true, y_pred=y_pred, threshold=threshold)

        # The confusion-matrix plot needs hard 0/1 labels; raw scores would be
        # rejected as continuous targets. Thresholding already-binary input is a no-op.
        y_pred_binary = (np.asarray(y_pred) > threshold).astype(int)
        self.plot.plot_confusion_matrices(y_true=y_true, y_pred=y_pred_binary, labels=label_names)


    def grid_search(self, train, validate, label_names, param_grid, threshold=0.5, save_path=None):
        """
        Runs a grid search over model parameters using pre-split training and validation sets.

        Args:
            train (dict): Dictionary containing training data with keys 'X' and 'y'
            validate (dict): Dictionary containing validation data with keys 'X' and 'y'
            label_names (list): List of label column names
            param_grid (dict): Dict of parameters to test (e.g. {'window_size': [30, 60], 'num_filters': [32, 64]}).
                A value that is not a collection (number, string, dict, None) is treated
                as a single fixed option.
            threshold (float): Threshold for binarizing predictions
            save_path (str): Optional path to save results as CSV. Missing parent
                directories are created.

        Returns:
            List[dict]: A list of result dictionaries for each parameter combination, including macro/micro metrics.
        """
        def as_options(value):
            # Strings and dicts are iterable but represent one option, not several.
            if isinstance(value, (str, bytes, dict)) or not hasattr(value, '__iter__'):
                return [value]
            return list(value)

        param_names = list(param_grid.keys())
        param_combos = list(itertools.product(*(as_options(v) for v in param_grid.values())))
        # Derive the total from the grid itself so the progress line is always right.
        total = len(param_combos)

        results = []
        for current, combo in enumerate(param_combos, start=1):
            config = dict(zip(param_names, combo))
            print(f"🔍model {current}/{total} Testing config: {config}")

            result = self._evaluate_single_config(config, train, validate, label_names, threshold)
            results.append(result)

        if save_path:
            target_dir = os.path.dirname(save_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            pd.DataFrame(results).to_csv(save_path, index=False)
            print(f"📁 Results saved to: {save_path}")

        return results


    def _evaluate_single_config(self, config, train, validate, label_names, threshold):
        """
        Description:
            Trains a fresh model with `config`, predicts the validation set and
            computes the macro/micro metrics. Failures are reported, not raised, so
            one bad configuration does not abort a whole grid search.

        Parameters:
            config (dict): Hyperparameters for this run. If it has no 'arch' entry,
                the architecture of this factory is used.
            train (dict): Training data with keys 'X' and 'y'.
            validate (dict): Validation data with keys 'X' and 'y'.
            label_names (list): Label names passed on to training/validation.
            threshold (float): Scores strictly greater than this count as 1.

        Returns:
            dict: `config` merged with the metrics, or `config` plus an 'error'
            message if anything went wrong.
        """
        try:
            # Grid entries normally carry no 'arch'; without this default the inner
            # factory would silently fall back to "tcn" for every architecture.
            model_config = {'arch': self.arch, **config}
            model = ModelFactory(model_config)
            res = model.train_validate_prepared(train, validate, label_names)
            y_true = res['validate']['y_true']
            y_pred = (res['validate']['y_pred'] > threshold).astype(int)

            metrics = {
                'macro_f1': f1_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_f1': f1_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_precision': precision_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_recall': recall_score(y_true, y_pred, average='micro', zero_division=0),
                'hamming_score': 1 - hamming_loss(y_true, y_pred),
                'accuracy': accuracy_score(y_true, y_pred),
            }

            # Flatten config into top-level keys
            return {**config, **metrics}

        except Exception as e:
            # Deliberate best-effort: record the failure and keep the search going.
            print(f"❌ Failed for config {config}: {e}")
            return {**config, 'error': str(e)}
        
    def model_perform(self, path, top_n: int = 1):
        """
        Description:
            Loads grid-search results from a CSV file, removes rows with too many zero
            metrics, and selects the `top_n` best configurations. When more than one
            configuration is requested, the distribution of their hyperparameters is
            plotted.

        Parameters:
            path (str): Path of the CSV file with the grid-search results.
            top_n (int): Number of configurations to select.

        Returns:
            tuple: (selected rows, the same rows without the metric and
            'label_weights' columns).
        """
        non_config_columns = [
            'accuracy', 'micro_f1', 'macro_f1',
            'macro_precision', 'micro_precision',
            'macro_recall', 'micro_recall',
            'hamming_score', 'label_weights',
        ]

        raw_results = pd.read_csv(path)
        usable = self.evaluating.remove_low_metric_rows(raw_results, non_config_columns)
        best_rows = self.evaluating.select_best_models(usable, top_n=top_n)
        hyperparameters = best_rows.drop(columns=non_config_columns, errors='ignore')

        if top_n > 1:
            self.plot.plot_hyperparameter_distribution(hyperparameters, self.arch)

        return best_rows, hyperparameters

    def run_continual_training_with_labels(self, cl_train, cl_test, cl_list):
        """
        Run continual learning evaluation and return both overall and per-label results.

        Procedure: train on `cl_train` and score on `cl_test` (run -1), then train on
        `cl_test`. Afterwards, for every batch in `cl_list`: train on its train part,
        score on its test part (run 0, 1, ...), then train on the test part as well.
        The overall metrics are plotted and both result tables are written to
        'results/{arch}_overall_results.csv' and 'results/{arch}_label_results.csv'.

        Parameters:
            cl_train: dict with 'X' and 'y' used for the initial training
            cl_test: dict with 'X' and 'y' for the initial evaluation and update
            cl_list: list of (train, test, columns) batches; `columns` of the first
                batch (minus "Timestamp") provides the label names. If the list is
                empty, generic names "Label 0", "Label 1", ... are used.

        Returns:
            overall_results: list of dicts, metrics for each run
            label_results: list of dicts, per-label F1/Precision/Recall/Accuracy per run
        """
        overall_results = []
        label_results = []

        def evaluate(run_idx, y_true, y_pred, label_names):
            """Scores one run; appends per-label rows to `label_results` and returns the overall metrics."""
            y_pred_bin = (y_pred > 0.5).astype(int)
            metrics = {
                'run': run_idx,
                'macro_f1': f1_score(y_true, y_pred_bin, average='macro', zero_division=0),
                'micro_f1': f1_score(y_true, y_pred_bin, average='micro', zero_division=0),
                'macro_precision': precision_score(y_true, y_pred_bin, average='macro', zero_division=0),
                'micro_precision': precision_score(y_true, y_pred_bin, average='micro', zero_division=0),
                'macro_recall': recall_score(y_true, y_pred_bin, average='macro', zero_division=0),
                'micro_recall': recall_score(y_true, y_pred_bin, average='micro', zero_division=0),
                'accuracy': accuracy_score(y_true, y_pred_bin)
            }

            # Add learning rate (best effort: optimizer APIs differ between versions)
            try:
                optimizer = self.model.model.optimizer
                if hasattr(optimizer, '_decayed_lr'):
                    lr = optimizer._decayed_lr(tf.float32).numpy()
                else:
                    lr = optimizer.learning_rate.numpy()
                metrics['learning_rate'] = lr
            except Exception:
                metrics['learning_rate'] = None  # fallback if something goes wrong

            # Per-label breakdown
            for i, label in enumerate(label_names):
                label_metrics = {
                    'run': run_idx,
                    'label': label,
                    'f1': f1_score(y_true[:, i], y_pred_bin[:, i], zero_division=0),
                    'precision': precision_score(y_true[:, i], y_pred_bin[:, i], zero_division=0),
                    'recall': recall_score(y_true[:, i], y_pred_bin[:, i], zero_division=0),
                    'accuracy': accuracy_score(y_true[:, i], y_pred_bin[:, i])
                }
                label_results.append(label_metrics)

            return metrics

        # Baseline run (-1): train on cl_train, evaluate on cl_test.
        self.fit_prepared(cl_train)
        X_val_seq, y_true = self.model.generate_sequence(cl_test['X'], cl_test['y'])
        y_pred = self.model.model.predict(X_val_seq)

        if cl_list:
            label_names = [col for col in cl_list[0][2] if col != "Timestamp"]
        else:
            # No batch to read the column names from: fall back to generic names
            # instead of failing with an IndexError.
            label_names = [f"Label {i}" for i in range(y_true.shape[1])]

        overall_results.append(evaluate(-1, y_true, y_pred, label_names))
        self.fit_prepared(cl_test)

        # Run over batches
        for i, (train, test, _) in enumerate(cl_list):
            self.fit_prepared(train)
            X_val_seq, y_true = self.model.generate_sequence(test['X'], test['y'])
            y_pred = self.model.model.predict(X_val_seq)

            metrics = evaluate(i, y_true, y_pred, label_names)

            overall_results.append(metrics)
            self.fit_prepared(test)

        self.plot.cl_test_result_model(overall_results, self.arch)
        os.makedirs("results", exist_ok=True)
        overall_df = pd.DataFrame(overall_results)
        label_df = pd.DataFrame(label_results)

        overall_df.to_csv(f"results/{ self.arch}_overall_results.csv", index=False)
        label_df.to_csv(f"results/{ self.arch}_label_results.csv", index=False)

        return overall_results, label_results


## Data


In [2]:
import pickle
import time
import os
def load_pickle(path):
    """
    Reads a pickle file and returns the object stored in it.

    Unpickling can execute arbitrary code, so only use this on trusted files.

    Parameters:
        path (str): Path of the pickle file.

    Returns:
        The unpickled object.
    """
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [39]:
# Twelve label columns: for each machine MC1..MC6 a tool-change and a quality-check
# label, interleaved as MC1_tool, MC1_quality, MC2_tool, ...
label_names = [
    f"MC{machine}_{event}"
    for machine in range(1, 7)
    for event in ("tool", "quality")
]

In [None]:
# Factory built with the default configuration (architecture "tcn").
tools = ModelFactory()
# Raw line log read from a path relative to the notebook's working directory.
# NOTE(review): the traceback stored under this cell names 'linelog.csv', not
# 'line_log.csv' as read here, so the stored output looks stale - re-run to confirm.
year_data = pd.read_csv('../simulations/logs/year/line_log.csv')


FileNotFoundError: [Errno 2] No such file or directory: '../simulations/logs/year/linelog.csv'

In [None]:
# Pre-split grid-search data sets and the label names, loaded from pickles.
gs_train, gs_validate, gs_test, labels = [
    load_pickle(pickle_path)
    for pickle_path in (
        "data/gs_train.pkl",
        "data/gs_validate.pkl",
        "data/gs_test.pkl",
        "data/labels.pkl",
    )
]

## Models and Configurations

In [None]:
# Label weights are computed from the training labels without the Timestamp column.
base = ModelFactory()
label = pd.DataFrame(gs_train['y']).drop(columns=['Timestamp'])
lw = base.data_preparation.compute_label_weights(label)

## TCN

In [None]:
# Baseline TCN configuration.
# NOTE(review): 'dilations' is a string, not a list - presumably the format the
# model receives when a configuration is read back from a results CSV; confirm.
tcn_base_config = dict(
    arch='tcn',
    window_size=30,
    forecast_horizon=144,
    num_filters=32,
    kernel_size=2,
    dropout_rate=0.3,
    activation='relu',
    dilations='[1. 2. 4. 8.]',
)

### Grid Search Test

In [None]:
# Search space for the TCN grid search: 2*2*2*2*2*3*2*1 = 192 combinations.
tcn_param_grid = dict(
    window_size=[30, 60],
    forecast_horizon=[90, 144],
    num_filters=[32, 64],
    kernel_size=[2, 3],
    dropout_rate=[0.1, 0.3],
    activation=['relu', 'tanh', 'elu'],
    dilations=[[1, 2, 4, 8], [1, 2, 4, 8, 16, 32]],
    label_weights=[lw],
)

In [None]:
tcn = ModelFactory(tcn_base_config)

save_dir = "grid_search_results"
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, "gs_tcn_res_test.csv")

# Search over the parameter grid. The base config holds single values, not lists
# of options, so it cannot be expanded into combinations.
results = tcn.grid_search(gs_train, gs_validate, labels, tcn_param_grid, save_path=save_path)

# Sort by best macro F1 (failed runs carry no 'macro_f1' and sort last)
sorted_results = sorted(results, key=lambda x: x.get('macro_f1', 0), reverse=True)

def safe_fmt(x):
    return f"{x:.4f}" if isinstance(x, (float, int)) else str(x)

for r in sorted_results[:5]:  # Top 5 configs
    print(f"   Macro F1: {safe_fmt(r.get('macro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('macro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('macro_recall', '-'))}")
    print(f"   Micro F1: {safe_fmt(r.get('micro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('micro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('micro_recall', '-'))}")
    print(f"Hamming : {safe_fmt(r.get('hamming_score', '-'))}")

In [None]:
# Call model_perform on the saved TCN grid-search CSV with 10 as second argument
# (presumably the number of top configurations to report; confirm against
# ModelFactory.model_perform).
tcn.model_perform('grid_search_results/gs_tcn_res_test.csv',10)

In [None]:


# Turn the first row of the frame returned by model_perform(..., 1) into a
# config dict: the metric columns are dropped and 'arch' is set to 'tcn'.
best_conf = tcn.model_perform('grid_search_results/gs_tcn_res_test.csv',1)
best_frame = best_conf[0]
best_frame.drop(
    columns=['macro_f1','micro_f1','macro_precision', 'micro_precision',
             'macro_recall', 'micro_recall','hamming_score', 'accuracy'],
    inplace=True,
)
best_frame['arch'] = 'tcn'

tcn_conf = best_frame.iloc[0].to_dict()
print(tcn_conf)

In [None]:
# Build a factory from tcn_conf, fit it on gs_train and pickle the whole
# factory object to models/tcn.pkl.
# NOTE(review): open() does not create the models/ directory; it must exist.
# A later cell in this section also writes to models/tcn.pkl.
tcn_compare = ModelFactory(tcn_conf)
tcn_compare.fit_prepared(gs_train)
with open('models/tcn.pkl','wb') as f:
    pickle.dump(tcn_compare, f)

### Continual Learning Test

In [None]:
# Continual-learning evaluation for the TCN:
#   1. fit on cl_train and score on cl_test,
#   2. fit on cl_test as well,
#   3. for every entry of cl_list: fit on entry[0], score on entry[1]
#      (recording wall-clock durations), then fit on entry[1].
cl_tcn = ModelFactory(tcn_conf)
cl_tcn.fit_prepared(cl_train)
results = []
X_val_seq, y_true = cl_tcn.model.generate_sequence(cl_test['X'], cl_test['y'])
y_pred = cl_tcn.model.model.predict(X_val_seq)
# Binarise at 0.5 before scoring, exactly as the CNN and LSTM cells do. The
# sklearn classification metrics below need 0/1 labels, and thresholding is a
# no-op if the predictions are already 0/1.
y_pred = (y_pred > 0.5).astype(int)

metrics = {
                'macro_f1': f1_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_f1': f1_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_precision': precision_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_recall': recall_score(y_true, y_pred, average='micro', zero_division=0),
                'hamming_score': 1 - hamming_loss(y_true, y_pred),
                'accuracy': accuracy_score(y_true, y_pred),

            }
results.append(metrics)
cl_tcn.fit_prepared(cl_test)

for batch in cl_list:
    train = batch[0]
    test = batch[1]

    start_train = time.time()
    cl_tcn.fit_prepared(train)
    mark = time.time()

    X_val_seq, y_true = cl_tcn.model.generate_sequence(test['X'], test['y'])
    y_pred = cl_tcn.model.model.predict(X_val_seq)
    # predict_duration spans sequence generation plus predict(); the
    # thresholding below is deliberately kept outside the timed region.
    end_predict = time.time() - mark
    y_pred = (y_pred > 0.5).astype(int)
    metrics = {
                'macro_f1': f1_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_f1': f1_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_precision': precision_score(y_true, y_pred, average='micro', zero_division=0),
                'macro_recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
                'micro_recall': recall_score(y_true, y_pred, average='micro', zero_division=0),
                'hamming_score': 1 - hamming_loss(y_true, y_pred),
                'train_duration':mark-start_train,
                'predict_duration':end_predict

            }
    results.append(metrics)
    cl_tcn.fit_prepared(test)

cl_tcn.plot.cl_test_result_model(results,'TCN')

In [None]:
# Drop the previous cl_tcn reference, rebuild the factory from tcn_conf and
# hand the three continual-learning inputs to run_continual_training_with_labels.
cl_tcn = None
cl_tcn = ModelFactory(tcn_conf)
cl_tcn.run_continual_training_with_labels(cl_train, cl_test, cl_list)

In [None]:
# NOTE(review): this cell repeats the preceding cell's calls with the same
# arguments (fresh factory from tcn_conf, then run_continual_training_with_labels);
# confirm whether the second run is intentional.
cl_tcn = None
cl_tcn = ModelFactory(tcn_conf)
cl_tcn.run_continual_training_with_labels(cl_train,cl_test,cl_list)


In [None]:
# Rebind `tcn` to a factory built from tcn_config, fit it on gs_train and
# pickle the factory object to models/tcn.pkl.
# NOTE(review): tcn_config is not assigned in any of the visible cells (only
# tcn_base_config and tcn_conf are); confirm it is defined earlier, otherwise
# this raises NameError on a fresh kernel.
# NOTE(review): this overwrites the models/tcn.pkl written for tcn_compare.
tcn = ModelFactory(tcn_config)
tcn.fit_prepared(gs_train)

with open('models/tcn.pkl','wb') as f:
    pickle.dump(tcn, f)

## CNN

In [None]:
# Fixed starting configuration for the CNN architecture.
cnn_base_config = dict(
    arch='cnn',
    window_size=30,
    forecast_horizon=1,
    num_conv_layers=2,
    num_filters=64,
    kernel_size=3,
    dropout_rate=0.3,
    activation='relu',
    use_batch_norm=True,
    use_global_pooling=True,
    version='V1',
    optimizer='adam',
    loss='binary_crossentropy',
    batch_size=32,
    epochs=20,
    label_weights=None,
    verbose=True,
)

### Grid Search Test

In [None]:
# Candidate values per CNN hyperparameter; every key maps to a list of options.
# label_weights has a single option: the weights computed in `lw`.
cnn_param_grid = dict(
    arch=['cnn'],
    window_size=[30, 60],
    num_conv_layers=[1, 2, 3],
    num_filters=[32, 64],
    kernel_size=[2, 3],
    dropout_rate=[0.1, 0.3],
    activation=['relu', 'elu'],
    use_batch_norm=[True, False],
    use_global_pooling=[True],
    version=['V1'],
    batch_size=[32],
    epochs=[10],
    label_weights=[lw],
)


In [None]:
# Run the CNN grid search over cnn_param_grid and print the five best
# configurations by macro F1.
cnn = ModelFactory(cnn_base_config)

save_dir = "grid_search_results"
os.makedirs(save_dir, exist_ok=True)  # make sure the target directory for save_path exists

# Single source of truth for the results file: the joined path is what gets
# passed to grid_search (previously it was computed and then ignored).
save_path = os.path.join(save_dir, "gs_cnn_res_test.csv")
results = cnn.grid_search(gs_train, gs_validate, labels, cnn_param_grid, save_path=save_path)

# Sort by best macro F1; entries without that key sort as 0.
sorted_results = sorted(results, key=lambda x: x.get('macro_f1', 0), reverse=True)

def safe_fmt(x):
    """Format a float/int with four decimals; anything else (e.g. the '-' placeholder) via str()."""
    return f"{x:.4f}" if isinstance(x, (float, int)) else str(x)

for r in sorted_results[:5]:
    print(f"   Macro F1: {safe_fmt(r.get('macro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('macro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('macro_recall', '-'))}")
    print(f"   Micro F1: {safe_fmt(r.get('micro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('micro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('micro_recall', '-'))}")
    print(f"Hamming : {safe_fmt(r.get('hamming_score', '-'))}")

In [None]:
# Call model_perform on the saved CNN grid-search CSV with 10 as second argument
# (presumably the number of top configurations to report; confirm against
# ModelFactory.model_perform). The return value is discarded via `_`.
_ = cnn.model_perform('grid_search_results/gs_cnn_res_test.csv',10)

In [None]:
# Turn the first row of the frame returned by model_perform(..., 1) into a
# config dict: the metric columns are dropped and 'arch' is set to 'cnn'.
best_conf = cnn.model_perform('grid_search_results/gs_cnn_res_test.csv',1)
best_frame = best_conf[0]
best_frame.drop(
    columns=['macro_f1','micro_f1','macro_precision', 'micro_precision',
             'macro_recall', 'micro_recall','hamming_score', 'accuracy'],
    inplace=True,
)
best_frame['arch'] = 'cnn'

cnn_conf = best_frame.iloc[0].to_dict()
print(cnn_conf)

In [None]:
# Build a factory from cnn_conf, fit it on gs_train and pickle the whole
# factory object to models/cnn.pkl.
# NOTE(review): open() does not create the models/ directory; it must exist.
cnn_compare = ModelFactory(cnn_conf)
cnn_compare.fit_prepared(gs_train)
with open('models/cnn.pkl','wb') as f:
    pickle.dump(cnn_compare, f)

### Continual Learning Test

In [None]:
# Continual-learning evaluation for the CNN:
#   1. fit on cl_train and score on cl_test,
#   2. fit on cl_test as well,
#   3. for every entry of cl_list: fit on entry[0], score on entry[1]
#      (recording wall-clock durations), then fit on entry[1].
cl_cnn = ModelFactory(cnn_conf)
cl_cnn.fit_prepared(cl_train)

# Scorers expanded into '<average>_<name>' keys, in the key order
# macro_f1, micro_f1, macro_precision, micro_precision, macro_recall, micro_recall.
scorers = (('f1', f1_score), ('precision', precision_score), ('recall', recall_score))
averages = ('macro', 'micro')

results = []
X_val_seq, y_true = cl_cnn.model.generate_sequence(cl_test['X'], cl_test['y'])
# Threshold the predictions at 0.5 to obtain 0/1 labels.
y_pred = (cl_cnn.model.model.predict(X_val_seq) > 0.5).astype(int)

metrics = {
    f'{avg}_{name}': scorer(y_true, y_pred, average=avg, zero_division=0)
    for name, scorer in scorers
    for avg in averages
}
metrics['hamming_score'] = 1 - hamming_loss(y_true, y_pred)

results.append(metrics)
cl_cnn.fit_prepared(cl_test)

for batch in cl_list:
    train, test = batch[0], batch[1]

    fit_started = time.time()
    cl_cnn.fit_prepared(train)
    fit_finished = time.time()

    X_val_seq, y_true = cl_cnn.model.generate_sequence(test['X'], test['y'])
    y_pred = cl_cnn.model.model.predict(X_val_seq)
    # Measured span covers sequence generation plus predict(), not thresholding.
    predict_elapsed = time.time() - fit_finished
    y_pred = (y_pred > 0.5).astype(int)

    metrics = {
        f'{avg}_{name}': scorer(y_true, y_pred, average=avg, zero_division=0)
        for name, scorer in scorers
        for avg in averages
    }
    metrics.update(
        hamming_score=1 - hamming_loss(y_true, y_pred),
        train_duration=fit_finished - fit_started,
        predict_duration=predict_elapsed,
    )
    results.append(metrics)
    cl_cnn.fit_prepared(test)

cl_cnn.plot.cl_test_result_model(results,'CNN')

In [None]:
# Drop the previous cl_cnn reference, rebuild the factory from cnn_conf and
# hand the three continual-learning inputs to run_continual_training_with_labels.
cl_cnn = None
cl_cnn = ModelFactory(cnn_conf)
cl_cnn.run_continual_training_with_labels(cl_train, cl_test, cl_list)

## LSTM

In [None]:
# Fixed starting configuration for the LSTM architecture.
lstm_base_config = dict(
    arch="lstm",
    window_size=30,
    forecast_horizon=1,
    num_units=64,
    dropout_rate=0.3,
    num_layers=1,
    bidirectional=True,
    return_sequences=False,
    activation="tanh",
    optimizer="adam",
    loss="binary_crossentropy",
    batch_size=32,
    epochs=20,
    label_weights=None,
    verbose=True,
)

### Grid Search Test

In [None]:
# Candidate values per LSTM hyperparameter; every key maps to a list of options.
lstm_param_grid = dict(
    arch=['lstm'],
    window_size=[30, 60],
    forecast_horizon=[1, 10],
    num_units=[32, 64, 128],
    dropout_rate=[0.2, 0.4],
    num_layers=[1, 2],
    bidirectional=[False, True],
    activation=['tanh'],
    optimizer=['adam'],
    loss=['binary_crossentropy'],
    batch_size=[32],
    epochs=[20],
)


In [None]:
# Run the LSTM grid search over lstm_param_grid and print the five best
# configurations by macro F1.
lstm = ModelFactory(lstm_base_config)

save_dir = "grid_search_results"
os.makedirs(save_dir, exist_ok=True)  # make sure the target directory for save_path exists

# Single source of truth for the results file: the joined path is what gets
# passed to grid_search (previously it was computed and then ignored).
save_path = os.path.join(save_dir, "gs_lstm_res_test.csv")
results = lstm.grid_search(gs_train, gs_validate, labels, lstm_param_grid, save_path=save_path)

# Sort by best macro F1; entries without that key sort as 0.
sorted_results = sorted(results, key=lambda x: x.get('macro_f1', 0), reverse=True)

def safe_fmt(x):
    """Format a float/int with four decimals; anything else (e.g. the '-' placeholder) via str()."""
    return f"{x:.4f}" if isinstance(x, (float, int)) else str(x)

for r in sorted_results[:5]:  # Top 5 configs
    print(f"   Macro F1: {safe_fmt(r.get('macro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('macro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('macro_recall', '-'))}")
    print(f"   Micro F1: {safe_fmt(r.get('micro_f1', '-'))}, "
          f"Precision: {safe_fmt(r.get('micro_precision', '-'))}, "
          f"Recall: {safe_fmt(r.get('micro_recall', '-'))}")
    print(f"Hamming : {safe_fmt(r.get('hamming_score', '-'))}")

In [None]:
# Call model_perform on the saved LSTM grid-search CSV with 10 as second argument
# (presumably the number of top configurations to report; confirm against
# ModelFactory.model_perform). The return value is discarded via `_`.
_ = lstm.model_perform('grid_search_results/gs_lstm_res_test.csv',10)

In [None]:
# Turn the first row of the frame returned by model_perform(..., 1) into a
# config dict: the metric columns are dropped and 'arch' is set to 'lstm'.
best_conf = lstm.model_perform('grid_search_results/gs_lstm_res_test.csv',1)
best_frame = best_conf[0]
best_frame.drop(
    columns=['macro_f1','micro_f1','macro_precision', 'micro_precision',
             'macro_recall', 'micro_recall','hamming_score', 'accuracy'],
    inplace=True,
)
best_frame['arch'] = 'lstm'

lstm_conf = best_frame.iloc[0].to_dict()

In [None]:
# Build a factory from lstm_conf, fit it on gs_train and pickle the whole
# factory object to models/lstm.pkl.
# NOTE(review): open() does not create the models/ directory; it must exist.
lstm_compare = ModelFactory(lstm_conf)
lstm_compare.fit_prepared(gs_train)
with open('models/lstm.pkl','wb') as f:
    pickle.dump(lstm_compare, f)

### Continual Learning Test

In [None]:
# Continual-learning evaluation for the LSTM:
#   1. fit on cl_train and score on cl_test (stored with run = -1),
#   2. fit on cl_test as well,
#   3. for every entry i of cl_list: fit on entry[0], score on entry[1]
#      (stored with run = i, plus wall-clock durations), then fit on entry[1].
cl_lstm = ModelFactory(lstm_conf)
cl_lstm.fit_prepared(cl_train)

# Scorers expanded into '<average>_<name>' keys, in the key order
# macro_f1, micro_f1, macro_precision, micro_precision, macro_recall, micro_recall.
scorers = (('f1', f1_score), ('precision', precision_score), ('recall', recall_score))
averages = ('macro', 'micro')

results = []
X_val_seq, y_true = cl_lstm.model.generate_sequence(cl_test['X'], cl_test['y'])
# Threshold the predictions at 0.5 to obtain 0/1 labels.
y_pred = (cl_lstm.model.model.predict(X_val_seq) > 0.5).astype(int)

metrics = {'run': -1}
metrics.update({
    f'{avg}_{name}': scorer(y_true, y_pred, average=avg, zero_division=0)
    for name, scorer in scorers
    for avg in averages
})
metrics['hamming_score'] = 1 - hamming_loss(y_true, y_pred)

results.append(metrics)
cl_lstm.fit_prepared(cl_test)


for i, batch in enumerate(cl_list):
    train, test = batch[0], batch[1]

    fit_started = time.time()
    cl_lstm.fit_prepared(train)
    fit_finished = time.time()

    X_val_seq, y_true = cl_lstm.model.generate_sequence(test['X'], test['y'])
    y_pred = cl_lstm.model.model.predict(X_val_seq)
    # Measured span covers sequence generation plus predict(), not thresholding.
    predict_elapsed = time.time() - fit_finished
    y_pred = (y_pred > 0.5).astype(int)

    metrics = {'run': i}
    metrics.update({
        f'{avg}_{name}': scorer(y_true, y_pred, average=avg, zero_division=0)
        for name, scorer in scorers
        for avg in averages
    })
    metrics.update(
        hamming_score=1 - hamming_loss(y_true, y_pred),
        train_duration=fit_finished - fit_started,
        predict_duration=predict_elapsed,
    )
    results.append(metrics)
    cl_lstm.fit_prepared(test)

cl_lstm.plot.cl_test_result_model(results,'LSTM')

In [35]:
# Hard-coded configurations for the three architectures, so that the
# continual-learning cells can run without repeating the grid searches.
# The CNN and TCN configs carry the label weights computed in `lw`.
cnn_conf = dict(
    arch='cnn',
    window_size=60,
    num_conv_layers=2,
    num_filters=32,
    kernel_size=2,
    dropout_rate=0.1,
    activation='relu',
    use_batch_norm=True,
    use_global_pooling=True,
    version='V1',
    batch_size=32,
    epochs=10,
    label_weights=lw,
)
lstm_conf = dict(
    arch='lstm',
    window_size=30,
    forecast_horizon=144,
    num_units=32,
    dropout_rate=0.2,
    num_layers=2,
    bidirectional=True,
    activation='tanh',
    optimizer='adam',
    loss='binary_crossentropy',
    batch_size=32,
    epochs=20,
)
tcn_conf = dict(
    arch='tcn',
    window_size=30,
    forecast_horizon=144,
    num_filters=32,
    kernel_size=2,
    dropout_rate=0.3,
    activation='elu',
    # Stored as a string, not as a list of ints.
    dilations='[1. 2. 4. 8. 16. 32]',
    label_weights=lw,
)

In [None]:
# Drop the previous cl_lstm reference, rebuild the factory from lstm_conf and
# hand the three continual-learning inputs to run_continual_training_with_labels.
cl_lstm = None
cl_lstm = ModelFactory(lstm_conf)
cl_lstm.run_continual_training_with_labels(cl_train,cl_test,cl_list)

# Model Comparison

In [None]:
# Build evaluation sequences from gs_test and collect 0/1 predictions from the
# TCN, LSTM and CNN models.
# NOTE(review): the sequences are generated by the CNN wrapper only and then
# fed to all three models; this presumes TCN and LSTM were built with the same
# window settings as the CNN. Confirm before trusting the comparison.
X_val_seq, y_true = cnn.model.generate_sequence(gs_test['X'], gs_test['y'])
tcn_y_pred = tcn.model.model.predict(X_val_seq)
# Threshold the TCN output at 0.5 like the other two models so every entry of
# the comparison holds 0/1 labels (a no-op if the output is already 0/1).
tcn_y_pred = (tcn_y_pred > 0.5).astype(int)
lsmt_y_pred = lstm.model.model.predict(X_val_seq)
lsmt_y_pred = (lsmt_y_pred > 0.5).astype(int)
cnn_y_pred = cnn.model.model.predict(X_val_seq)
cnn_y_pred = (cnn_y_pred > 0.5).astype(int)

In [None]:
# Display names and prediction arrays for the comparison helpers; the two
# lists are index-aligned (TCN, LSTM, CNN).
model_names = ['TCN', 'LSTM', 'CNN']
preds = [tcn_y_pred, lsmt_y_pred, cnn_y_pred]

In [None]:
# Compute metrics for all three models against y_true and show the result.
# The plot helpers are reached through the `tcn` instance, but they receive
# every model's predictions via `preds`.
df_metrics = tcn.plot.calculate_metrics(model_names, preds, y_true)
display(df_metrics)

In [None]:
# Per-label F1 for each model, going by the helper's name; the exact shape of
# the returned object is defined in ModelFactory's plot helper. TODO confirm.
df_label_f1 = tcn.plot.calculate_per_label_f1(model_names, preds, y_true)

In [None]:
# Pass every model's predictions and display names to the distribution plot helper.
tcn.plot.plot_binary_prediction_distribution(preds, model_names)


In [None]:
import matplotlib.pyplot as plt

def compare_models_combined(df, metric_cols,path=None):
    """
    Plot selected metrics on the same figure for each model across runs.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'run', 'model', and the metric columns.
        metric_cols (list of str): List of metric column names to plot (e.g., ['macro_f1', 'accuracy'])
        path (str, optional): File stem for the saved figure. When given, the
            figure is written to 'plots/<path>.pdf' before it is shown; the
            target directory is created if it does not exist.

    Returns:
        None

    Raises:
        ValueError: If 'run' or 'model' is missing from df.columns.
    """
    required_cols = ('run', 'model')
    # Sorted so the error message is deterministic regardless of iteration order.
    missing = sorted(col for col in required_cols if col not in df.columns)
    if missing:
        raise ValueError(f"DataFrame is missing required columns: {missing}")

    plt.figure(figsize=(12, 6))

    # One line per (model, metric) pair, all drawn on the same axes.
    for model_name, group in df.groupby('model'):
        for metric in metric_cols:
            if metric not in group.columns:
                print(f"⚠️ Skipping: '{metric}' not found in DataFrame for model '{model_name}'")
                continue
            label = f"{model_name} - {metric}"
            plt.plot(group['run'], group[metric], marker='o', label=label)

    plt.title('Model Comparison Over Runs')
    plt.xlabel('Run')
    plt.ylabel('Metric Value')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    if path:
        target = f'plots/{path}.pdf'
        # savefig does not create missing directories, so ensure it is there.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        plt.savefig(target)
    plt.show()

In [None]:
# Read each model's overall-results CSV, tag its rows with the model name and
# stack the three frames into one with a fresh 0..n-1 index.
cl_res_1 = pd.read_csv('results/cnn_overall_results.csv').assign(model='cnn')
cl_res_2 = pd.read_csv('results/tcn_overall_results.csv').assign(model='tcn')
cl_res_3 = pd.read_csv('results/lstm_overall_results.csv').assign(model='lstm')

cl_total_res = pd.concat((cl_res_1, cl_res_2, cl_res_3), ignore_index=True)

Index(['run', 'macro_f1', 'micro_f1', 'macro_precision', 'micro_precision',
       'macro_recall', 'micro_recall', 'accuracy', 'model'],
      dtype='object')

In [None]:
# Plot macro F1 against run for every model and save the figure as
# 'plots/Macrof1contlearning.pdf'.
compare_models_combined(cl_total_res,['macro_f1'] ,'Macrof1contlearning')

In [None]:
# Plot micro F1 against run for every model. The figure gets its own file
# name so it no longer overwrites the macro-F1 figure saved under
# 'Macrof1contlearning'.
compare_models_combined(cl_total_res,['micro_f1'] ,'Microf1contlearning')