<a href="https://colab.research.google.com/github/OneFineStarstuff/Pinn/blob/main/AGISystemSTEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import contextlib
import logging
import os
from datetime import datetime
from typing import Any, Callable, Dict, Optional, Tuple

import numpy as np
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.distribute import MultiWorkerMirroredStrategy

# --- Custom Exceptions for better error handling ---
class ModelInitializationError(Exception):
    """Raised when a model cannot be created, e.g. no `model_loader` was supplied."""

class ModelNotFoundError(Exception):
    """Raised when a requested model could not be located or produced."""

class AGISystemSTEM:
    """
    Manage loading, caching, training and versioned saving of Keras models.

    Models are produced on demand by a user-supplied ``model_loader`` and kept
    in an in-memory cache keyed by name. Training can optionally run under
    TensorFlow's ``MultiWorkerMirroredStrategy``; every training run writes
    TensorBoard logs and saves the trained model under a timestamped directory.
    """

    def __init__(self, model_loader: Optional[Callable] = None, distributed_training: bool = False) -> None:
        """
        Initializes the AGI system.

        Args:
            model_loader (Optional[Callable]): A function called as
                ``model_loader(model_name)`` that returns a Keras ``Model``.
            distributed_training (bool): If True, try to create a
                ``MultiWorkerMirroredStrategy``. This requires the TF_CONFIG
                environment variable to be set on each worker node. If the
                strategy cannot be created, the system falls back to
                single-device training and ``self.strategy`` is None.
        """
        # Configure logging BEFORE anything can log. If a message is emitted
        # first (e.g. the error path of `_initialize_strategy`), the root
        # logger is configured implicitly with defaults and this call becomes
        # a no-op, which would silently drop every INFO message below.
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

        self.models: Dict[str, Model] = {}  # In-memory cache: model name -> model
        self.model_loader = model_loader
        self.distributed_training = distributed_training  # What the caller requested
        self.strategy = self._initialize_strategy()       # What is actually in effect

        logging.info("AGISystemSTEM initialized.")
        logging.info("Using TensorFlow version: %s", tf.__version__)
        # Report the effective state, not the requested flag: the strategy may
        # have failed to initialize even though distributed training was asked for.
        logging.info("Distributed training enabled: %s", self.strategy is not None)

    def _initialize_strategy(self) -> Optional[MultiWorkerMirroredStrategy]:
        """
        Create the distribution strategy if distributed training was requested.

        Returns:
            The strategy, or None when distributed training is disabled or the
            strategy raised ``RuntimeError`` during construction.
        """
        if not self.distributed_training:
            return None

        # For MultiWorkerMirroredStrategy to work, the TF_CONFIG environment
        # variable must be configured on each machine in the cluster.
        # See: https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras
        try:
            return MultiWorkerMirroredStrategy()
        except RuntimeError as e:
            logging.error(
                "Failed to initialize MultiWorkerMirroredStrategy. Ensure TF_CONFIG is set correctly. Error: %s",
                e,
            )
            logging.warning("Falling back to single-device training.")
            return None

    def _strategy_scope(self) -> contextlib.AbstractContextManager:
        """Return the strategy's scope, or a no-op context when no strategy is active."""
        if self.strategy is not None:
            return self.strategy.scope()
        return contextlib.nullcontext()

    def get_model(self, model_name: str) -> Model:
        """
        Retrieves a model from the cache or loads it using the model_loader.

        Args:
            model_name (str): The name of the model to retrieve or load.

        Returns:
            Model: The Keras model (cached after the first successful load).

        Raises:
            ModelInitializationError: If the model is not cached and no
                model_loader was provided.
            ModelNotFoundError: If the model_loader returns anything other
                than a Keras ``Model`` (including None).
        """
        if model_name in self.models:
            logging.info("Loading model '%s' from cache.", model_name)
            return self.models[model_name]

        logging.info("Model '%s' not in cache. Attempting to load using model_loader.", model_name)
        if not self.model_loader:
            raise ModelInitializationError("A `model_loader` function must be provided to create new models.")

        # Build under the strategy scope (when one is active) so that a model
        # fetched directly through get_model() and cached here can later be
        # compiled inside train()'s scope. Re-entering the same strategy's
        # scope from within train() is harmless.
        with self._strategy_scope():
            model = self.model_loader(model_name)
        if not isinstance(model, Model):
            raise ModelNotFoundError(f"The model_loader did not return a valid Keras Model for '{model_name}'.")

        self.models[model_name] = model
        logging.info("Successfully loaded and cached new model: '%s'.", model_name)
        return model

    def save_model(self, model: Model, model_name: str) -> str:
        """
        Saves a model with a timestamp-based version.

        The model is written to ``models/<model_name>/<YYYYmmdd-HHMMSS>``
        relative to the current working directory.

        Args:
            model (Model): The Keras model to save.
            model_name (str): The base name for the model.

        Returns:
            str: The full path where the model was saved.

        Raises:
            OSError: If the directory cannot be created or written.
            ValueError: If ``model.save`` rejects the path (re-raised as-is).
        """
        version = datetime.now().strftime("%Y%m%d-%H%M%S")
        save_path = os.path.join("models", model_name, version)
        try:
            os.makedirs(save_path, exist_ok=True)
            # NOTE(review): Keras 3 (bundled with recent TensorFlow releases)
            # is documented to require a `.keras` or `.h5` suffix here; an
            # extension-less directory path may raise ValueError on those
            # versions. Confirm against the TensorFlow version in use.
            model.save(save_path)
        except (OSError, ValueError) as e:
            # Log for diagnosis, then let the caller decide how to handle it.
            logging.error("Error saving model '%s' to %s: %s", model_name, save_path, e)
            raise

        logging.info("Model '%s' saved successfully to: %s", model_name, save_path)
        return save_path

    def train(self,
              model_name: str,
              data: Tuple[np.ndarray, np.ndarray],
              epochs: int = 10,
              batch_size: int = 32,
              optimizer: str = 'adam',
              loss: str = 'mse',
              metrics: Optional[list] = None,
              validation_data: Optional[Tuple[np.ndarray, np.ndarray]] = None) -> None:
        """
        Trains a specified model with configurable parameters, then saves it.

        The model is (re)compiled on every call with the given optimizer,
        loss and metrics. TensorBoard logs go to
        ``logs/fit/<model_name>_<YYYYmmdd-HHMMSS>``.

        Args:
            model_name (str): The name of the model to train.
            data (Tuple[np.ndarray, np.ndarray]): A tuple of (features, labels) for training.
            epochs (int): The number of training epochs.
            batch_size (int): The size of each training batch.
            optimizer (str): The optimizer to use for training.
            loss (str): The loss function.
            metrics (Optional[list]): Metrics to evaluate during training; None for no extra metrics.
            validation_data (Optional): Data for validation, as a tuple of (features, labels).

        Raises:
            ModelInitializationError: Propagated from ``get_model``.
            ModelNotFoundError: Propagated from ``get_model``.
            OSError: Propagated from ``save_model``.
        """
        log_dir = os.path.join("logs", "fit", model_name + "_" + datetime.now().strftime("%Y%m%d-%H%M%S"))
        tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
        logging.info("TensorBoard logs will be saved to: %s", log_dir)

        # Model retrieval and compilation happen under the strategy scope when
        # distributed training is active, and under a no-op context otherwise.
        with self._strategy_scope():
            model = self.get_model(model_name)
            model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        logging.info("Starting training for model '%s' for %s epochs.", model_name, epochs)
        model.fit(
            data[0], data[1],
            epochs=epochs,
            batch_size=batch_size,
            validation_data=validation_data,
            callbacks=[tensorboard_callback],
            verbose=1
        )
        logging.info("Training finished for model '%s'.", model_name)

        # Save the trained model
        self.save_model(model, model_name)

# --- Example Usage ---

def simple_model_loader(model_name: str) -> Optional[Model]:
    """
    Build the Keras model registered under `model_name`.

    Only "regression_model" is known: a three-layer dense network taking
    10 input features and producing a single output. Any other name
    yields None, leaving it to the caller to treat that as "not found".
    """
    if model_name != "regression_model":
        # Unknown model name: signal absence rather than raising.
        return None

    stack = [
        layers.Dense(64, activation='relu', input_shape=(10,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ]
    return tf.keras.Sequential(stack)

if __name__ == '__main__':
    # Step 1: synthetic data -- 10 random features per sample, 1 random target.
    print("Generating dummy data for training and validation...")
    x_train, y_train = np.random.rand(1000, 10), np.random.rand(1000, 1)
    x_val, y_val = np.random.rand(200, 10), np.random.rand(200, 1)
    print("Dummy data generated.")

    # Step 2: build the system around the example loader.
    # Pass distributed_training=True only in a configured multi-worker environment.
    agi_system = AGISystemSTEM(
        model_loader=simple_model_loader,
        distributed_training=False,
    )

    # Step 3: run training; the known failure modes are logged, not raised.
    training_options = dict(
        model_name="regression_model",
        data=(x_train, y_train),
        epochs=5,
        batch_size=64,
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mae'],
        validation_data=(x_val, y_val),
    )
    try:
        agi_system.train(**training_options)
    except (ModelInitializationError, ModelNotFoundError, OSError) as exc:
        logging.error(f"An error occurred during the training process: {exc}")