In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix

class PoliticalBiasDNN:
    """
    Deep Neural Network model for political bias classification.

    Wraps a Keras ``Sequential`` classifier with a fixed 3-unit softmax
    output and a ``LabelBinarizer`` that one-hot encodes the training labels.
    Feature matrices may be SciPy sparse matrices (anything exposing
    ``toarray()``) or dense array-likes.
    """

    def __init__(self, input_dim, hidden_layers=(128, 64), dropout_rate=0.3, random_state=42):
        """
        Initialize the DNN model.

        Args:
            input_dim (int): Input feature dimension
            hidden_layers (sequence of int): Hidden layer sizes, in order
            dropout_rate (float): Dropout rate applied after each hidden layer
            random_state (int): Random seed for reproducibility
        """
        # Set random seeds for reproducibility (NumPy and TensorFlow globals).
        np.random.seed(random_state)
        import tensorflow as tf
        tf.random.set_seed(random_state)

        self.input_dim = input_dim
        # Copy so the instance never shares state with the caller's sequence
        # (or with the default argument).
        self.hidden_layers = list(hidden_layers)
        self.dropout_rate = dropout_rate
        self.model = self._build_model()
        self.lb = LabelBinarizer()
        self.history = None

    @staticmethod
    def _to_dense(X):
        """Return ``X`` as a dense array; sparse inputs are densified via ``toarray()``."""
        if hasattr(X, "toarray"):
            return X.toarray()
        return np.asarray(X)

    def _labels_from_probs(self, probs):
        """Convert a probability matrix to labels (class indices if the binarizer is unfitted)."""
        indices = np.argmax(probs, axis=1)
        # ``classes_`` only exists once ``train`` has fitted the binarizer; a
        # model restored with ``load_model`` has no fitted binarizer.
        classes = getattr(self.lb, "classes_", None)
        if classes is None:
            return indices
        return np.asarray(classes)[indices]

    def _build_model(self):
        """
        Build the DNN model architecture.

        Returns:
            keras.Model: Compiled model
        """
        model = Sequential()
        model.add(Input(shape=(self.input_dim,)))

        # One Dense(relu) + Dropout pair per configured hidden layer.
        for units in self.hidden_layers:
            model.add(Dense(units, activation='relu'))
            model.add(Dropout(self.dropout_rate))

        model.add(Dense(3, activation='softmax'))  # 3 classes: left, center, right

        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

    def train(self, X_train, y_train, validation_data=None, epochs=10, batch_size=32, patience=2):
        """
        Train the DNN model.

        Args:
            X_train: Training features (sparse matrix or dense array-like)
            y_train: Training labels
            validation_data (tuple): Optional validation data (X_val, y_val)
            epochs (int): Maximum number of training epochs
            batch_size (int): Training batch size
            patience (int): Early stopping patience

        Returns:
            self: Trained model instance

        Raises:
            ValueError: If the labels do not one-hot encode to 3 columns,
                which is what the fixed 3-unit output layer requires.
        """
        # One-hot encode the labels; this also fits the binarizer that
        # ``predict`` uses to map indices back to labels.
        y_train_bin = self.lb.fit_transform(y_train)
        if y_train_bin.ndim != 2 or y_train_bin.shape[1] != 3:
            raise ValueError(
                "PoliticalBiasDNN expects exactly 3 distinct classes in y_train; "
                f"got encoded label shape {y_train_bin.shape}."
            )

        # Prepare validation data if provided
        val_data = None
        if validation_data is not None:
            X_val, y_val = validation_data
            val_data = (self._to_dense(X_val), self.lb.transform(y_val))

        # EarlyStopping watches 'val_loss' by default, which is not reported
        # without validation data; fall back to the training loss so the
        # callback still has a metric to act on.
        monitor = 'val_loss' if val_data is not None else 'loss'
        early_stop = EarlyStopping(
            monitor=monitor,
            patience=patience,
            restore_best_weights=True
        )

        # Train the model
        self.history = self.model.fit(
            self._to_dense(X_train), y_train_bin,
            validation_data=val_data,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stop],
            verbose=1
        )

        return self

    def predict(self, X):
        """
        Make predictions with the trained model.

        Args:
            X: Feature matrix to predict on (sparse matrix or dense array-like)

        Returns:
            array: Predicted class labels, expressed in the label space seen
                by ``train``. If the label binarizer has not been fitted
                (e.g. the model was restored with ``load_model``), the argmax
                class indices are returned instead.
        """
        return self._labels_from_probs(self.predict_proba(X))

    def predict_proba(self, X):
        """
        Predict class probabilities.

        Args:
            X: Feature matrix to predict on (sparse matrix or dense array-like)

        Returns:
            array: Class probabilities, one row per sample
        """
        return self.model.predict(self._to_dense(X))

    def evaluate(self, X_test, y_test, target_names=("Left", "Center", "Right")):
        """
        Evaluate the model and print classification report.

        Args:
            X_test: Test features (sparse matrix or dense array-like)
            y_test: True test labels
            target_names (sequence of str): Names of target classes

        Returns:
            dict: Dictionary containing evaluation metrics. ``'history'`` is
                ``None`` when this instance has no training history.
        """
        names = list(target_names) if target_names is not None else None

        # Run inference once and derive the labels from the same probabilities.
        y_pred_proba = self.predict_proba(X_test)
        y_pred = self._labels_from_probs(y_pred_proba)

        report = classification_report(y_test, y_pred, target_names=names, output_dict=True)
        print("📊 DNN Results:")
        print(classification_report(y_test, y_pred, target_names=names))

        macro = report['macro avg']
        return {
            'y_pred': y_pred,
            'probabilities': y_pred_proba,
            'confusion_matrix': confusion_matrix(y_test, y_pred),
            'report': report,
            'history': self.history.history if self.history is not None else None,
            'accuracy': report['accuracy'],
            'f1': macro['f1-score'],
            'precision': macro['precision'],
            'recall': macro['recall']
        }

    def save_model(self, filepath):
        """
        Save the trained model to disk.

        Only the Keras model is written; the fitted label binarizer and the
        training history are not persisted.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path to the saved model
        """
        self.model.save(filepath)
        return filepath

    @classmethod
    def load_model(cls, filepath, input_dim=5000):
        """
        Load a trained model from disk.

        The returned instance has an unfitted label binarizer and no training
        history, so ``predict`` yields class indices until ``train`` is called.

        Args:
            filepath (str): Path to the saved model
            input_dim (int): Fallback input dimension, used only when it
                cannot be read from the loaded model

        Returns:
            PoliticalBiasDNN: Loaded model instance
        """
        from tensorflow.keras.models import load_model

        loaded = load_model(filepath)

        # Prefer the dimension of the network actually loaded over the
        # caller-supplied guess, so ``instance.input_dim`` matches the model.
        shape = getattr(loaded, "input_shape", None)
        if isinstance(shape, tuple) and shape and isinstance(shape[-1], int):
            input_dim = shape[-1]

        instance = cls(input_dim=input_dim)
        # Replace the freshly built placeholder network with the loaded one.
        instance.model = loaded
        return instance

    def get_model_summary(self):
        """
        Get a summary of the model architecture.

        Returns:
            str: String containing model summary
        """
        chunks = []

        def _collect(text, **_ignored):
            # Extra keyword arguments (if the backend passes any) are ignored.
            chunks.append(str(text))

        # Capture through the ``print_fn`` hook instead of swapping
        # ``sys.stdout``: nothing global is left modified if summary() raises.
        self.model.summary(print_fn=_collect)

        summary_string = "\n".join(chunks)
        if not summary_string.endswith("\n"):
            summary_string += "\n"
        return summary_string

    def plot_history(self):
        """
        Plot training history.

        Returns:
            matplotlib.figure.Figure: Training history plot

        Raises:
            ValueError: If the model has not been trained yet
        """
        if self.history is None:
            raise ValueError("Model hasn't been trained yet. No history to plot.")

        import matplotlib.pyplot as plt

        logs = self.history.history
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        # One panel per metric: (axis, history key, display name).
        for ax, key, title in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
            ax.plot(logs[key], label=f'Train {title}')
            val_key = f'val_{key}'
            # Validation curves exist only when validation data was supplied.
            if val_key in logs:
                ax.plot(logs[val_key], label=f'Validation {title}')
            ax.set_xlabel('Epoch')
            ax.set_ylabel(title)
            ax.set_title(f'Training and Validation {title}')
            ax.legend()
            ax.grid(True)

        plt.tight_layout()

        return fig