In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

class PoliticalBiasLogisticRegression:
    """
    Logistic Regression model for political bias classification.

    Thin wrapper around scikit-learn's ``LogisticRegression`` that adds an
    evaluation summary, per-class feature inspection and pickle persistence.

    Attributes:
        model: The wrapped ``LogisticRegression`` estimator.
    """

    def __init__(self, max_iter=1000, random_state=42):
        """
        Initialize the logistic regression model.

        Args:
            max_iter (int): Maximum number of iterations for optimization
            random_state (int): Random seed for reproducibility
        """
        self.model = LogisticRegression(max_iter=max_iter, random_state=random_state)

    def train(self, X_train, y_train):
        """
        Train the logistic regression model.

        Args:
            X_train: Training features
            y_train: Training labels

        Returns:
            self: The trained model instance (allows call chaining)
        """
        self.model.fit(X_train, y_train)
        return self

    def predict(self, X):
        """
        Make predictions with the trained model.

        Args:
            X: Feature matrix to predict on

        Returns:
            array: Predicted class labels
        """
        return self.model.predict(X)

    def predict_proba(self, X):
        """
        Predict class probabilities for the input features.

        Args:
            X: Feature matrix to predict on

        Returns:
            array: Predicted class probabilities
        """
        return self.model.predict_proba(X)

    def evaluate(self, X_test, y_test, target_names=("Left", "Center", "Right")):
        """
        Evaluate the model and print a classification report.

        Args:
            X_test: Test features
            y_test: True test labels
            target_names (sequence or None): Display names of the target
                classes. ``None`` is forwarded unchanged to
                ``classification_report``.

        Returns:
            dict: Evaluation results with the keys ``'y_pred'``,
                ``'probabilities'``, ``'confusion_matrix'``, ``'report'``
                (the report as a dict), ``'accuracy'``, and the macro-averaged
                ``'f1'``, ``'precision'`` and ``'recall'``.
        """
        y_pred = self.predict(X_test)

        # The default is an immutable tuple (a list default would be shared
        # between calls); scikit-learn is handed a fresh list copy.
        names = None if target_names is None else list(target_names)

        # Two calls are needed: one for the structured dict, one for the
        # human-readable text that is printed.
        report = classification_report(y_test, y_pred, target_names=names, output_dict=True)
        print("📊 Logistic Regression Results:")
        print(classification_report(y_test, y_pred, target_names=names))

        macro_avg = report['macro avg']
        return {
            'y_pred': y_pred,
            'probabilities': self.predict_proba(X_test),
            'confusion_matrix': confusion_matrix(y_test, y_pred),
            'report': report,
            'accuracy': report['accuracy'],
            'f1': macro_avg['f1-score'],
            'precision': macro_avg['precision'],
            'recall': macro_avg['recall']
        }

    def get_feature_importance(self, feature_names=None, top_n=20):
        """
        Get the most important features for each class.

        Features are ranked by their coefficient in ``self.model.coef_``.

        Args:
            feature_names (list): Names of the features (e.g., from
                vectorizer.get_feature_names_out()). Defaults to
                ``feature_0 .. feature_{n-1}``.
            top_n (int): Number of top features to return per direction.
                Values larger than the number of features are clamped.

        Returns:
            dict: Maps each class display name to
                ``{'positive': [...], 'negative': [...]}``, where every list
                holds ``(feature_name, coefficient)`` tuples sorted by
                descending coefficient. ``'positive'`` holds the ``top_n``
                largest coefficients; ``'negative'`` holds the ``top_n``
                smallest ones, so its most negative entry comes last.

        Raises:
            ValueError: If the model has no coefficients (not trained) or
                ``top_n`` is negative.
        """
        if not hasattr(self.model, 'coef_'):
            raise ValueError("Model doesn't have coefficients. Make sure it's trained.")
        if top_n < 0:
            raise ValueError("top_n must be non-negative.")

        coef = self.model.coef_
        n_rows, n_features = coef.shape

        if feature_names is None:
            feature_names = [f"feature_{i}" for i in range(n_features)]

        class_labels = self.model.classes_.tolist()

        # Per the scikit-learn documentation, a binary problem yields a single
        # coefficient row that describes the second class (classes_[1]), so
        # that row must not be labelled with classes_[0].
        if n_rows == 1 and len(class_labels) == 2:
            row_positions = [1]
        else:
            row_positions = list(range(n_rows))

        # Optional display names attached to the estimator by the caller;
        # they are matched to classes by position, not by label value.
        display_names = getattr(self.model, 'target_names_', None)

        # Clamp so the tail slice below is well defined (top_n == 0 must give
        # an empty list, not every feature).
        k = min(top_n, n_features)

        top_features = {}
        for position, class_coef in zip(row_positions, coef):
            if display_names is not None and position < len(display_names):
                class_name = display_names[position]
            else:
                class_name = f"Class {class_labels[position]}"

            # Feature indices ordered from largest to smallest coefficient
            ranked = class_coef.argsort()[::-1]
            strongest = ranked[:k]
            weakest = ranked[n_features - k:]

            top_features[class_name] = {
                'positive': [(feature_names[idx], class_coef[idx]) for idx in strongest],
                'negative': [(feature_names[idx], class_coef[idx]) for idx in weakest]
            }

        return top_features

    def save_model(self, filepath):
        """
        Save the trained model to disk.

        Only the wrapped estimator (``self.model``) is pickled, not this
        wrapper instance.

        Args:
            filepath (str): Path to save the model file

        Returns:
            str: Path to the saved model
        """
        import pickle
        with open(filepath, 'wb') as f:
            pickle.dump(self.model, f)
        return filepath

    @classmethod
    def load_model(cls, filepath):
        """
        Load a trained model from disk.

        Security: the file is read with ``pickle.load``, which can execute
        arbitrary code while unpickling. Only load files from trusted sources.

        Args:
            filepath (str): Path to the saved model file

        Returns:
            PoliticalBiasLogisticRegression: Loaded model instance
        """
        import pickle
        instance = cls()
        with open(filepath, 'rb') as f:
            # NOTE(security): never point this at untrusted data.
            instance.model = pickle.load(f)
        return instance