In [None]:
import pandas as pd
import numpy as np

class InterpretabilityExplainer:
    """
    Attach model-interpretability results to a dataset as extra columns.

    Parameters
    ----------
    method : str
        Name of the method: 'SHAPExplainer', 'LIMEExplainer',
        'FeatureImportance' or 'PartialDependence'.
    config : dict, optional
        Settings for the selected method. ``None`` is treated as an empty
        dict. Keys read by each method:

        * SHAPExplainer: ``explainer_type`` ('tree', 'linear', 'kernel' or
          None for auto-detection), ``class_index`` (default 0),
          ``add_base_value`` (default False).
        * LIMEExplainer: ``mode`` (default 'classification'),
          ``class_names``, ``num_features``, ``explainer_kwargs``,
          ``explain_instance_kwargs``.
        * FeatureImportance: ``coef_aggregation`` ('mean', 'max'; any other
          value uses the first row of ``coef_``).
        * PartialDependence: ``features`` (required), ``kind`` (default
          'average'), ``pdp_kwargs``.

    Attributes
    ----------
    explainer_ : object or None
        The SHAP or LIME explainer built by the most recent ``transform``
        call that used one; ``None`` until then.
    """

    def __init__(self, method, config=None):
        self.method = method
        # A missing config must not break the ``self.config.get`` lookups.
        self.config = config if config is not None else {}
        self.explainer_ = None

    def transform(self, dataset: pd.DataFrame, model) -> pd.DataFrame:
        """
        Apply the configured interpretability method to the dataset.

        Parameters
        ----------
        dataset : pd.DataFrame
            Input dataset holding the feature columns (presumably without
            the target column). It is copied, never modified in place.
        model : fitted model
            The trained model to explain.

        Returns
        -------
        result : pd.DataFrame
            Copy of ``dataset`` with additional columns holding the
            interpretation results.

        Raises
        ------
        TypeError
            If ``dataset`` is not a pandas DataFrame.
        ValueError
            If ``method`` is not one of the supported names.
        """
        if not isinstance(dataset, pd.DataFrame):
            raise TypeError("dataset must be a pandas DataFrame")

        handlers = {
            'SHAPExplainer': self._shap_explain,
            'LIMEExplainer': self._lime_explain,
            'FeatureImportance': self._feature_importance,
            'PartialDependence': self._partial_dependence,
        }
        try:
            handler = handlers[self.method]
        except (KeyError, TypeError):
            # TypeError covers an unhashable ``method`` value.
            raise ValueError(
                f"Unknown interpretability method: {self.method}"
            ) from None

        return handler(dataset.copy(), model)

    def _build_shap_explainer(self, shap, X, model):
        """Create the SHAP explainer chosen by config or by model attributes."""
        explainer_type = self.config.get('explainer_type', None)
        if explainer_type is None:
            # Auto-detection looks only at `tree_` and `coef_`; any other
            # model falls back to the (slow) kernel explainer. Set
            # 'explainer_type' explicitly to override.
            if hasattr(model, 'tree_'):
                explainer_type = 'tree'
            elif hasattr(model, 'coef_'):
                explainer_type = 'linear'
            else:
                explainer_type = 'kernel'

        if explainer_type == 'tree':
            return shap.TreeExplainer(model)
        if explainer_type == 'linear':
            return shap.LinearExplainer(model, X)
        if explainer_type == 'kernel':
            pred_fn = (
                model.predict_proba
                if hasattr(model, 'predict_proba')
                else model.predict
            )
            return shap.KernelExplainer(pred_fn, X)
        raise ValueError(f"Unsupported explainer_type: {explainer_type}")

    def _shap_explain(self, X, model):
        """
        Compute SHAP values and append them as ``shap_<column>`` columns.

        For multi-output results the output selected by
        ``config['class_index']`` (default 0) is used. When
        ``config['add_base_value']`` is true and the explainer exposes
        ``expected_value``, a ``shap_base_value`` column is added as well.

        Raises
        ------
        ImportError
            If the ``shap`` package is not installed.
        ValueError
            If ``config['explainer_type']`` is not supported.
        """
        try:
            import shap
        except ImportError as err:
            raise ImportError(
                "shap package is required for SHAPExplainer."
            ) from err

        explainer = self._build_shap_explainer(shap, X, model)
        self.explainer_ = explainer

        class_index = self.config.get('class_index', 0)
        shap_vals = explainer.shap_values(X)

        if isinstance(shap_vals, list):
            # One array per output/class.
            shap_vals = shap_vals[class_index]
        shap_vals = np.asarray(shap_vals)
        if shap_vals.ndim == 3:
            # A 3-D result is taken to be (n_samples, n_features, n_outputs),
            # the layout recent SHAP releases use for multi-output models.
            shap_vals = shap_vals[:, :, class_index]

        shap_cols = [f"shap_{col}" for col in X.columns]
        shap_df = pd.DataFrame(shap_vals, columns=shap_cols, index=X.index)
        result = pd.concat([X, shap_df], axis=1)

        if self.config.get('add_base_value', False):
            if hasattr(explainer, 'expected_value'):
                base_val = explainer.expected_value
                if np.ndim(base_val) > 0:
                    # expected_value may be a list or an array with one entry
                    # per output; reduce it to the scalar for the chosen one.
                    base_val = np.ravel(np.asarray(base_val))
                    if base_val.size > 1:
                        base_val = base_val[class_index]
                    else:
                        base_val = base_val[0]
                result['shap_base_value'] = base_val

        return result

    def _lime_explain(self, X, model):
        """
        Compute a LIME explanation per row, appended as ``lime_<column>``.

        Weights are read through ``Explanation.as_map()`` (feature index ->
        weight) so that the column names are the plain feature names and
        identical for every row. Features that are not part of a row's
        explanation (e.g. when ``num_features`` is smaller than the number
        of columns) get a weight of 0.

        Raises
        ------
        ImportError
            If the ``lime`` package is not installed.
        """
        try:
            from lime import lime_tabular
        except ImportError as err:
            raise ImportError(
                "lime package is required for LIMEExplainer."
            ) from err

        mode = self.config.get('mode', 'classification')
        feature_names = list(X.columns)
        class_names = self.config.get('class_names', None)
        data = X.values

        explainer = lime_tabular.LimeTabularExplainer(
            data,
            feature_names=feature_names,
            class_names=class_names,
            mode=mode,
            **self.config.get('explainer_kwargs', {})
        )
        self.explainer_ = explainer

        if mode == 'classification':
            predict_fn = model.predict_proba
        else:
            predict_fn = model.predict

        # Loop-invariant settings are looked up once.
        num_features = self.config.get('num_features', len(feature_names))
        instance_kwargs = self.config.get('explain_instance_kwargs', {})
        # Label 1 is what ``Explanation.as_list()`` uses by default, for both
        # classification and regression explanations.
        label = 1

        explanations = []
        for row in data:
            exp = explainer.explain_instance(
                row,
                predict_fn,
                num_features=num_features,
                **instance_kwargs
            )
            explanations.append({
                f"lime_{feature_names[feat_idx]}": float(weight)
                for feat_idx, weight in exp.as_map()[label]
            })

        lime_cols = [f"lime_{name}" for name in feature_names]
        lime_df = pd.DataFrame(
            explanations, index=X.index, columns=lime_cols
        ).fillna(0)
        return pd.concat([X, lime_df], axis=1)

    def _feature_importance(self, X, model):
        """
        Append the model's global importances as ``importance_<column>``.

        Uses ``model.feature_importances_`` when present, otherwise the
        absolute value of ``model.coef_``. A multi-row ``coef_`` is reduced
        across rows according to ``config['coef_aggregation']``: 'mean'
        (default), 'max', or - for any other value - the first row only.
        The same importance is repeated on every row of the result.

        Raises
        ------
        AttributeError
            If the model has neither ``feature_importances_`` nor ``coef_``.
        ValueError
            If the number of importances differs from the number of
            columns in ``X``.
        """
        if hasattr(model, 'feature_importances_'):
            importances = model.feature_importances_
        elif hasattr(model, 'coef_'):
            coef = model.coef_
            if coef.ndim > 1:
                agg = self.config.get('coef_aggregation', 'mean')
                if agg == 'mean':
                    importances = np.mean(np.abs(coef), axis=0)
                elif agg == 'max':
                    importances = np.max(np.abs(coef), axis=0)
                else:
                    importances = np.abs(coef[0])
            else:
                importances = np.abs(coef)
        else:
            raise AttributeError("Model does not have feature_importances_ or coef_.")

        importances = np.ravel(np.asarray(importances))
        if importances.shape[0] != X.shape[1]:
            # zip() would silently truncate and misattribute importances.
            raise ValueError(
                f"Model provides {importances.shape[0]} importances but "
                f"dataset has {X.shape[1]} columns."
            )

        importance_dict = {
            f"importance_{col}": imp
            for col, imp in zip(X.columns, importances)
        }
        # Scalars are broadcast over the index, so an empty dataset still
        # gets the importance columns.
        importance_df = pd.DataFrame(importance_dict, index=X.index)
        return pd.concat([X, importance_df], axis=1)

    def _partial_dependence(self, X, model):
        """
        Append the averaged partial-dependence curve of each feature.

        ``config['features']`` is a feature name/index or a list of them.
        Each feature is evaluated on its own, so several features yield
        several one-dimensional curves rather than one joint grid. Every
        row of the result holds the same curve (first output of the
        ``'average'`` result) in column ``pdp_<feature>``; with
        ``kind='individual'`` the averaged curve is returned as well, under
        ``pdp_<feature>_avg``. ``config['pdp_kwargs']`` is forwarded to
        ``sklearn.inspection.partial_dependence`` in every case.

        Raises
        ------
        ImportError
            If ``sklearn.inspection`` cannot be imported.
        ValueError
            If ``config['features']`` is missing or empty.
        """
        try:
            from sklearn.inspection import partial_dependence
        except ImportError as err:
            raise ImportError(
                "sklearn.inspection is required for PartialDependence."
            ) from err

        features = self.config.get('features')
        if features is None:
            raise ValueError("For PartialDependence, 'features' must be specified in config (list of feature names or indices).")

        if isinstance(features, (str, int, np.integer)):
            # A bare name would otherwise be iterated character by character.
            features = [features]
        else:
            features = list(features)
        if len(features) == 0:
            raise ValueError(
                "For PartialDependence, 'features' must contain at least "
                "one feature."
            )

        kind = self.config.get('kind', 'average')
        pdp_kwargs = self.config.get('pdp_kwargs', {})
        if kind == 'individual':
            # Simplification: individual mode still reports the average.
            call_kind, suffix = 'average', '_avg'
        else:
            call_kind, suffix = kind, ''

        pdp_dict = {}
        for feat in features:
            feat_idx = X.columns.get_loc(feat) if isinstance(feat, str) else feat
            curve = partial_dependence(
                model, X, features=[feat_idx],
                kind=call_kind, **pdp_kwargs
            )
            # 'average' has the output axis first; keep the first output.
            pdp_dict[f"pdp_{feat}{suffix}"] = curve['average'][0]

        pdp_df = pd.DataFrame([pdp_dict] * len(X), index=X.index)
        return pd.concat([X, pdp_df], axis=1)