In [None]:
!pip install ucimlrepo 



In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np


In [None]:
# Fetch the UCI "Credit Approval" dataset (repository id 27).
# id=14 is the "Breast Cancer" dataset, which has no A1..A16 columns, so every
# later cell that reads 'A16' would fail with it.
credit_approval_repo = fetch_ucirepo(id=27)

# Features and targets are exposed as pandas DataFrames.
X = credit_approval_repo.data.features
y = credit_approval_repo.data.targets

# Dataset-level metadata.
print(credit_approval_repo.metadata)

# Per-variable information.
print(credit_approval_repo.variables)

# Combine features and target into a single DataFrame. The fetched repository
# object keeps its own name so `credit_approval` always refers to this frame.
credit_approval = pd.concat([X, y], axis=1)

{'uci_id': 14, 'name': 'Breast Cancer', 'repository_url': 'https://archive.ics.uci.edu/dataset/14/breast+cancer', 'data_url': 'https://archive.ics.uci.edu/static/public/14/data.csv', 'abstract': 'This breast cancer domain was obtained from the University Medical Centre, Institute of Oncology, Ljubljana, Yugoslavia. This is one of three domains provided by the Oncology Institute that has repeatedly appeared in the machine learning literature. (See also lymphography and primary-tumor.)', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 286, 'num_features': 9, 'feature_types': ['Categorical'], 'demographics': ['Age'], 'target_col': ['Class'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Mar 07 2024', 'dataset_doi': '10.24432/C51P4M', 'creators': ['Matjaz Zwitter', 'Milan Soklic'], 'intro_paper': None, 'additional_info': {'summary': 'Thi

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible. 'A16' is used as the target column.
target_column = 'A16'
feature_frame = credit_approval.drop(columns=[target_column])
target_series = credit_approval[target_column]

X_train, X_test, y_train, y_test = train_test_split(
    feature_frame,
    target_series,
    test_size=0.2,
    random_state=42,
)

# X_train / X_test hold the feature columns, y_train / y_test the matching
# target values.
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (552, 15) (552,)
Testing data shape: (138, 15) (138,)


In [None]:
# No attribute is declared stable; every column except the decision column
# 'A16' is treated as flexible (changeable by an action rule).
stable_attributes = []
flexible_attributes = credit_approval.drop(columns=['A16']).columns.tolist()

print("Stable Attributes:", stable_attributes)
print("Flexible Attributes:", flexible_attributes)

Stable Attributes: []
Flexible Attributes: ['A15', 'A14', 'A13', 'A12', 'A11', 'A10', 'A9', 'A8', 'A7', 'A6', 'A5', 'A4', 'A3', 'A2', 'A1']


In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict

class ActionRulesDiscovery:
    """Mine single-condition classification rules and pair them into action rules.

    A classification rule has the form ``attribute == value -> decision``.
    An action rule pairs a rule for an undesired decision with a rule for the
    desired decision on the same flexible attribute, suggesting the value
    change ``source_value → target_value``.
    """

    def __init__(self, data, stable_attrs, flexible_attrs, decision_attr):
        """Initialize the Action Rules Discovery system.

        Args:
            data: DataFrame holding the attributes and the decision column.
                It is stored by reference, not copied.
            stable_attrs: List of attribute names that must not change.
            flexible_attrs: List of attribute names that may change.
            decision_attr: Name of the decision (class) column.
        """
        self.data = data
        self.stable_attrs = stable_attrs
        self.flexible_attrs = flexible_attrs
        self.decision_attr = decision_attr
        self.rules = []
        self.action_rules = []

    def discretize_numeric_columns(self, n_bins=5):
        """Discretize numeric columns for better rule discovery.

        Every numeric (non-boolean) column with more than ``n_bins`` distinct
        values is replaced in ``self.data`` by an ordered categorical with
        labels ``bin_0``, ``bin_1``, ... built from equal-frequency bins.
        Missing values are filled with the column median before binning.

        Note that ``self.data`` is the frame passed to ``__init__``, so the
        caller's DataFrame is modified in place.

        Args:
            n_bins: Requested number of quantile bins. Fewer bins are produced
                when several quantile edges coincide.
        """
        for col in self.data.columns:
            column = self.data[col]
            is_numeric = pd.api.types.is_numeric_dtype(column)
            if not is_numeric or pd.api.types.is_bool_dtype(column):
                continue

            # Skip columns that already have too few distinct values to bin.
            if column.nunique() <= n_bins:
                continue

            try:
                filled = column.fillna(column.median())

                # Ask for integer codes and the final edges instead of passing
                # a fixed label list: with duplicates='drop' the number of
                # bins can shrink, and a fixed-length label list would then
                # make qcut raise and leave skewed columns untouched.
                codes, edges = pd.qcut(
                    filled,
                    q=n_bins,
                    labels=False,
                    retbins=True,
                    duplicates='drop'
                )
                categories = [f'bin_{i}' for i in range(len(edges) - 1)]
                self.data[col] = pd.Categorical.from_codes(
                    codes.to_numpy(dtype=int),
                    categories=categories,
                    ordered=True
                )
            except Exception as e:
                # Best effort: report the column and keep it unchanged.
                print(f"Warning: Could not discretize column {col}. Error: {str(e)}")
                continue

    def extract_classification_rules(self, min_support=0.1, min_confidence=0.6):
        """Extract classification rules for each decision class.

        For every decision value and every attribute value, a rule is kept
        when ``count(attr == value and decision) / len(data) >= min_support``
        and ``count(attr == value and decision) / count(attr == value) >=
        min_confidence``. Rules are appended to ``self.rules``; calling this
        method again adds to the existing list.

        Args:
            min_support: Minimum fraction of all records covered by the rule.
            min_confidence: Minimum fraction of records with the attribute
                value that carry the decision value.
        """
        n_records = len(self.data)
        decision_column = self.data[self.decision_attr]

        for decision_value in decision_column.unique():
            target_records = self.data[decision_column == decision_value]

            for attr in self.flexible_attrs + self.stable_attrs:
                attr_column = self.data[attr]
                class_counts = target_records[attr].value_counts()

                for value, class_count in class_counts.items():
                    # Categorical columns report unused categories with a
                    # zero count; they cannot support a rule.
                    if class_count == 0:
                        continue

                    support = class_count / n_records
                    if support < min_support:
                        continue

                    total_with_value = int((attr_column == value).sum())
                    confidence = class_count / total_with_value

                    if confidence >= min_confidence:
                        self.rules.append({
                            'conditions': [(attr, value)],
                            'decision': decision_value,
                            'support': float(support),
                            'confidence': float(confidence)
                        })

    def generate_action_rules(self, desired_effect):
        """Generate action rules that lead to the desired effect.

        Every rule whose decision differs from ``desired_effect`` is compared
        with every rule whose decision equals it; each successful comparison
        is appended to ``self.action_rules``.

        Args:
            desired_effect: The decision value the action rules should reach.
        """
        source_rules = [r for r in self.rules if r['decision'] != desired_effect]
        target_rules = [r for r in self.rules if r['decision'] == desired_effect]

        for source_rule in source_rules:
            for target_rule in target_rules:
                action_rule = self._compare_rules(source_rule, target_rule)
                if action_rule:
                    self.action_rules.append(action_rule)

    def _compare_rules(self, source_rule, target_rule):
        """Compare two rules to generate an action rule.

        Returns:
            A dict with ``actions``, ``source_decision``, ``target_decision``,
            ``support`` and ``confidence`` (each the minimum of the two
            rules' values), or ``None`` when a shared stable attribute has
            different values or no flexible attribute changes value.
        """
        # Stable attributes present in both rules must agree.
        for attr, value in source_rule['conditions']:
            if attr in self.stable_attrs:
                target_value = next((v for a, v in target_rule['conditions'] if a == attr), None)
                # Compare against None explicitly: a truthiness test would
                # wrongly ignore legitimate falsy values such as 0 or ''.
                if target_value is not None and value != target_value:
                    return None

        # Suggest a value change for every flexible attribute that differs.
        actions = []
        for attr, source_value in source_rule['conditions']:
            if attr in self.flexible_attrs:
                target_value = next((v for a, v in target_rule['conditions'] if a == attr), None)
                if target_value is not None and source_value != target_value:
                    actions.append((attr, f"{source_value} → {target_value}"))

        if actions:
            return {
                'actions': actions,
                'source_decision': source_rule['decision'],
                'target_decision': target_rule['decision'],
                'support': min(source_rule['support'], target_rule['support']),
                'confidence': min(source_rule['confidence'], target_rule['confidence'])
            }
        return None

    def get_top_action_rules(self, n=10):
        """Get top n action rules sorted by support and confidence.

        Rules are ranked by the product ``support * confidence``, highest
        first.
        """
        sorted_rules = sorted(self.action_rules,
                              key=lambda x: (x['support'] * x['confidence']),
                              reverse=True)
        return sorted_rules[:n]

# Initialize and use the system
# Build the discovery object over the combined frame; 'A16' is the decision column.
action_discovery = ActionRulesDiscovery(
    data=credit_approval,
    stable_attrs=stable_attributes,
    flexible_attrs=flexible_attributes,
    decision_attr='A16',
)

# Column dtypes and cardinalities before any preprocessing.
discovery_frame = action_discovery.data
print("Data types before discretization:")
print(discovery_frame.dtypes)
print("\nNumber of unique values per column:")
print(discovery_frame.nunique())

# Discretization is currently disabled, so the dtype dump below is identical
# to the one above.
# action_discovery.discretize_numeric_columns(n_bins=5)

print("\nData types after discretization:")
print(action_discovery.data.dtypes)

# Mine classification rules, then pair them into action rules that move a
# record to the '+' decision.
action_discovery.extract_classification_rules(min_support=0.1, min_confidence=0.6)
action_discovery.generate_action_rules(desired_effect='+')

# Report the five best-ranked action rules.
top_rules = action_discovery.get_top_action_rules(n=5)

print("\nTop 5 Action Rules:")
for rank, top_rule in enumerate(top_rules, start=1):
    print(
        f"\nAction Rule {rank}:",
        f"Actions needed: {top_rule['actions']}",
        f"To change decision from {top_rule['source_decision']} to {top_rule['target_decision']}",
        f"Support: {top_rule['support']:.3f}",
        f"Confidence: {top_rule['confidence']:.3f}",
        sep="\n",
    )

Data types before discretization:
A15      int64
A14    float64
A13     object
A12     object
A11      int64
A10     object
A9      object
A8     float64
A7      object
A6      object
A5      object
A4      object
A3     float64
A2     float64
A1      object
A16     object
dtype: object

Number of unique values per column:
A15    240
A14    170
A13      3
A12      2
A11     23
A10      2
A9       2
A8     132
A7       9
A6      14
A5       3
A4       3
A3     215
A2     349
A1       2
A16      2
dtype: int64

Data types after discretization:
A15      int64
A14    float64
A13     object
A12     object
A11      int64
A10     object
A9      object
A8     float64
A7      object
A6      object
A5      object
A4      object
A3     float64
A2     float64
A1      object
A16     object
dtype: object

Top 5 Action Rules:

Action Rule 1:
Actions needed: [('A9', 'f → t')]
To change decision from - to +
Support: 0.412
Confidence: 0.787

Action Rule 2:
Actions needed: [('A10', 'f → t')]
To change de

In [None]:
import pandas as pd
import numpy as np
from copy import deepcopy

class ActionRulesTransform:
    """Apply action rules to a copy of a dataset and summarize the changes.

    Each action rule is a dict with the keys ``'actions'`` (a list of
    ``(attribute, "source → target")`` pairs), ``'source_decision'``,
    ``'target_decision'`` and ``'confidence'``.
    """

    def __init__(self, action_rules, original_data, decision_attr='A16'):
        """Store the rules and the dataset they will be applied to.

        Args:
            action_rules: List of action rule dicts.
            original_data: DataFrame to transform; it is never modified.
            decision_attr: Name of the decision column (defaults to 'A16').
        """
        self.action_rules = action_rules
        self.original_data = original_data
        self.decision_attr = decision_attr
        self.transformed_data = None

    def count_rules(self):
        """Count total rules and breakdown by decision transitions.

        Returns:
            Dict with ``total_rules`` and ``transition_counts``, the latter
            mapping ``(source_decision, target_decision)`` to a rule count.
        """
        # A plain dict is used so the class does not depend on a name
        # imported in a different notebook cell.
        transition_counts = {}
        for rule in self.action_rules:
            transition = (rule['source_decision'], rule['target_decision'])
            transition_counts[transition] = transition_counts.get(transition, 0) + 1

        return {
            'total_rules': len(self.action_rules),
            'transition_counts': transition_counts
        }

    def _parse_change(self, attr, value_change):
        """Split a ``"source → target"`` string into typed source/target values.

        The rule text carries values as strings. For numeric columns they are
        converted back to numbers, because a string never compares equal to a
        number and the rule would otherwise match no record.
        """
        parts = value_change.split(' → ')
        source_value, target_value = parts[0], parts[1]

        column = self.original_data[attr]
        if pd.api.types.is_numeric_dtype(column) and not pd.api.types.is_bool_dtype(column):
            try:
                return pd.to_numeric(source_value), pd.to_numeric(target_value)
            except (ValueError, TypeError):
                # Not parseable as numbers (e.g. bin labels): keep the text.
                pass
        return source_value, target_value

    def apply_rules(self, confidence_threshold=0.6):
        """Apply action rules to transform the dataset.

        Rules with ``confidence >= confidence_threshold`` are applied in list
        order to a copy of the original data. A record is changed when it has
        the rule's source decision and all source attribute values; its
        attributes and its decision are then set to the rule's targets. Each
        record is modified by at most one rule.

        Returns:
            Dict with ``records_modified`` and ``percentage_modified``.
        """
        self.transformed_data = self.original_data.copy()
        transformed = self.transformed_data

        # Tracks which records were already rewritten by an earlier rule.
        modified = pd.Series(False, index=transformed.index)

        for rule in self.action_rules:
            if rule['confidence'] < confidence_threshold:
                continue

            changes = [
                (attr,) + self._parse_change(attr, value_change)
                for attr, value_change in rule['actions']
            ]

            mask = ~modified & (transformed[self.decision_attr] == rule['source_decision'])
            for attr, source_value, _ in changes:
                mask &= (transformed[attr] == source_value)

            if not mask.any():
                continue

            for attr, _, target_value in changes:
                transformed.loc[mask, attr] = target_value
            transformed.loc[mask, self.decision_attr] = rule['target_decision']
            modified |= mask

        changes_made = int(modified.sum())
        n_records = len(self.original_data)
        return {
            'records_modified': changes_made,
            # Guard against an empty dataset instead of dividing by zero.
            'percentage_modified': (changes_made / n_records) * 100 if n_records else 0.0
        }

    def get_transformation_summary(self):
        """Get summary of changes between original and transformed datasets.

        Returns:
            The string "No transformation has been applied yet." before
            ``apply_rules`` was called; otherwise a dict with the original
            and transformed class distributions and, per attribute, the
            number of cells whose value changed.
        """
        if self.transformed_data is None:
            return "No transformation has been applied yet."

        summary = {
            'original_class_distribution': self.original_data[self.decision_attr].value_counts().to_dict(),
            'transformed_class_distribution': self.transformed_data[self.decision_attr].value_counts().to_dict(),
            'attribute_changes': {}
        }

        for column in self.transformed_data.columns:
            if column == self.decision_attr:
                continue
            original_col = self.original_data[column]
            transformed_col = self.transformed_data[column]
            # NaN != NaN evaluates to True, so cells missing in both frames
            # must be excluded or they are reported as changes.
            both_missing = original_col.isna() & transformed_col.isna()
            changes = int(((transformed_col != original_col) & ~both_missing).sum())
            if changes > 0:
                summary['attribute_changes'][column] = changes

        return summary

# Use the transformations
# Use the transformations
transformer = ActionRulesTransform(action_discovery.action_rules, credit_approval)

# Get rule counts
rule_counts = transformer.count_rules()
print("\nAction Rules Summary:")
print(f"Total number of rules: {rule_counts['total_rules']}")
print("\nTransition counts:")
for (source, target), count in rule_counts['transition_counts'].items():
    print(f"From {source} to {target}: {count} rules")

# Apply transformations
transformation_stats = transformer.apply_rules(confidence_threshold=0.6)
print("\nTransformation Statistics:")
print(f"Number of records modified: {transformation_stats['records_modified']}")
print(f"Percentage of dataset modified: {transformation_stats['percentage_modified']:.2f}%")

# Get transformation summary
summary = transformer.get_transformation_summary()
print("\nTransformation Summary:")
print("\nClass Distribution Changes:")
print("Original:", summary['original_class_distribution'])
print("Transformed:", summary['transformed_class_distribution'])
print("\nAttribute Changes:")
for attr, changes in summary['attribute_changes'].items():
    print(f"{attr}: {changes} changes")

# Save transformed dataset
transformed_dataset = transformer.transformed_data

# Display sample of changes
print("\nSample of Changed Records:")
# NaN != NaN evaluates to True, so a plain `!=` would flag every row that
# merely contains a missing value. Cells missing in both frames are excluded.
both_missing = transformed_dataset.isna() & credit_approval.isna()
changed_mask = ((transformed_dataset != credit_approval) & ~both_missing).any(axis=1)
print(transformed_dataset[changed_mask].head())


Action Rules Summary:
Total number of rules: 2

Transition counts:
From - to +: 2 rules

Transformation Statistics:
Number of records modified: 362
Percentage of dataset modified: 52.46%

Transformation Summary:

Class Distribution Changes:
Original: {'-': 383, '+': 307}
Transformed: {'+': 669, '-': 21}

Attribute Changes:
A14: 13 changes
A10: 297 changes
A9: 65 changes
A7: 9 changes
A6: 9 changes
A5: 6 changes
A4: 6 changes
A2: 12 changes
A1: 12 changes

Sample of Changed Records:
    A15    A14 A13 A12  A11 A10 A9      A8  A7  A6 A5 A4   A3     A2 A1 A16
70    0  420.0   s   t    0   t  t   1.585  bb   e  g  u  7.5  32.33  b   +
71    0    NaN   g   t    0   t  t  12.500  bb   d  g  u  4.0  34.83  b   +
72    0  980.0   g   t    0   t  t  13.500   v  cc  g  u  5.0  38.58  a   +
73    0  400.0   s   f    0   t  t  10.750   v   m  g  u  0.5  44.25  b   +
74    2  160.0   g   f    0   t  f   1.625   v   c  p  y  7.0  44.83  b   +


In [None]:
# Show the transformed frame produced by the transformer as the cell output.
transformed_dataset

Unnamed: 0,A15,A14,A13,A12,A11,A10,A9,A8,A7,A6,A5,A4,A3,A2,A1,A16
0,0,202.0,g,f,1,t,t,1.25,v,w,g,u,0.000,30.83,b,+
1,560,43.0,g,f,6,t,t,3.04,h,q,g,u,4.460,58.67,a,+
2,824,280.0,g,f,0,f,t,1.50,h,q,g,u,0.500,24.50,a,+
3,3,100.0,g,t,5,t,t,3.75,v,w,g,u,1.540,27.83,b,+
4,0,120.0,s,f,0,f,t,1.71,v,w,g,u,5.625,20.17,b,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,0,260.0,g,f,0,t,f,1.25,h,e,p,y,10.085,21.08,b,+
686,394,200.0,g,t,2,t,t,2.00,v,c,g,u,0.750,22.67,a,+
687,1,200.0,g,t,1,t,t,2.00,ff,ff,p,y,13.500,25.25,a,+
688,750,280.0,g,f,0,t,f,0.04,v,aa,g,u,0.205,17.92,b,+


In [None]:
# prompt: Using dataframe transformed_dataset: split into train and test dataset

from sklearn.model_selection import train_test_split

# 'A16' is taken as the target column of the transformed frame.
X = transformed_dataset.drop(columns='A16')
y = transformed_dataset['A16']

# 80% train / 20% test, using the same test size and seed as the split of
# the original data.
Xt_train, Xt_test, yt_train, yt_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of every part to verify the split.
for part_name, part in (
    ("Xt_train", Xt_train),
    ("Xt_test", Xt_test),
    ("yt_train", yt_train),
    ("yt_test", yt_test),
):
    print(f"{part_name} shape:", part.shape)


Xt_train shape: (552, 15)
Xt_test shape: (138, 15)
yt_train shape: (552,)
yt_test shape: (138,)


In [None]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.8.0-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.8.0-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: category_encoders
Successfully installed category_encoders-2.8.0


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import category_encoders as ce # Import category_encoders


# One-hot encoder configured with handle_unknown='ignore' and category-based
# column names.
encoder_options = {'handle_unknown': 'ignore', 'use_cat_names': True}
encoder = ce.OneHotEncoder(**encoder_options)

# The encoder is fitted on the original training features only ...
Xo_train_encoded = encoder.fit_transform(X_train)

# ... and the fitted encoder is then applied to the original test features.
Xo_test_encoded = encoder.transform(X_test)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification


# Random forest with a fixed depth limit and seed, fitted on the encoded
# original training data.
forest_params = {'max_depth': 12, 'random_state': 0}
clf = RandomForestClassifier(**forest_params)
clf.fit(Xo_train_encoded, y_train)

In [None]:
# Both metrics are imported here: confusion_matrix is otherwise first imported
# in a later cell, so this cell raised NameError on a fresh kernel.
from sklearn.metrics import accuracy_score, confusion_matrix

# Predict on the (encoded) original test set
y_pred = clf.predict(Xo_test_encoded)

# Evaluate the model: accuracy and confusion matrix against the original labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Random Forest Classifier: {accuracy}")
print("confusion matrix\n",confusion_matrix(y_test, y_pred))

Accuracy of the Random Forest Classifier: 0.8695652173913043
confusion matrix
 [[59 11]
 [ 7 61]]


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import category_encoders as ce # Import category_encoders


# The encoder fitted earlier on the original training features (X_train) is
# reused below; creating and fitting a new encoder on the transformed
# training data is left disabled:
# encoder = ce.OneHotEncoder(handle_unknown='ignore', use_cat_names=True)
# X_train_encoded = encoder.fit_transform(Xt_train)

# Encode the transformed test features with the already-fitted encoder
# (presumably so the classifier trained on the original data can score them
# -- confirm the intent).
X_test_encoded = encoder.transform(Xt_test)


In [None]:
# Both metrics are imported here: confusion_matrix is otherwise first imported
# in a later cell, so this cell raised NameError on a fresh kernel.
from sklearn.metrics import accuracy_score, confusion_matrix

# Predict on the encoded *transformed* test features with the classifier that
# was trained on the original data
y_pred = clf.predict(X_test_encoded)

# NOTE(review): predictions are scored against y_test (the original labels),
# not yt_test (the labels after the action rules were applied) -- confirm that
# this is the intended comparison.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Random Forest Classifier: {accuracy}")
print("confusion matrix\n",confusion_matrix(y_test, y_pred))

Accuracy of the Random Forest Classifier: 0.7608695652173914
confusion matrix
 [[59 11]
 [22 46]]


In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import category_encoders as ce

class ActionRulesDiscovery:
    """Mine single-condition classification rules and pair them into action rules.

    A classification rule has the form ``attribute == value -> decision``.
    An action rule pairs a rule for an undesired decision with a rule for the
    desired decision on the same flexible attribute, suggesting the value
    change ``source_value → target_value``.
    """

    def __init__(self, data, stable_attrs, flexible_attrs, decision_attr):
        """Store the dataset and the attribute roles.

        Args:
            data: DataFrame holding the attributes and the decision column.
            stable_attrs: List of attribute names that must not change.
            flexible_attrs: List of attribute names that may change.
            decision_attr: Name of the decision (class) column.
        """
        self.data = data
        self.stable_attrs = stable_attrs
        self.flexible_attrs = flexible_attrs
        self.decision_attr = decision_attr
        self.rules = []
        self.action_rules = []

    def extract_classification_rules(self, min_support=0.1, min_confidence=0.4):
        """Extract classification rules for each decision class.

        For every decision value and every attribute value, a rule is kept
        when ``count(attr == value and decision) / len(data) >= min_support``
        and ``count(attr == value and decision) / count(attr == value) >=
        min_confidence``. Rules are appended to ``self.rules``; calling this
        method again adds to the existing list.

        Args:
            min_support: Minimum fraction of all records covered by the rule.
            min_confidence: Minimum fraction of records with the attribute
                value that carry the decision value.
        """
        n_records = len(self.data)
        decision_column = self.data[self.decision_attr]

        for decision_value in decision_column.unique():
            target_records = self.data[decision_column == decision_value]

            for attr in self.flexible_attrs + self.stable_attrs:
                attr_column = self.data[attr]
                class_counts = target_records[attr].value_counts()

                for value, class_count in class_counts.items():
                    # Categorical columns report unused categories with a
                    # zero count; they cannot support a rule.
                    if class_count == 0:
                        continue

                    support = class_count / n_records
                    if support < min_support:
                        continue

                    total_with_value = int((attr_column == value).sum())
                    confidence = class_count / total_with_value

                    if confidence >= min_confidence:
                        self.rules.append({
                            'conditions': [(attr, value)],
                            'decision': decision_value,
                            'support': float(support),
                            'confidence': float(confidence)
                        })

    def generate_action_rules(self, desired_effect):
        """Generate action rules that lead to the desired effect.

        Every rule whose decision differs from ``desired_effect`` is compared
        with every rule whose decision equals it; each successful comparison
        is appended to ``self.action_rules``.
        """
        source_rules = [r for r in self.rules if r['decision'] != desired_effect]
        target_rules = [r for r in self.rules if r['decision'] == desired_effect]

        for source_rule in source_rules:
            for target_rule in target_rules:
                action_rule = self._compare_rules(source_rule, target_rule)
                if action_rule:
                    self.action_rules.append(action_rule)

    def _compare_rules(self, source_rule, target_rule):
        """Compare two rules to generate an action rule.

        Returns:
            A dict with ``actions``, ``source_decision``, ``target_decision``,
            ``support`` and ``confidence`` (each the minimum of the two
            rules' values), or ``None`` when a shared stable attribute has
            different values or no flexible attribute changes value.
        """
        # Stable attributes present in both rules must agree.
        for attr, value in source_rule['conditions']:
            if attr in self.stable_attrs:
                target_value = next((v for a, v in target_rule['conditions'] if a == attr), None)
                # Compare against None explicitly: a truthiness test would
                # wrongly ignore legitimate falsy values such as 0 or ''.
                if target_value is not None and value != target_value:
                    return None

        # Suggest a value change for every flexible attribute that differs.
        actions = []
        for attr, source_value in source_rule['conditions']:
            if attr in self.flexible_attrs:
                target_value = next((v for a, v in target_rule['conditions'] if a == attr), None)
                if target_value is not None and source_value != target_value:
                    actions.append((attr, f"{source_value} → {target_value}"))

        if actions:
            return {
                'actions': actions,
                'source_decision': source_rule['decision'],
                'target_decision': target_rule['decision'],
                'support': min(source_rule['support'], target_rule['support']),
                'confidence': min(source_rule['confidence'], target_rule['confidence'])
            }
        return None

class ActionRulesTransform:
    """Apply action rules to a copy of a dataset.

    Each action rule is a dict with the keys ``'actions'`` (a list of
    ``(attribute, "source → target")`` pairs), ``'source_decision'``,
    ``'target_decision'`` and ``'confidence'``.
    """

    def __init__(self, action_rules, original_data, decision_attr='A16'):
        """Store the rules and the dataset they will be applied to.

        Args:
            action_rules: List of action rule dicts.
            original_data: DataFrame to transform; it is never modified.
            decision_attr: Name of the decision column (defaults to 'A16').
        """
        self.action_rules = action_rules
        self.original_data = original_data
        self.decision_attr = decision_attr
        self.transformed_data = None

    def _parse_change(self, attr, value_change):
        """Split a ``"source → target"`` string into typed source/target values.

        The rule text carries values as strings. For numeric columns they are
        converted back to numbers, because a string never compares equal to a
        number and the rule would otherwise match no record.
        """
        parts = value_change.split(' → ')
        source_value, target_value = parts[0], parts[1]

        column = self.original_data[attr]
        if pd.api.types.is_numeric_dtype(column) and not pd.api.types.is_bool_dtype(column):
            try:
                return pd.to_numeric(source_value), pd.to_numeric(target_value)
            except (ValueError, TypeError):
                # Not parseable as numbers (e.g. bin labels): keep the text.
                pass
        return source_value, target_value

    def apply_rules(self, confidence_threshold=0.1):
        """Apply action rules to transform the dataset.

        Rules with ``confidence >= confidence_threshold`` are applied in list
        order to a copy of the original data. A record is changed when it has
        the rule's source decision and all source attribute values; its
        attributes and its decision are then set to the rule's targets. Each
        record is modified by at most one rule.

        Returns:
            The transformed DataFrame (also stored in ``transformed_data``).
        """
        self.transformed_data = self.original_data.copy()
        transformed = self.transformed_data

        # Tracks which records were already rewritten by an earlier rule.
        modified = pd.Series(False, index=transformed.index)

        for rule in self.action_rules:
            if rule['confidence'] < confidence_threshold:
                continue

            changes = [
                (attr,) + self._parse_change(attr, value_change)
                for attr, value_change in rule['actions']
            ]

            mask = ~modified & (transformed[self.decision_attr] == rule['source_decision'])
            for attr, source_value, _ in changes:
                mask &= (transformed[attr] == source_value)

            if not mask.any():
                continue

            for attr, _, target_value in changes:
                transformed.loc[mask, attr] = target_value
            transformed.loc[mask, self.decision_attr] = rule['target_decision']
            modified |= mask

        return self.transformed_data

def train_evaluate_model(original_data, transformed_data, decision_attr='A16'):
    """Train a Random Forest on the original data and score it on both datasets.

    A single model is fitted on the training split of the original data. It is
    then evaluated on the original test split, on the transformed rows of the
    same test split, and on the full original and full transformed feature
    sets.

    NOTE(review): every score is computed against the ORIGINAL labels, also for
    the transformed features -- confirm that this is the intended comparison.

    Args:
        original_data: DataFrame with the features and the decision column.
        transformed_data: DataFrame of the same shape after applying action rules.
        decision_attr: Name of the decision column (defaults to 'A16').

    Returns:
        Dict with the keys 'original', 'transformed', 'full' and
        'full_transformed'; each maps to a dict holding 'accuracy' and
        'confusion_matrix'.
    """
    # Split original data
    X = original_data.drop(columns=decision_attr)
    y = original_data[decision_attr]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Same test size and seed, so this selects the same row positions as the
    # original test split.
    X_t = transformed_data.drop(columns=decision_attr)
    _, X_t_test = train_test_split(X_t, test_size=0.2, random_state=42)

    # The encoder is fitted on the original training features only and reused
    # for every other feature set.
    encoder = ce.OneHotEncoder(handle_unknown='ignore', use_cat_names=True)
    clf = RandomForestClassifier(max_depth=24, random_state=0)
    clf.fit(encoder.fit_transform(X_train), y_train)

    def _score(features, labels):
        """Predict once and return accuracy plus confusion matrix."""
        predictions = clf.predict(encoder.transform(features))
        return {
            'accuracy': accuracy_score(labels, predictions),
            'confusion_matrix': confusion_matrix(labels, predictions)
        }

    # 'full' is the full original feature set and 'full_transformed' the full
    # transformed one; the two keys were previously swapped.
    return {
        'original': _score(X_test, y_test),
        'transformed': _score(X_t_test, y_test),
        'full': _score(X, y),
        'full_transformed': _score(X_t, y)
    }

In [None]:
def print_action_rules(action_rules):
    """
    Write a readable listing of action rules to standard output.

    Args:
        action_rules: List of action rule dictionaries from ActionRulesDiscovery.
            Each one provides 'actions', 'source_decision', 'target_decision',
            'support' and 'confidence'.
    """
    if len(action_rules) == 0 if hasattr(action_rules, '__len__') else not action_rules:
        print("No action rules found.")
        return

    separator = "-" * 40
    print("\n=== Action Rules ===")

    for number, action_rule in enumerate(action_rules, start=1):
        # Collect the lines of one rule, then emit them in a single write.
        lines = [f"\nRule {number}:", "Actions:"]
        lines.extend(f"  • {name}: {change}" for name, change in action_rule['actions'])
        lines.append(f"Source Decision: {action_rule['source_decision']}")
        lines.append(f"Target Decision: {action_rule['target_decision']}")
        lines.append(f"Support: {action_rule['support']:.3f}")
        lines.append(f"Confidence: {action_rule['confidence']:.3f}")
        lines.append(separator)
        print("\n".join(lines))


In [None]:
# Every column except the decision column 'A16' is flexible; no attribute is
# declared stable.
all_feature_columns = list(credit_approval.drop(columns=['A16']).columns)
action_discovery = ActionRulesDiscovery(
    data=credit_approval,
    stable_attrs=[],
    flexible_attrs=all_feature_columns,
    decision_attr='A16',
)

# Mine classification rules with the default thresholds, then derive the
# action rules that lead to the '+' decision.
action_discovery.extract_classification_rules()
action_discovery.generate_action_rules(desired_effect='+')

print_action_rules(action_discovery.action_rules)

# Apply only the action rules whose confidence is at least 0.75.
transformer = ActionRulesTransform(action_discovery.action_rules, credit_approval)
transformed_dataset = transformer.apply_rules(confidence_threshold=0.75)

# Score the model on the original and on the transformed data.
results = train_evaluate_model(credit_approval, transformed_dataset)

split_reports = (("Original", 'original'), ("Transformed", 'transformed'))
for label, key in split_reports:
    print(f"{label} data accuracy:", results[key]['accuracy'])

for label, key in split_reports:
    print(f"{label} data confusion_matrix:\n", results[key]['confusion_matrix'])

for label, key in (("Full", 'full'), ("Full transformed", 'full_transformed')):
    print(f"{label} data accuracy:", results[key]['accuracy'])
    print(f"{label} data confusion_matrix:\n", results[key]['confusion_matrix'])


=== Action Rules ===

Rule 1:
Actions:
  • A12: f → t
Source Decision: -
Target Decision: +
Support: 0.212
Confidence: 0.462
----------------------------------------

Rule 2:
Actions:
  • A12: t → f
Source Decision: -
Target Decision: +
Support: 0.233
Confidence: 0.430
----------------------------------------

Rule 3:
Actions:
  • A10: f → t
Source Decision: -
Target Decision: +
Support: 0.303
Confidence: 0.708
----------------------------------------

Rule 4:
Actions:
  • A9: f → t
Source Decision: -
Target Decision: +
Support: 0.412
Confidence: 0.787
----------------------------------------

Rule 5:
Actions:
  • A7: v → h
Source Decision: -
Target Decision: +
Support: 0.126
Confidence: 0.576
----------------------------------------

Rule 6:
Actions:
  • A5: p → g
Source Decision: -
Target Decision: +
Support: 0.171
Confidence: 0.493
----------------------------------------

Rule 7:
Actions:
  • A4: y → u
Source Decision: -
Target Decision: +
Support: 0.171
Confidence: 0.493
--------