In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

# Load dataset (Iris for simplicity)
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# For simplicity, we will only consider two classes (class 0 and class 1)
X = X[y != 2]
y = y[y != 2]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Function to generate a counterfactual manually
def generate_counterfactual(instance, model, desired_class, feature_names):
    counterfactual = instance.copy()
    
    # Convert instance to a DataFrame to retain feature names
    instance_df = pd.DataFrame([instance], columns=feature_names)
    print(instance)
    
    # Predict the class of the original instance
    original_prediction = model.predict(instance_df)[0]
    print(f"Original Prediction: {original_prediction}")
    
    # Try to flip the prediction by changing one feature at a time
    for i, feature in enumerate(instance):
        if original_prediction != desired_class:
            # Perturb the feature to see if it changes the prediction
            counterfactual[i] = feature + np.random.uniform(-0.5, 0.5)
            print(counterfactual)
            
            # Convert counterfactual back to DataFrame for prediction
            counterfactual_df = pd.DataFrame([counterfactual], columns=feature_names)
            new_prediction = model.predict(counterfactual_df)[0]
            
            print(f"Trying to modify feature '{feature_names[i]}'. New Prediction: {new_prediction}")
            
            # If the new prediction is the desired class, stop and return the counterfactual
            if new_prediction == desired_class:
                print(f"Counterfactual found by changing '{feature_names[i]}'.")
                break
        else:
            break
    
    return counterfactual

# Select an instance from the test set to explain (e.g., the first one)
instance = X_test.iloc[0].values
desired_class = 1 if model.predict(pd.DataFrame([instance], columns=iris.feature_names))[0] == 0 else 0  # Flip the prediction

# Generate a counterfactual that flips the prediction
counterfactual = generate_counterfactual(instance, model, desired_class, iris.feature_names)

# Show original and counterfactual instances
print("\nOriginal instance:")
print(instance)
print("\nCounterfactual instance:")
print(counterfactual)



[6.  2.7 5.1 1.6]
Original Prediction: 1
[6.45117669 2.7        5.1        1.6       ]
Trying to modify feature 'sepal length (cm)'. New Prediction: 1
[6.45117669 2.841756   5.1        1.6       ]
Trying to modify feature 'sepal width (cm)'. New Prediction: 1
[6.45117669 2.841756   4.94401445 1.6       ]
Trying to modify feature 'petal length (cm)'. New Prediction: 1
[6.45117669 2.841756   4.94401445 2.09838619]
Trying to modify feature 'petal width (cm)'. New Prediction: 1

Original instance:
[6.  2.7 5.1 1.6]

Counterfactual instance:
[6.45117669 2.841756   4.94401445 2.09838619]
