In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Read the Red Wine Quality CSV; the file is semicolon-separated, not comma-separated
data = pd.read_csv("winequality-red.csv", sep=';')

# Sanity check: show a preview of the rows and the parsed header names
print(data.head())
print(data.columns)

# Normalise header names by trimming whitespace and dropping stray double quotes
data.columns = data.columns.str.strip().str.replace('"', '', regex=False)

# 'quality' is the prediction target; every other column is a feature
y = data['quality']
X = data.drop(columns='quality')

# Map the quality labels onto consecutive integer class ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training split
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Function to generate a counterfactual manually
def generate_counterfactual(instance, model, desired_class, feature_names,
                            perturbation=0.5, rng=None):
    """Search for a single-feature change that makes ``model`` predict ``desired_class``.

    Features are tried in order. For each one, a random offset drawn
    uniformly from ``[-perturbation, perturbation)`` is added to that feature
    alone and the model is queried again. A perturbation that does not
    produce ``desired_class`` is undone before the next feature is tried, so
    the result never differs from ``instance`` in more than one feature.

    Progress is reported with ``print``.

    Args:
        instance: 1-D sequence of feature values supporting ``.copy()`` and
            item assignment by position (e.g. a NumPy array). Not modified.
        model: Object whose ``predict`` accepts a one-row DataFrame with
            ``feature_names`` as columns and returns a sequence whose first
            element is the predicted class.
        desired_class: Class label the counterfactual should be assigned.
        feature_names: Column labels, one per entry of ``instance``.
        perturbation: Half-width of the uniform perturbation range.
        rng: Optional random source exposing ``uniform(low, high)`` (for
            example ``np.random.default_rng(seed)``). Defaults to NumPy's
            global random state.

    Returns:
        A copy of ``instance``. If a counterfactual was found, exactly one
        feature is changed; if none was found, or the model already predicts
        ``desired_class``, the copy equals ``instance``.
    """
    counterfactual = instance.copy()
    sampler = np.random if rng is None else rng

    # Convert instance to a DataFrame to retain feature names
    instance_df = pd.DataFrame([instance], columns=feature_names)
    original_prediction = model.predict(instance_df)[0]
    print(f"Original Prediction: {original_prediction}")

    # Nothing to search for when the model already outputs the desired class.
    if original_prediction == desired_class:
        return counterfactual

    for i, feature in enumerate(instance):
        counterfactual[i] = feature + sampler.uniform(-perturbation, perturbation)

        counterfactual_df = pd.DataFrame([counterfactual], columns=feature_names)
        new_prediction = model.predict(counterfactual_df)[0]
        print(f"Trying to modify feature '{feature_names[i]}'. New Prediction: {new_prediction}")

        if new_prediction == desired_class:
            print(f"Counterfactual found by changing '{feature_names[i]}'.")
            return counterfactual

        # Undo the failed perturbation so later attempts (and the returned
        # vector) really do change only one feature at a time.
        counterfactual[i] = feature

    print("No counterfactual found; returning the instance unchanged.")
    return counterfactual

# Select an instance from the test set to explain (e.g., the first one)
instance = X_test.iloc[0].values
instance_df = pd.DataFrame([instance], columns=X.columns)

# The target is label-encoded into several classes, so there is no single
# "opposite" class to flip to. Aim for the most probable class other than the
# current prediction, which is the nearest alternative according to the model.
current_class = model.predict(instance_df)[0]
class_probabilities = model.predict_proba(instance_df)[0]
alternatives = [
    (probability, label)
    for probability, label in zip(class_probabilities, model.classes_)
    if label != current_class
]
# Fall back to the current class if the model only knows a single class.
desired_class = (
    max(alternatives, key=lambda pair: pair[0])[1] if alternatives else current_class
)

# Generate a counterfactual that changes the prediction to the desired class
counterfactual = generate_counterfactual(instance, model, desired_class, X.columns)

# Show original and counterfactual instances
print("\nOriginal instance:")
print(instance)
print("\nCounterfactual instance:")
print(counterfactual)


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5 