# Counterfactuals
## Current model

### Load current model

In [1]:
import joblib


# Load the currently deployed model artifact. The path is relative, so it is
# resolved against the kernel's working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
model = joblib.load(filename="../../saved_models/current_model.pkl")

### Load dataset

In [4]:
import numpy as np
import pandas as pd
import sys

import os
import pathlib

sys.path.append("../..")
from training.creating_dataset import load_and_preprocess_data

# The dataset directory is resolved two levels above the kernel's current
# working directory, so the result depends on where the notebook is started.
current_path = os.getcwd()
DATASETS_DIR = pathlib.Path(current_path).parent.parent / "pathfinder_2e_data"
DATASET_FILES = [
    "pathfinder-bestiary.db",
    "pathfinder-bestiary-2.db",
    "pathfinder-bestiary-3.db",
]
# Paths are kept as plain strings, one per bestiary file.
DATASET_PATHS = [f"{DATASETS_DIR}/{file}" for file in DATASET_FILES]

# Single source of truth for the monster stats requested from the loader.
characteristics = [
    "cha",
    "con",
    "dex",
    "int",
    "str",
    "wis",
    "ac",
    "hp",
]

# Pass the list defined above instead of repeating it inline, so the two
# copies cannot drift apart.
df = load_and_preprocess_data(DATASET_PATHS, characteristics=characteristics)

In [5]:
df.head()

Unnamed: 0,book,str,wis,dex,hp,con,cha,level,ac,int
0,Pathfinder Bestiary,7,2,2,215,5,1,10,29,1
1,Pathfinder Bestiary,9,2,0,220,5,1,10,28,-4
2,Pathfinder Bestiary,7,3,3,175,6,-2,8,25,-4
3,Pathfinder Bestiary,7,5,4,315,5,6,17,41,6
4,Pathfinder Bestiary,-5,1,3,20,1,1,1,16,-1


In [6]:
from serving.backend.constants import ORDERED_CHARACTERISTICS


# Feature columns first, in the order given by ORDERED_CHARACTERISTICS,
# followed by the "book" and "level" columns.
column_order = [*ORDERED_CHARACTERISTICS, "book", "level"]
df = df.loc[:, column_order]

In [7]:
df.head()

Unnamed: 0,cha,con,dex,int,str,wis,ac,hp,book,level
0,1,5,2,1,7,2,29,215,Pathfinder Bestiary,10
1,1,5,0,-4,9,2,28,220,Pathfinder Bestiary,10
2,-2,6,3,-4,7,3,25,175,Pathfinder Bestiary,8
3,6,5,4,6,7,5,41,315,Pathfinder Bestiary,17
4,1,1,3,-1,-5,1,16,20,Pathfinder Bestiary,1


### Counterfactuals

In [8]:
# Working frame for DiCE: everything except the "book" column.
dataset = df.drop(columns="book")
# All remaining columns other than the "level" target are treated as
# continuous features.
continuous_features = dataset.columns.drop("level").tolist()

In [9]:
continuous_features

['cha', 'con', 'dex', 'int', 'str', 'wis', 'ac', 'hp']

In [10]:
dataset.head()

Unnamed: 0,cha,con,dex,int,str,wis,ac,hp,level
0,1,5,2,1,7,2,29,215,10
1,1,5,0,-4,9,2,28,220,10
2,-2,6,3,-4,7,3,25,175,8
3,6,5,4,6,7,5,41,315,17
4,1,1,3,-1,-5,1,16,20,1


In [11]:
# dice imports
import dice_ml
from dice_ml import Dice

In [13]:
# Wrap the loaded estimator for DiCE as a scikit-learn regressor.
m = dice_ml.Model(
    model=model,
    backend="sklearn",
    model_type="regressor",
)

# Describe the data to DiCE: every feature is continuous and "level" is the
# outcome column.
d = dice_ml.Data(
    dataframe=dataset,
    continuous_features=continuous_features,
    outcome_name="level",
)

In [15]:
# Build the counterfactual explainer using DiCE's genetic search method.
exp = Dice(
    data_interface=d,
    model_interface=m,
    method="genetic",
)

In [25]:
from serving.backend.calculate_level import calculate_level


# Row position of the monster to explain; change it here only.
QUERY_POSITION = 10

# One-row DataFrame holding the query features (target column removed).
query = dataset.drop(columns=["level"]).iloc[QUERY_POSITION : QUERY_POSITION + 1]
# Use positional access for the dict as well, so the slice and the dict refer
# to the same row even when the index is not the default RangeIndex.
query_dict = query.iloc[0].to_dict()
query

Unnamed: 0,cha,con,dex,int,str,wis,ac,hp
10,-1,2,2,-4,3,1,17,17


In [22]:
dataset.loc[10]

cha      -1
con       2
dex       2
int      -4
str       3
wis       1
ac       17
hp       17
level     1
Name: 10, dtype: int64

In [26]:
# Level reported by the serving helper for the query monster.
# NOTE(review): DiCE shows a raw model output of ~0.77 for this same row, so
# calculate_level presumably rounds the regressor output — confirm.
level = calculate_level(monster_stats=query_dict, model=model)
level

1

In [27]:
# Target interval for the regressor output and number of counterfactuals.
# NOTE(review): presumably this interval is chosen so the counterfactuals map
# to a lower level than the query's — confirm against calculate_level.
DESIRED_LEVEL_RANGE = [-1.33, 0.33]
TOTAL_CFS = 10

genetic = exp.generate_counterfactuals(
    query,
    total_CFs=TOTAL_CFS,
    desired_range=DESIRED_LEVEL_RANGE,
    verbose=True,
)

  0%|          | 0/1 [00:00<?, ?it/s]

Initializing initial parameters to the genetic algorithm...


100%|██████████| 1/1 [00:12<00:00, 12.62s/it]

Initialization complete! Generating counterfactuals...
Diverse Counterfactuals found! total time taken: 00 min 12 sec





In [28]:
# With show_only_changes=True, features a counterfactual leaves at the query's
# value are printed as "-", so only the modified stats stand out.
genetic.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : 1)


Unnamed: 0,cha,con,dex,int,str,wis,ac,hp,level
0,-1,2,2,-4,3,1,17,17,0.772387



Diverse Counterfactual set (new outcome: [-1.33, 0.33])


Unnamed: 0,cha,con,dex,int,str,wis,ac,hp,level
0,-,-,-,-,1.0,-,15.0,8.0,-1.0263326412076983
0,-4.0,-,-,-,1.0,-,15.0,4.0,-0.940142096408842
0,0.0,-,-,-5.0,-,2.0,16.0,15.0,0.0711599957826996
0,0.0,3.0,-,-1.0,-,-,15.0,15.0,-0.0132959055319367
0,0.0,-,3.0,-,0.0,-,15.0,7.0,-1.0497470056277742
0,-3.0,-,3.0,-,1.0,-,15.0,8.0,-0.8130331715727404
0,0.0,3.0,-,-,-,2.0,14.0,16.0,-0.0132678808624416
0,0.0,-,3.0,-,-1.0,-,16.0,16.0,0.3254938802151024
0,-2.0,0.0,3.0,-,-1.0,-,-,8.0,-0.7550590424250586
0,-4.0,-,-,-,1.0,-2.0,15.0,4.0,-0.947126954965424


In [29]:
# Serialise the explanation (query instance, counterfactual list and feature
# metadata) to a JSON string.
js = genetic.to_json()

In [30]:
js

'{"test_data": [[[-1.0, 2.0, 2.0, -4.0, 3.0, 1.0, 17.0, 17.0, 0.7723868003696082]]], "cfs_list": [[[-1.0, 2.0, 2.0, -4.0, 1.0, 1.0, 15.0, 8.0, -1.0263326412076985], [-4.0, 2.0, 2.0, -4.0, 1.0, 1.0, 15.0, 4.0, -0.940142096408842], [0.0, 2.0, 2.0, -5.0, 3.0, 2.0, 16.0, 15.0, 0.0711599957826996], [0.0, 3.0, 2.0, -1.0, 3.0, 1.0, 15.0, 15.0, -0.013295905531936773], [0.0, 2.0, 3.0, -4.0, 0.0, 1.0, 15.0, 7.0, -1.0497470056277742], [-3.0, 2.0, 3.0, -4.0, 1.0, 1.0, 15.0, 8.0, -0.8130331715727404], [0.0, 3.0, 2.0, -4.0, 3.0, 2.0, 14.0, 16.0, -0.01326788086244167], [0.0, 2.0, 3.0, -4.0, -1.0, 1.0, 16.0, 16.0, 0.32549388021510245], [-2.0, 0.0, 3.0, -4.0, -1.0, 1.0, 17.0, 8.0, -0.7550590424250586], [-4.0, 2.0, 2.0, -4.0, 1.0, -2.0, 15.0, 4.0, -0.9471269549654241]]], "local_importance": null, "summary_importance": null, "data_interface": {"outcome_name": "level", "data_df": "dummy_data"}, "feature_names": ["cha", "con", "dex", "int", "str", "wis", "ac", "hp"], "feature_names_including_target": ["cha