# Model creation

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import lime
import lime.lime_tabular
import keras
from lime_evaluator import LimeEval
import joblib

# Suppress all warnings (every category, not only UserWarning) to keep cell output readable
import warnings

warnings.filterwarnings("ignore")

## Dataset

In [2]:
# Read the header-less data file and label its columns by position.
# The names are grouped by role purely for readability; the concatenated
# list is what read_csv receives.
df = pd.read_csv(
    "drug_consumption_3.txt",
    header=None,
    names=(
        # leading identifier column
        ["idx"]
        # five "*-Score" columns
        + ["N-Score", "E-Score", "O-Score", "A-Score", "C-Score"]
        # two further numeric feature columns
        + ["Impusiveness", "Sensation seeking"]
        # seven substance columns (e.g. "Cannabis" is the stratification target later on)
        + ["Amphet", "Benzo", "Cannabis", "Heroin", "Ketamine", "Methadone", "Semeron"]
    ),
)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,idx,N-Score,E-Score,O-Score,A-Score,C-Score,Impusiveness,Sensation seeking,Amphet,Benzo,Cannabis,Heroin,Ketamine,Methadone,Semeron
0,1,0.31287,-0.57545,-0.58331,-0.91699,-0.00665,-0.21712,-1.18084,0,0,0,0,0,0,0
1,2,-0.67825,1.93886,1.43533,0.76096,-0.14277,-0.71126,-0.21575,0,0,1,0,0,0,0
2,3,-0.46725,0.80523,-0.84732,-1.6209,-1.0145,-1.37983,0.40148,0,0,0,0,0,0,0
3,4,-0.14882,-0.80615,-0.01928,0.59042,0.58489,-1.37983,-1.18084,0,0,0,0,0,0,0
4,5,0.73545,-1.6334,-0.45174,-0.30172,1.30612,-0.21712,-0.21575,0,0,0,0,0,0,0


## Models

In [3]:
# Containers for per-target models and metrics; nothing is stored in them in this cell.
models, metrics = {}, {}

# Load the single random-forest model persisted for the cannabis target.
# NOTE(review): joblib files are pickles and can execute arbitrary code when
# loaded -- only load artefacts that come from a trusted source.
# NOTE(review): the recorded run of this cell failed with a node-array dtype
# mismatch (the pickle carries an extra `missing_go_to_left` field). This
# presumably means the model was saved with a different scikit-learn version
# than the one installed -- confirm and pin the version used for training.
model = joblib.load("./models/random_forest_cannabis.joblib")

# Show the loaded estimator as the cell output.
model

ValueError: node array from the pickle has an incompatible dtype:
- expected: [('left_child', '<i8'), ('right_child', '<i8'), ('feature', '<i8'), ('threshold', '<f8'), ('impurity', '<f8'), ('n_node_samples', '<i8'), ('weighted_n_node_samples', '<f8')]
- got     : {'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'], 'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'], 'offsets': [0, 8, 16, 24, 32, 40, 48, 56], 'itemsize': 64}

# LIME   

In [None]:
# Names of the seven model-input columns, in the same order as they appear
# in `df` (right after the leading "idx" column).
feature_names = [
    *("N-Score", "E-Score", "O-Score", "A-Score", "C-Score"),
    *("Impusiveness", "Sensation seeking"),
]
# Note: this list is not yet used to select columns (e.g. df[feature_names]).

In [None]:
def get_lime(model, x_train, x_test, instance_index=0, random_state=None):
    """Build a LIME tabular explainer and explain one row of ``x_test``.

    Parameters
    ----------
    model
        Object exposing ``predict_proba``; that method is handed to LIME as
        the prediction function.
    x_train : pandas.DataFrame
        Data used to fit the explainer. Its ``.values`` are the background
        data and its ``.columns`` become the feature names.
    x_test : pandas.DataFrame
        Data holding the row to explain.
    instance_index : int, default 0
        Positional index (into ``x_test.values``) of the row to explain.
        The default keeps the original behaviour of explaining the first row.
    random_state : optional, default None
        Forwarded unchanged to ``LimeTabularExplainer``. ``None`` keeps the
        library's default (unseeded) behaviour; pass a fixed seed when the
        explanation should be repeatable across runs.

    Returns
    -------
    tuple
        ``(explanation, explainer)``: the result of ``explain_instance`` for
        the selected row, and the explainer that produced it (returned so
        callers can reuse it, e.g. for stability evaluation).

    Raises
    ------
    IndexError
        If ``instance_index`` is out of range for ``x_test``.
    """
    explainer = lime.lime_tabular.LimeTabularExplainer(
        x_train.values,
        feature_names=x_train.columns,
        class_names=["No", "Yes"],
        discretize_continuous=True,
        random_state=random_state,
    )

    # Explain a single observation; LIME expects a 1-D row, not a DataFrame.
    instance = x_test.values[instance_index]
    explanation = explainer.explain_instance(instance, model.predict_proba)
    return explanation, explainer

In [None]:
# Collects the stability evaluation result(s) produced below.
stab = []

# Split the data 80/20, stratified on the Cannabis label so both splits keep
# the same class balance; the fixed random_state makes the split repeatable.
target_train_df, target_test_df = train_test_split(
    df, train_size=0.8, shuffle=True, stratify=df["Cannabis"], random_state=0
)
# Select the model inputs by column name rather than by position: the seven
# names in `feature_names` are exactly the columns at positions 1..7 of `df`.
target_x_train = target_train_df[feature_names]
target_x_test = target_test_df[feature_names]

# Explain the first test row and render the explanation inline.
lime_explanations, explainer = get_lime(model, target_x_train, target_x_test)
print("Target: cann")
lime_explanations.show_in_notebook(show_table=True)

# Evaluate the explanation for that same first test row, reusing the explainer.
stab.append(
    LimeEval.evaluate_stability(target_x_test.values[0], model.predict_proba, explainer)
)
print("Target: cann done")

## LIME stability

In [None]:
print("Target: cannabis")

# Row-wise mean of the absolute values of every column in `df`, stored as a
# one-column frame named "mean".
# NOTE(review): this aggregates the raw dataset `df` (including "idx" and the
# substance columns), not the results collected in `stab` -- confirm this is
# the intended input for the stability summary.
df0mean = df.abs().mean(axis=1).to_frame(name="mean")

# Report mean, standard deviation and variance for each column of the frame
# (there is only the single "mean" column).
for column in df0mean.columns:
    print(f"    Mean: {df0mean[column].mean()}")
    print(f"    Std: {df0mean[column].std()}")
    print(f"    Var: {df0mean[column].var()}")