##### Imports

In [4]:
import asvFormula.experiments as exp
from asvFormula.bayesianNetworks import networkSamplesPath
from asvFormula import asvRunResultsPath
from asvFormula.datasetManipulation import *
from asvFormula.digraph import hasUnderlyingTree
from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination

from importlib import reload 
import asvFormula
import asvFormula.experiments as exp

# Reload the experiments module and rebind `exp` to the module object returned by
# reload(), so the cells below use the freshly re-executed module code.
exp = reload(asvFormula.experiments)

#TODO: Fix the random seed so that the results are reproducible. Right now, calling the seed method does not change anything.

### Running ASV for Cancer Bayesian network

#### Train the decision tree model

In [5]:
# Read the Cancer network from its BIF file.
cancerNetworkPath = networkSamplesPath + "/cancer.bif"
BNmodel = BIFReader(cancerNetworkPath).get_model()

# Experiment settings handed to initializeDataAndRemoveVariable below.
variableToPredict = "Pollution"
numberOfSamples = 2000
treeMaxDepth = 5

# The helper returns five values that the following cells rely on.
# NOTE(review): presumably it samples the network and fits the decision tree — confirm in asvFormula.
(
    BNInference,
    valuesPerFeature,
    encodedDataset,
    dtTreeClassifier,
    dtAsNetwork,
) = initializeDataAndRemoveVariable(
    BNmodel,
    variableToPredict,
    numberOfSamples,
    treeMaxDepth,
)

  0%|          | 0/5 [00:00<?, ?it/s]

The model accuracy is : 0.8925


#### Mean prediction of model

In [6]:
# Feature columns only (target column dropped); the first row is the instance given to ASV.
# assumes encodedDataset is a pandas DataFrame, where drop(columns=x) equals drop(x, axis=1)
dataNoPrediction = encodedDataset.drop(columns=variableToPredict)
first_instance = dataNoPrediction.iloc[0]

asvCalc = exp.ASV(
    BNmodel,
    dtTreeClassifier,
    BNInference,
    valuesPerFeature,
    variableToPredict,
    'Exact',
    first_instance,
)

# Use the complete Bayesian network, read again from the BIF file, to calculate the mean prediction.
completeBNModel = BIFReader(cancerNetworkPath).get_model()
num_variables = len(completeBNModel.nodes())

completeBNInference = VariableElimination(completeBNModel)

exp.showMeanPredictionOfModel(
    variableToPredict,
    completeBNInference,
    valuesPerFeature,
    dtTreeClassifier,
    asvCalc,
    num_variables,
)

Mean prediction of model for the variable Pollution
Mean prediction value for the decision tree: [0.00203   0.9986805], it took 0.008713950001038029 seconds
Mean prediction value for the probabilities of the decision tree: [0.09486323 0.90513677], it took 0.008713950001038029 seconds
Mean prediction value for possible values of the dataset: [0.007350000000000002, 0.9926500000000001], it took 0.015301499999623047 seconds
Estimated value for shap explainer: [0.09625 0.90375]
Probabilities of the variable in the bayesian network: [0.1, 0.9]


### Running ASV for Child Bayesian network

#### Train the decision tree model

In [7]:
# Location of the Child network's BIF file under networkSamplesPath.
childNetworkPath = networkSamplesPath + "/child.bif"

def childBNAsTree(childNetworkPath):
    """Read the Child network from a BIF file and prune it until it is a tree.

    Every edge listed below is passed to removeEdgeAndMarginalizeCPD, in order;
    afterwards the network must satisfy hasUnderlyingTree.

    Args:
        childNetworkPath: Path of the BIF file to read.

    Returns:
        The model obtained from BIFReader(...).get_model() after all listed
        edges have been processed.

    Raises:
        AssertionError: If the pruned network fails the hasUnderlyingTree check.
    """
    prunedNetwork = BIFReader(childNetworkPath).get_model()

    # These edges are removed so that the network is a tree and we can work with it.
    edgesToRemove = (
        ('LungParench', 'Grunting'),
        ('LungParench', 'HypoxiaInO2'),
        ('HypoxiaInO2', 'LowerBodyO2'),
        ('CardiacMixing', 'HypDistrib'),
        ('Sick', 'Age'),
        ('LungFlow', 'ChestXray'),
    )
    for firstNode, secondNode in edgesToRemove:
        removeEdgeAndMarginalizeCPD(prunedNetwork, firstNode, secondNode)

    assert hasUnderlyingTree(prunedNetwork)
    return prunedNetwork

# Tree-shaped version of the Child network.
treeBNChild = childBNAsTree(childNetworkPath)

# Experiment settings handed to initializeDataAndRemoveVariable below.
variableToPredict = "Age"
numberOfSamples = 10000
treeMaxDepth = 7

# Rebinds the same five notebook-level names that the Cancer section set earlier.
(
    BNInference,
    valuesPerFeature,
    encodedDataset,
    dtTreeClassifier,
    dtAsNetwork,
) = initializeDataAndRemoveVariable(
    treeBNChild,
    variableToPredict,
    numberOfSamples,
    treeMaxDepth,
)


  0%|          | 0/20 [00:00<?, ?it/s]



The model accuracy is : 0.68


#### Mean prediction of model

In [8]:
# Feature columns only (target column dropped); the first row is the instance given to ASV.
dataNoPrediction = encodedDataset.drop(variableToPredict, axis=1)
first_instance = dataNoPrediction.iloc[0]

asvCalc = exp.ASV(treeBNChild, dtTreeClassifier, BNInference, valuesPerFeature, variableToPredict, 'Exact', first_instance)

# Use the complete Bayesian network (re-read from the BIF file, not the pruned tree) to calculate the mean prediction
completeBNModel = BIFReader(childNetworkPath).get_model()
completeBNInference = VariableElimination(completeBNModel)
num_variables = len(completeBNModel.nodes())

# NOTE(review): the literal 11 is passed as the last argument while num_variables (computed above)
# is never used; the Cancer section passes num_variables in this position. Presumably 11 is a
# deliberate cap to keep the run tractable — confirm, and name the constant if so.
exp.showMeanPredictionOfModel(variableToPredict, completeBNInference, valuesPerFeature, dtTreeClassifier, asvCalc, 11)

# The prediction for the first instance is 0, so if many of the features are fixed, there is a higher chance that the prediction is 0.


Mean prediction of model for the variable Age
Mean prediction value for the decision tree: [0.99781561 0.         0.00218439], it took 0.025556358999892836 seconds
Mean prediction value for the probabilities of the decision tree: [0.76774309 0.06836589 0.16389102], it took 0.025556358999892836 seconds
Mean prediction value for possible values of the dataset: [0.9947846567967703, 0.0, 0.005215343203230149], it took 380.02609960599875 seconds
Estimated value for shap explainer: [0.691125 0.13775  0.171125]
Probabilities of the variable in the bayesian network: [0.6489918355500001, 0.17148510286900004, 0.17952306158100004]


#### ASV + Shapley

In [9]:
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'DecisionTreeDigraph' object has no attribute 'nodeMeanPrediction'".
# The failing access is not in this cell's own code, so the 'Mean' path likely needs a fix inside asvFormula — confirm.
asvCalc = exp.ASV(
    treeBNChild,
    dtTreeClassifier,
    BNInference,
    valuesPerFeature,
    variableToPredict,
    predictionFunction='Mean',
)

# Feature columns only (target column dropped); the first row is the instance to explain.
# assumes encodedDataset is a pandas DataFrame, where drop(columns=x) equals drop(x, axis=1)
dataNoPrediction = encodedDataset.drop(columns=variableToPredict)
first_instance = dataNoPrediction.iloc[0]

exp.writeASVAndShapleyIntoFile(
    first_instance,
    list(dataNoPrediction.columns),
    dtTreeClassifier,
    asvCalc,
    asvRunResultsPath + "/childASVAndShapleyMean.csv",
    valuesPerFeature,
    variableToPredict,
)

AttributeError: 'DecisionTreeDigraph' object has no attribute 'nodeMeanPrediction'

In [None]:
# ASV calculator for the tree-shaped Child network using the 'Exact' prediction function.
asvCalc = exp.ASV(treeBNChild, dtTreeClassifier, BNInference, valuesPerFeature, variableToPredict, predictionFunction='Exact')

# Feature columns only (target column dropped); the first row is the instance to explain.
dataNoPrediction = encodedDataset.drop(variableToPredict, axis=1)
first_instance = dataNoPrediction.iloc[0]

# Fixed the misspelled function name (was `writeASVAndShapleeyIntoFile`) so it matches the
# `writeASVAndShapleyIntoFile` call used by the 'Mean' cell; results go to the "Exact" CSV.
exp.writeASVAndShapleyIntoFile(first_instance, list(dataNoPrediction.columns), dtTreeClassifier, asvCalc, asvRunResultsPath + "/childASVAndShapleyExact.csv", valuesPerFeature, variableToPredict)

#### Mean Prediction

In [None]:
# Use the complete bayesian network to calculate the mean prediction
# Use the complete Bayesian network to calculate the mean prediction:
# re-read the Child network from its BIF file and build a VariableElimination engine on it.
completeBNModel = BIFReader(childNetworkPath).get_model()
completeBNInference = VariableElimination(completeBNModel)

In [None]:
# NOTE(review): this call passes four arguments, whereas the earlier mean-prediction cells also
# pass asvCalc and a variable count — confirm this shorter form is still accepted.
exp.showMeanPredictionOfModel(variableToPredict, completeBNInference, valuesPerFeature, dtTreeClassifier)

In [None]:
# Rebuild dtAsNetwork from the decision tree classifier and the feature column names.
dtAsNetwork = exp.obtainDecisionTreeDigraph(dtTreeClassifier, dataNoPrediction.columns)
# Optional drawing calls, currently disabled:
#exp.drawDecisionTree(dtAsNetwork)
#exp.drawGraph(treeBNChild)