In [1]:
import numpy as np
import pandas as pd
from pgmpy.readwrite import XMLBIFReader
import random
import copy
from sklearn.model_selection import train_test_split

# Settings
from discriminativeBN import DiscriminativeBN
from simplexMethod import NelderMeadOptimizer
from hookeJeeves import HookeJeevesOptimizer
from kfold_runner import k_fold_validation
from kfold_runner_lbfgs import k_fold_validation_gradient
from lbfgs import LBFGSOptimizerWrapper

  from .autonotebook import tqdm as notebook_tqdm


# Demo of the discriminative learning with all methods
We will be using autralian disease dataset

In [2]:

xml_file = "models/australianTAN.xml" 
csv_file = "datasets/AustralianDisc.csv" 
target_var = "A15"  

In [3]:
bn = DiscriminativeBN(xml_file, target_var)
df = pd.read_csv(csv_file)

train_df, test_df = train_test_split(
    df,
    test_size=0.2,   # 20% for testing
    random_state=42, # for reproducibility
    shuffle=True
)


def objective(betas):
    cll = bn.calculate_cll(betas, train_df)
    return -cll # Negative because we minimize
    
np.random.seed(42)
start_betas = np.random.normal(0, 0.1, bn.total_params)

--- Starting Hooke-Jeeves Optimization (Dim: 118) ---
393.89707220922975
Initial Score: -393.8971 (CLL)
------------------------------
------------------------------
Optimization Finished. Best Score found: -155.9564

Optimization Complete.
Final Conditional Log Likelihood: -211.6407


## Hooke Jeeves Pattern search

In [None]:
optimizer = HookeJeevesOptimizer(objective, dim=bn.total_params, max_iter=5) # Low iters for demo
best_betas = optimizer.optimize(start_betas)

# Result
print("\nOptimization Complete.")
final_cll = bn.calculate_cll(best_betas, df)
print(f"Final Conditional Log Likelihood: {final_cll:.4f}")

In [4]:
# Show one resulting table
final_tables = bn.betas_to_probabilities(best_betas)
print(f"\nLearned Table for {target_var}:")
print(final_tables[target_var])


Learned Table for A15:
[[0.44112357 0.55887643]]


In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix
from get_accuracy import get_accuracy, calculate_accuracy
# Run prediction
accuracy, conf_matrix, predictions = calculate_accuracy(bn, final_tables, test_df)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(conf_matrix)

# Example of a single prediction comparison
print(f"\nRow 0 Actual: {df.iloc[0][bn.target_col]}")
print(f"Row 0 Pred  : {predictions[0]}")

Accuracy: 86.23%

Confusion Matrix:
[[77 10]
 [ 9 42]]

Row 0 Actual: A
Row 0 Pred  : B


## Nelder Mead (Simplex)

In [10]:
optimizer_nel = NelderMeadOptimizer(objective, dim=bn.total_params, max_iter=5) # Low iters for demo
best_betas_nelder = optimizer_nel.optimize(start_betas)

# Result
print("\nOptimization Complete.")
final_cll_nel = bn.calculate_cll(best_betas, train_df)
print(f"Final Conditional Log Likelihood: {final_cll:.4f}")

--- Starting Simplex Optimization (Dim: 118) ---
Initial Best Score: -390.4083 (CLL)
Initial Worst Score: -397.4935 (CLL)
------------------------------
------------------------------
Optimization Finished. Best Score found: -386.9415

Optimization Complete.
Final Conditional Log Likelihood: -211.6407


In [11]:
# Show one resulting table
final_tables_nel = bn.betas_to_probabilities(best_betas_nelder)
print(f"\nLearned Table for {target_var}:")
print(final_tables[target_var])


Learned Table for A15:
[[0.44112357 0.55887643]]


In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix
from get_accuracy import get_accuracy, calculate_accuracy
# Run prediction
accuracy, conf_matrix, predictions = calculate_accuracy(bn, final_tables_nel, test_df)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(conf_matrix)

# Example of a single prediction comparison
print(f"\nRow 0 Actual: {df.iloc[0][bn.target_col]}")
print(f"Row 0 Pred  : {predictions[0]}")

Accuracy: 50.00%

Confusion Matrix:
[[48 39]
 [30 21]]

Row 0 Actual: A
Row 0 Pred  : B


## L-BFGS

In [16]:
gradient_func = lambda betas: bn.calculate_gradient(betas, train_df)
op_lbfgs = LBFGSOptimizerWrapper(func = objective, gradient_func= gradient_func ,dim=bn.total_params, max_iter=100)
best_betas_lbfgs = op_lbfgs.optimize(start_betas)

--- Starting L-BFGS Optimization (Dim: 118) ---
Initial Best Score: -393.8971 (CLL)
------------------------------
------------------------------
Optimization Success!
Total Iterations: 9
Final Best Score: -153.7694 (CLL)


In [17]:
# Result
print("\nOptimization Complete.")
final_cll_lbfgs = bn.calculate_cll(best_betas_lbfgs, train_df)
print(f"Final Conditional Log Likelihood: {final_cll_lbfgs:.4f}")


Optimization Complete.
Final Conditional Log Likelihood: -153.7694


In [18]:
# Show one resulting table
final_tables_lbfgs = bn.betas_to_probabilities(best_betas_lbfgs)
print(f"\nLearned Table for {target_var}:")
print(final_tables_lbfgs[target_var])


Learned Table for A15:
[[0.50504311 0.49495689]]


In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix
from get_accuracy import get_accuracy, calculate_accuracy
# Run prediction
accuracy, conf_matrix, predictions = calculate_accuracy(bn, final_tables_lbfgs, test_df)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(conf_matrix)

# Example of a single prediction comparison
print(f"\nRow 0 Actual: {df.iloc[0][bn.target_col]}")
print(f"Row 0 Pred  : {predictions[0]}")

Accuracy: 89.13%

Confusion Matrix:
[[82  5]
 [10 41]]

Row 0 Actual: A
Row 0 Pred  : B
