In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import Voronoi, voronoi_plot_2d
from sklearn import datasets

from dbc.main import KmeansDiscreteBayesianClassifier, KmeansDiscreteMinimaxClassifier, \
    CmeansDiscreteBayesianClassifier, DecisionTreeDiscreteBayesianClassifier, DecisionTreeDiscreteMinimaxClassifier, DiscriminativeDiscreteBayesianClassifier, DiscriminativeMinmaxClassifier
from dbc.utils import compute_conditional_risk

from sklearn.metrics import accuracy_score

In [2]:
# Generate a two-class 2-D toy dataset: 625 samples around (9.5, 10) and 250 samples around (10, 9.4),
# with a per-feature standard deviation for each blob.
# random_state is fixed so that "Restart & Run All" regenerates exactly the same data; without it every
# run draws a different sample and the risks and figures below are not reproducible.
X_train, y_train = datasets.make_blobs(n_samples=[125 * 5, 125 * 2], n_features=2, centers=[(9.5, 10), (10, 9.4)],
                                       cluster_std=[[0.6, 0.6], [0.35, 0.3]], shuffle=True, random_state=25)

In [3]:
# Initialize the Discriminative Discrete Bayesian Classifier with 15 clusters, trained for 300 epochs.
# NOTE(review): despite its name, DBC_kmeans does not hold a k-means model here; the name is kept
# because the plotting cells that follow read this variable.
# NOTE(review): no random_state is passed (the DiscriminativeMinmaxClassifier cell passes one) -
# confirm whether this class accepts it so the run can be made reproducible.
# NOTE(review): the output saved with this cell is a RuntimeError about tensors on cuda:0 vs cpu;
# the cause is not visible in this notebook - presumably device handling inside dbc, to be confirmed.
DBC_kmeans = DiscriminativeDiscreteBayesianClassifier(n_clusters=15, n_epochs=300)

# Fit the classifier using the training data
DBC_kmeans.fit(X_train, y_train)

# Predict the labels for the training data
y_pred = DBC_kmeans.predict(X_train)

# Compute the conditional risk based on the true labels and predicted labels.
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix.
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

RuntimeError: Expected all tensors to be on the same device, but got mat1 is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA_addmm)

In [None]:
# Probability map of the model currently stored in DBC_kmeans, evaluated on a dense grid.
# The figure keeps a three-panel layout with the DBC / PDBC titles, but only the middle panel
# receives the probability contours and the 0.5 decision boundary; the outer panels show the
# training points only.

fig, ax = plt.subplots(1, 3, figsize=(18, 5))

# Evaluation grid: bounding box of the training data widened by 1 on each side, step 0.01.
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

# Column 1 of predict_prob is what gets plotted, laid out on the grid.
Z1 = DBC_kmeans.predict_prob(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)

# Training points on every panel: class 0 in blue, class 1 in red.
class0_indices = y_train == 0
class1_indices = y_train == 1
for i, panel in enumerate(ax):
    for mask, colour in ((class0_indices, 'mediumblue'), (class1_indices, 'firebrick')):
        panel.scatter(X_train[mask, 0], X_train[mask, 1], color=colour)

# Filled probability contours in ten bands between 0 and 1 (middle panel only).
contour1 = ax[1].contourf(xx, yy, Z1, alpha=0.7, cmap='coolwarm', levels=np.linspace(0, 1, 11))

# Decision boundary: the 0.5 iso-probability line.
decision_boundary_prob = 0.5
boundary1 = ax[1].contour(xx, yy, Z1, levels=[decision_boundary_prob], colors='purple')

ax[0].set_title('DBC', fontsize=14)
ax[1].set_title('PDBC with hard clustering', fontsize=14)
ax[2].set_title('PDBC with soft clustering', fontsize=14)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
# Visualise how the trained discretization network partitions the input space into profiles.
# The network is switched to eval mode and moved to the CPU so that it lives on the same device as
# the CPU tensors built below.
DBC_kmeans.discretization_model.eval()
DBC_kmeans.discretization_model.cpu()
with torch.no_grad():
    # Build the evaluation grid around the training data
    padding = 0.1
    x_min, x_max = X_train[:, 0].min() - padding, X_train[:, 0].max() + padding
    y_min, y_max = X_train[:, 1].min() - padding, X_train[:, 1].max() + padding
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),  # the step controls the resolution
                         np.arange(y_min, y_max, 0.02))

    # Convert the grid points to a float32 tensor
    grid_points = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)

    # Profile assignment of every grid point. The model output is unpacked into three items, of which
    # the second is used as q(z|x). Unpack first and only then move that item to the CPU: calling
    # .cpu() on the whole return value fails when the model returns a tuple.
    _, q_z_given_x_grid, _ = DBC_kmeans.discretization_model(grid_points)
    Z_grid_1 = torch.argmax(q_z_given_x_grid.cpu(), dim=1).reshape(xx.shape).numpy()

    # Profile assignment of the training data, for comparison. X_train is a NumPy array, so it has to
    # be converted to a float32 tensor before it can be fed to the torch module.
    X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
    _, q_z_given_x, _ = DBC_kmeans.discretization_model(X_train_tensor)
    Z_val_1 = torch.argmax(q_z_given_x.cpu(), dim=1).numpy()

plt.figure(figsize=(8, 8))
# Draw the partition as filled regions, one band per profile index (levels sit at k - 0.5).
# tab10 only has 10 distinct colours, so with more profiles neighbouring regions would share a colour;
# fall back to tab20 in that case (distinct for up to 20 profiles).
n_profiles = DBC_kmeans.discretization_model.num_profiles
profile_cmap = "tab10" if n_profiles <= 10 else "tab20"
plt.contourf(xx, yy, Z_grid_1, levels=np.arange(n_profiles + 1) - 0.5,
             cmap=profile_cmap, alpha=0.6)
# Overlay the training points, coloured by their true class
class_0_mask = (y_train == 0)
class_1_mask = (y_train == 1)
plt.scatter(X_train[class_0_mask, 0], X_train[class_0_mask, 1],
           c='red', edgecolors='black', linewidth=0.5, s=20, label='Class 0')
plt.scatter(X_train[class_1_mask, 0], X_train[class_1_mask, 1],
           c='skyblue', edgecolors='black', linewidth=0.5, s=20, label='Class 1')
# The scatter labels are only visible once a legend is drawn
plt.legend()
plt.title("Learned Profile Assignment (Z) - Space Partition")
plt.xlabel("X1")
plt.ylabel("X2")
plt.show()

In [None]:
# Initialize the Discriminative Minmax Classifier with 15 clusters, a fixed random state and 300 epochs.
# NOTE(review): this rebinds DBC_kmeans, replacing the model fitted in the earlier cells.
DBC_kmeans = DiscriminativeMinmaxClassifier(n_clusters=15, random_state=25,n_epochs=300)

# Fit the classifier using the training data
DBC_kmeans.fit(X_train, y_train)

# Predict the labels for the training data
y_pred = DBC_kmeans.predict(X_train)

# Compute the conditional risk based on the true labels and predicted labels.
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix.
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
from dbc.main import CmeansDiscreteMinmaxClassifier
# Initialize the C-means Discrete Minmax Classifier with 15 clusters, fuzzifier 1.5 and use_kmeans=True.
# NOTE(review): no random_state is passed here - confirm whether the class accepts one.
# NOTE(review): this rebinds DBC_kmeans, replacing the previously fitted model.
DBC_kmeans = CmeansDiscreteMinmaxClassifier(n_clusters=15, use_kmeans=True,fuzzifier=1.5)

# Fit the classifier using the training data
DBC_kmeans.fit(X_train, y_train)

# Predict the labels for the training data
y_pred = DBC_kmeans.predict(X_train)

# Compute the conditional risk based on the true labels and predicted labels.
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix.
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
# Initialize the Kmeans Discrete Bayesian Classifier with 15 clusters and a set random state for reproducibility.
# From this cell on DBC_kmeans is the k-means model; the Voronoi plots and DBC_fcm below read its cluster_centers.
DBC_kmeans = KmeansDiscreteBayesianClassifier(n_clusters=15, random_state=25)

# Fit the classifier using the training data
DBC_kmeans.fit(X_train, y_train)

# Predict the labels for the training data
y_pred = DBC_kmeans.predict(X_train)

# Compute the conditional risk based on the true labels and predicted labels.
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix.
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
# Training-set accuracy of the current y_pred (the k-means DBC predictions when the notebook is run top to bottom)
accuracy_score(y_train, y_pred)

In [None]:
# Predict the probability distribution over classes for each sample in the training data.
# The whole result is displayed as the cell output (one row per training sample).
DBC_kmeans.predict_prob(X_train)

In [None]:
# Initialize the C-means Discrete Bayesian Classifier with 15 clusters.
# Set the fuzzifier to 1.5 and use the cluster centers from the DBC_kmeans classifier
# (DBC_kmeans must therefore already be fitted, and must expose cluster_centers, when this cell runs).
DBC_fcm = CmeansDiscreteBayesianClassifier(n_clusters=15, fuzzifier=1.5, cluster_centers=DBC_kmeans.cluster_centers, random_state=25)
# Alternative that does not reuse the k-means centers:
# DBC_fcm = CmeansDiscreteBayesianClassifier(n_clusters=15, fuzzifier=1.5)

# Fit the classifier using the training data
DBC_fcm.fit(X_train, y_train)

# Predict the labels for the training data
y_pred = DBC_fcm.predict(X_train)

# Compute the conditional risk based on the true labels and predicted labels.
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix.
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
# Predict the probability distribution over classes for each sample in the training data.
# The whole result is displayed as the cell output (one row per training sample).
DBC_fcm.predict_prob(X_train)

In [None]:
# Three-panel comparison on a dense grid, each panel overlaid on the Voronoi cells of the
# DBC_kmeans cluster centres:
#   left   - hard decision of DBC_kmeans (one-hot of its most probable class),
#   middle - class-1 probability from DBC_kmeans,
#   right  - class-1 probability from DBC_fcm.

fig, ax = plt.subplots(1, 3, figsize=(18, 5))

# Evaluation grid: bounding box of the training data widened by 1 on each side, step 0.01.
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

# Same Voronoi diagram drawn first on every panel.
vor = Voronoi(DBC_kmeans.cluster_centers)
for panel in ax:
    voronoi_plot_2d(vor, show_points=False, show_vertices=False, s=1, ax=panel)

Z1 = DBC_kmeans.predict_prob(np.c_[xx.ravel(), yy.ravel()])

# One-hot encode the arg-max class of every grid point to obtain the hard decision.
max_indices = np.argmax(Z1, axis=1)
rows = np.arange(Z1.shape[0])
Z0 = np.zeros_like(Z1)
Z0[rows, max_indices] = 1.0

# Keep the class-1 column of each map and lay it out on the grid.
Z0 = Z0[:, 1].reshape(xx.shape)
Z1 = Z1[:, 1].reshape(xx.shape)
Z2 = DBC_fcm.predict_prob(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)

# Training points on every panel: class 0 in blue, class 1 in red.
class0_indices = y_train == 0
class1_indices = y_train == 1
for i, panel in enumerate(ax):
    for mask, colour in ((class0_indices, 'mediumblue'), (class1_indices, 'firebrick')):
        panel.scatter(X_train[mask, 0], X_train[mask, 1], color=colour)

# Filled probability contours in ten bands between 0 and 1, one map per panel.
contour0, contour1, contour2 = [
    panel.contourf(xx, yy, prob_map, alpha=0.7, cmap='coolwarm', levels=np.linspace(0, 1, 11))
    for panel, prob_map in zip(ax, (Z0, Z1, Z2))
]

# Decision boundary: the 0.5 iso-probability line of each map.
decision_boundary_prob = 0.5
boundary0, boundary1, boundary2 = [
    panel.contour(xx, yy, prob_map, levels=[decision_boundary_prob], colors='purple')
    for panel, prob_map in zip(ax, (Z0, Z1, Z2))
]

# One colorbar shared by the three panels.
cbar = fig.colorbar(contour2, ax=ax, orientation='vertical')
cbar.set_label('Probability of class 1')

ax[0].set_title('DBC', fontsize=14)
ax[1].set_title('PDBC with hard clustering', fontsize=14)
ax[2].set_title('PDBC with soft clustering', fontsize=14)

In [None]:
# Two-panel figure saved as DBC_SPDBC.pdf, both panels overlaid on the Voronoi cells of the
# DBC_kmeans cluster centres:
#   left  - hard decision of DBC_kmeans (one-hot of its most probable class),
#   right - class-1 probability from DBC_fcm.

fig, ax = plt.subplots(1, 2, figsize=(12, 5))
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                     np.arange(y_min, y_max, 0.01))

vor = Voronoi(DBC_kmeans.cluster_centers)
voronoi_plot_2d(vor, show_points=False, show_vertices=False, s=1, ax=ax[0])
voronoi_plot_2d(vor, show_points=False, show_vertices=False, s=1, ax=ax[1])

# Class probabilities of the k-means model; here they are only needed to derive the hard decision.
Z1 = DBC_kmeans.predict_prob(np.c_[xx.ravel(), yy.ravel()])

# One-hot encode the arg-max class of every grid point.
Z0 = np.zeros_like(Z1)
max_indices = np.argmax(Z1, axis=1)
rows = np.arange(Z1.shape[0])
Z0[rows, max_indices] = 1.0

# Keep the class-1 column and lay it out on the grid.
Z0 = Z0[:, 1].reshape(xx.shape)

Z2 = DBC_fcm.predict_prob(np.c_[xx.ravel(), yy.ravel()])
Z2 = Z2[:, 1].reshape(xx.shape)

class0_indices = (y_train == 0)
class1_indices = (y_train == 1)
for i in range(2):
    ax[i].scatter(X_train[class0_indices, 0], X_train[class0_indices, 1], color='mediumblue')
    ax[i].scatter(X_train[class1_indices, 0], X_train[class1_indices, 1], color='firebrick')
    # Ticks are hidden for the exported figure.
    ax[i].set_xticks([])
    ax[i].set_yticks([])

contour0 = ax[0].contourf(xx, yy, Z0, alpha=0.7, cmap='coolwarm', levels=np.linspace(0, 1, 11))
contour1 = ax[1].contourf(xx, yy, Z2, alpha=0.7, cmap='coolwarm', levels=np.linspace(0, 1, 11))

decision_boundary_prob = 0.5

boundary0 = ax[0].contour(xx, yy, Z0, levels=[decision_boundary_prob], colors='purple')
boundary1 = ax[1].contour(xx, yy, Z2, levels=[decision_boundary_prob], colors='purple')

# The colorbar must be built from a contour set that belongs to THIS figure. The cell does not define
# contour2, so using it only worked through a stale variable left over from a previously run cell
# (a mappable of another figure) and raised NameError otherwise. contour1 is the probability map drawn
# on the right panel and uses the same colormap and levels.
cbar = fig.colorbar(contour1, ax=ax, orientation='vertical')
cbar.set_label('Probability of red class')

ax[0].set_title('DBC', fontsize=14)
ax[1].set_title('SPDBC', fontsize=14)

plt.savefig("DBC_SPDBC.pdf",dpi=300, bbox_inches="tight",transparent=True)


In [None]:
# Fit the KmeansDiscreteMinimaxClassifier (15 clusters, fixed random state) on the training data
DMC_kmeans = KmeansDiscreteMinimaxClassifier(n_clusters=15, random_state=25)
DMC_kmeans.fit(X_train, y_train)
# Predict the labels for the training data
y_pred = DMC_kmeans.predict(X_train)
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
# Three-panel comparison for the minimax classifier, each panel overlaid on the Voronoi cells of
# the DMC_kmeans cluster centres:
#   left   - hard decision of DMC_kmeans (one-hot of its most probable class),
#   middle - class-1 probability from DMC_kmeans,
#   right  - class-1 probability from DBC_fcm evaluated with prior_pred=DMC_kmeans.prior_star.

fig, ax = plt.subplots(1, 3, figsize=(18, 5))

# Evaluation grid: bounding box of the training data widened by 1 on each side, step 0.01.
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

# Same Voronoi diagram drawn first on every panel.
vor = Voronoi(DMC_kmeans.cluster_centers)
for panel in ax:
    voronoi_plot_2d(vor, show_points=False, show_vertices=False, s=1, ax=panel)

Z1 = DMC_kmeans.predict_prob(np.c_[xx.ravel(), yy.ravel()])

# One-hot encode the arg-max class of every grid point to obtain the hard decision.
max_indices = np.argmax(Z1, axis=1)
rows = np.arange(Z1.shape[0])
Z0 = np.zeros_like(Z1)
Z0[rows, max_indices] = 1.0

# Keep the class-1 column of each map and lay it out on the grid.
Z0 = Z0[:, 1].reshape(xx.shape)
Z1 = Z1[:, 1].reshape(xx.shape)
Z2 = DBC_fcm.predict_prob(np.c_[xx.ravel(), yy.ravel()],
                          prior_pred=DMC_kmeans.prior_star)[:, 1].reshape(xx.shape)

# Training points on every panel: class 0 in blue, class 1 in red.
class0_indices = y_train == 0
class1_indices = y_train == 1
for i, panel in enumerate(ax):
    for mask, colour in ((class0_indices, 'mediumblue'), (class1_indices, 'firebrick')):
        panel.scatter(X_train[mask, 0], X_train[mask, 1], color=colour)

# Filled probability contours in ten bands between 0 and 1, one map per panel.
contour0, contour1, contour2 = [
    panel.contourf(xx, yy, prob_map, alpha=0.7, cmap='coolwarm', levels=np.linspace(0, 1, 11))
    for panel, prob_map in zip(ax, (Z0, Z1, Z2))
]

# Decision boundary: the 0.5 iso-probability line of each map.
decision_boundary_prob = 0.5
boundary0, boundary1, boundary2 = [
    panel.contour(xx, yy, prob_map, levels=[decision_boundary_prob], colors='purple')
    for panel, prob_map in zip(ax, (Z0, Z1, Z2))
]

# One colorbar shared by the three panels.
cbar = fig.colorbar(contour2, ax=ax, orientation='vertical')
cbar.set_label('Probability of class 1')

ax[0].set_title('DMC', fontsize=14)
ax[1].set_title('PDMC with hard clustering', fontsize=14)
ax[2].set_title('PDMC with soft clustering', fontsize=14)

In [None]:
# Fit the DecisionTreeDiscreteBayesianClassifier (max_depth=10) on the training data
DBC_DT = DecisionTreeDiscreteBayesianClassifier(max_depth=10)
DBC_DT.fit(X_train, y_train)
# Predict the training labels, passing the fitted model's own prior attribute as prior_pred
y_pred = DBC_DT.predict(X_train, prior_pred=DBC_DT.prior)
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')

In [None]:
# Training-set accuracy of the current y_pred (the DBC_DT predictions when the notebook is run top to bottom)
accuracy_score(y_train, y_pred)

In [None]:
# Fit the DecisionTreeDiscreteMinimaxClassifier (max_depth=10) on the training data
DMC_DT = DecisionTreeDiscreteMinimaxClassifier(max_depth=10)
DMC_DT.fit(X_train, y_train)
# Predict the labels for the training data
y_pred = DMC_DT.predict(X_train)
# Element 0 of the result is reported as the class-conditional risk, element 1 as the confusion matrix
conditional_risk = compute_conditional_risk(y_train, y_pred)
print(f'Class condition risk: \n{conditional_risk[0]}')
print(f'\nConfusion matrix: \n{conditional_risk[1]}')