Skip to content

Commit

Permalink
test pipeline multiclass
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuawe committed Dec 26, 2023
1 parent 1cc259d commit b5f6466
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 32 deletions.
191 changes: 163 additions & 28 deletions notebooks/pipeline.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_multiclass_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# import plotsandgraphs.binary_classifier as binary
import plotsandgraphs.multiclass_classifier as multiclass

TEST_RESULTS_PATH = Path(r"tests\test_results")
# Directory for figures written by the multiclass classifier tests.
# Forward slashes are used because pathlib only treats "\" as a separator on
# Windows; with backslashes a POSIX run would create one directory literally
# named "tests\test_results\multi_class_classifier".
TEST_RESULTS_PATH = Path("tests/test_results/multi_class_classifier")


# @pytest.fixture(scope="module")
Expand Down
14 changes: 12 additions & 2 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from plotsandgraphs import pipeline


from .utils import random_data_binary_classifier
from .utils import random_data_binary_classifier, random_data_multiclass_classifier

# Base directory under which the pipeline tests in this module save their figures.
TEST_RESULTS_PATH = Path("tests/test_results/pipeline")

Expand All @@ -20,4 +20,14 @@ def test_binary_classification_pipeline(random_data_binary_classifier):
The simulated data.
"""
y_true, y_score = random_data_binary_classifier
pipeline.binary_classifier(y_true, y_score, save_fig_path=TEST_RESULTS_PATH / "pipeline.png")
pipeline.binary_classifier(y_true, y_score, save_fig_path=TEST_RESULTS_PATH/ "binary_classifier")


def test_multiclassification_pipeline():
    """
    Run the multiclass classification pipeline on simulated data.

    For every configured number of classes, random one-hot labels and scores
    are generated and passed to ``pipeline.multiclass_classifier``; the figures
    are saved below ``TEST_RESULTS_PATH``.
    """
    class_counts = (3,)
    for num_classes in class_counts:
        y_true, y_score = random_data_multiclass_classifier(num_classes=num_classes)
        # one output location per class count so results do not overwrite each other
        target = TEST_RESULTS_PATH / f"multiclass_{num_classes}_classes"
        pipeline.multiclass_classifier(y_true, y_score, save_fig_path=target)
36 changes: 35 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,38 @@ def random_data_binary_classifier() -> Tuple[np.ndarray, np.ndarray]:
y_score = np.zeros(y_true.shape) # a model's probability of class 1 predictions
y_score[y_true == 1] = np.random.beta(1, 0.6, y_score[y_true == 1].shape)
y_score[y_true == 0] = np.random.beta(0.5, 1, y_score[y_true == 0].shape)
return y_true, y_score
return y_true, y_score


def random_data_multiclass_classifier(num_classes: int = 3) -> Tuple[np.ndarray, np.ndarray]:
    """
    Create random data for multiclass classifier tests.

    Class frequencies are drawn at random, 1000 true labels are sampled from
    them and one-hot encoded. For every class column, scores are drawn from
    one Beta distribution for the samples that do not belong to the class and
    from another for the samples that do. The rows of the score matrix are
    not normalized to sum to 1.

    Parameters
    ----------
    num_classes : int, optional
        Number of classes to simulate, by default 3. Must be at least 1.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        The simulated data. y_true_one_hot, y_pred -- both of shape
        (1000, num_classes).

    Raises
    ------
    ValueError
        If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be at least 1, got {num_classes}")

    num_samples = 1000

    class_labels = np.arange(num_classes)
    class_probs = np.random.random(num_classes)
    class_probs = class_probs / class_probs.sum()  # normalize
    # True labels
    y_true = np.random.choice(class_labels, p=class_probs, size=num_samples)
    # one hot encoding
    y_true_one_hot = np.eye(num_classes)[y_true]

    # Predicted scores; every entry is overwritten in the loop below
    y_pred = np.ones(y_true_one_hot.shape)

    # (a, b) parameters of the Beta distributions, cycled over the classes:
    # negative_params for samples NOT belonging to the class of a column,
    # positive_params for samples belonging to it.
    negative_params = ((0.1, 0.4), (0.6, 1.2), (0.3, 0.8), (0.4, 1), (2, 5))
    positive_params = ((0.9, 0.4), (0.8, 0.1), (0.9, 0.5), (1.2, 0.3), (5, 2))

    # iterate through all the columns/labels and create a beta distribution for each label
    for i in range(num_classes):
        is_positive = y_true_one_hot[:, i] == 1
        is_negative = ~is_positive
        # modulo indexing lets the parameter cycle serve any number of classes
        a_neg, b_neg = negative_params[i % len(negative_params)]
        a_pos, b_pos = positive_params[i % len(positive_params)]
        # negatives are drawn before positives to keep the random stream order stable
        y_pred[is_negative, i] = np.random.beta(a_neg, b_neg, size=np.count_nonzero(is_negative))
        y_pred[is_positive, i] = np.random.beta(a_pos, b_pos, size=np.count_nonzero(is_positive))

    return y_true_one_hot, y_pred

0 comments on commit b5f6466

Please sign in to comment.