Skip to content

Commit

Permalink
Completed first draft
Browse files Browse the repository at this point in the history
  • Loading branch information
LucaCappelletti94 committed Jul 26, 2019
1 parent 55619a8 commit 63dd898
Show file tree
Hide file tree
Showing 12 changed files with 52 additions and 25 deletions.
1 change: 1 addition & 0 deletions .coverage
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{"/Users/lucacappelletti/github/miur_daad_balancing/tests/__init__.py":[1],"/Users/lucacappelletti/github/miur_daad_balancing/tests/test_balanced.py":[1,2,3,4,6,7,8,9,10,11,12,13,14],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/__init__.py":[1,2,3,4,7],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/__init__.py":[1,2,3,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/load_balanced.py":[1,2,4,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/load_data.py":[1,2,3,5,6,7],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/current_path.py":[1,3,5,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/load_full_balanced.py":[1,2,4,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/utils/get_classes.py":[1,2,4,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/umbalanced.py":[1,2,4,6],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/balanced.py":[1,2,3,4,6,8,9],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/originals/__init__.py":[1,2,5],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/originals/sampling_class_portion.py":[1,4,9,10,11,12,15,16,17,18,19,20,22,23,26,27,28,29,30,31,32,35,36,37,39],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/originals/truncate_sample_size.py":[1,4,34,35,36,37,40,41,42,43,45,47,49,51,52,54,55,56,57,58,61,62,63,65],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/full_balanced.py":[1,2,3,4,5,7,9,10,11],"/Users/lucacappelletti/github/miur_daad_balancing/tests/utils/__init__.py":[1,2,5],"/Users/lucacappelletti/github/miur_daad_balancing/tests/utils/sample_data.py":[1,2,3,4,6,8,9,10,11],"/Users/lucacappelletti/github/miur_daad_balancing/tests/utils/compa
re_tuples.py":[1,2,4,6,7],"/Users/lucacappelletti/github/miur_daad_balancing/tests/test_full_balanced.py":[1,2,3,4,6,7,8,9,10,11,12,13,14,15,16],"/Users/lucacappelletti/github/miur_daad_balancing/tests/test_import.py":[1,3,4],"/Users/lucacappelletti/github/miur_daad_balancing/tests/test_umbalanced.py":[1,2,3,5,6,7,8,9],"/Users/lucacappelletti/github/miur_daad_balancing/tests/test_version.py":[1,2,4,5],"/Users/lucacappelletti/github/miur_daad_balancing/miur_daad_balancing/__version__.py":[1,2]}}
1 change: 1 addition & 0 deletions .coverage.MBP-di-Luca.8199.029523
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{}}
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Testing dataset balancing techniques from previous works.
:alt: Pypi project

.. |downloads| image:: https://pepy.tech/badge/miur_daad_balancing
:target: https://pepy.tech/badge/miur_daad_balancing
:target: https://pepy.tech/badge/miur-daad-balancing
:alt: Pypi total project downloads

.. |codacy| image:: https://api.codacy.com/project/badge/Grade/b4a7c72f058b433597426fa696d71539
Expand Down
4 changes: 3 additions & 1 deletion miur_daad_balancing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from .utils import get_classes, load_balanced, load_full_balanced
from .umbalanced import umbalanced
from .balanced import balanced
from .full_balanced import full_balanced

__all__ = [
"load_balanced", "load_full_balanced", "get_classes", "umbalanced"
"load_balanced", "load_full_balanced", "get_classes", "umbalanced", "balanced", "full_balanced"
]
7 changes: 4 additions & 3 deletions miur_daad_balancing/balanced.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Tuple
from .utils import load_balanced
from .originals import truncate_sample_size
import numpy as np

def balanced(training:np.ndarray, testing:np.ndarray)->Tuple[np.ndarray, np.ndarray]:
    """Return training data truncated to the configured maximum, leaving testing untouched.

    Parameters
    ----------
    training:
        Unpacked positionally into ``truncate_sample_size``, so despite the
        annotation it is used as a ``(data, classes)`` pair.
    testing:
        Handed back as-is; this function never reads or modifies it.

    Returns
    -------
    ``((X_train, y_train), testing)``, where ``X_train`` and ``y_train`` are
    the first two of the four values returned by ``truncate_sample_size``.
    """
    # The per-class cap is the "max" entry of the mapping returned by load_balanced().
    # The last two values returned by truncate_sample_size are not needed here.
    X_train, y_train, _, _ = truncate_sample_size(*training, max_size_given=load_balanced()["max"])
    return (X_train, y_train), testing
11 changes: 11 additions & 0 deletions miur_daad_balancing/full_balanced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import Tuple
from .utils import load_full_balanced
from .originals import sampling_class_portion
from .balanced import balanced
import numpy as np

def full_balanced(training:np.ndarray, testing:np.ndarray)->Tuple[np.ndarray, np.ndarray]:
    """Balance the training data and resample the testing data to the configured class portions.

    Parameters
    ----------
    training:
        Passed to ``balanced`` together with ``testing``; only the training
        part of that call's result is kept.
    testing:
        Unpacked positionally into ``sampling_class_portion``, so it is used
        as a ``(data, classes)`` pair.

    Returns
    -------
    ``(training, (X_test, y_test))``: the training result of ``balanced`` and
    the first two of the four values returned by ``sampling_class_portion``.
    """
    # Training is processed first; the statement order is kept exactly as in
    # the original so any random sampling happens in the same sequence.
    capped_training, _untouched_testing = balanced(training, testing)

    portions = load_full_balanced()
    resampled = sampling_class_portion(*testing, class_portion=portions)
    X_test, y_test, _, _ = resampled

    resampled_testing = (X_test, y_test)
    return capped_training, resampled_testing
6 changes: 6 additions & 0 deletions miur_daad_balancing/originals/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .sampling_class_portion import sampling_class_portion
from .truncate_sample_size import truncate_sample_size

__all__ = [
"sampling_class_portion", "truncate_sample_size"
]
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np


def sampling_class_portion(data, classes, others=None, class_portion=None, rng=np.random.RandomState(100)):
def sampling_class_portion(data, classes, others=None, class_portion=None):
"""
Sampling data points in each class to keep a given portion among classes.
class_portion: dict, the portion for each class, each value should be at least 1, e.g. class_portion={"class0":5,"class1":1,"class3":2}
Expand All @@ -27,7 +27,7 @@ def sampling_class_portion(data, classes, others=None, class_portion=None, rng=n
ind_this_num = indices_range[indices == i]
replacetf = True if sample_sizes[u[i]] < (
size_min*class_portion[u[i]]) else False
ind_this_reduced = ind_this_num[rng.choice(
ind_this_reduced = ind_this_num[np.random.choice(
sample_sizes[u[i]], size=size_min*class_portion[u[i]], replace=replacetf)]
indices_all = np.append(indices_all, ind_this_reduced)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np


def truncate_sample_size(data, classes, others=None, max_size_given=None, rng=np.random.RandomState(100)):
def truncate_sample_size(data, classes, others=None, max_size_given=None):
"""
Balance sample size of a data set among classes.
Expand Down Expand Up @@ -42,7 +42,6 @@ def truncate_sample_size(data, classes, others=None, max_size_given=None, rng=np
sample_sizes.append(sample_size_this)
sample_sizes = np.array(sample_sizes, dtype=int)

size_min = np.amin(sample_sizes) # smallest sample size
size_max = np.amax(sample_sizes) # largest sample size

if size_max < max_size_given:
Expand All @@ -54,7 +53,7 @@ def truncate_sample_size(data, classes, others=None, max_size_given=None, rng=np

for i in range(num_u):
ind_this_num = indices_range[indices == i]
ind_this_reduced = ind_this_num[rng.choice(
ind_this_reduced = ind_this_num[np.random.choice(
len(ind_this_num), size=sample_sizes[i], replace=False)]
indices_all = np.append(indices_all, ind_this_reduced)

Expand Down
14 changes: 9 additions & 5 deletions tests/test_balanced.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from miur_daad_balancing import balanced, load_balanced
import numpy as np
from .utils import sample_data, compare_tuples, truncate_sample_size
from miur_daad_balancing.originals import truncate_sample_size, sampling_class_portion
from .utils import sample_data, compare_tuples

def test_umbalanced():
def test_balanced():
    """Check balanced() against a direct truncate_sample_size call on the same sample data."""
    training, testing = sample_data()
    # Seed before the reference call and again before balanced() so both
    # start from the same numpy global random state.
    np.random.seed(42)
    X_train, y_train, _, _ = truncate_sample_size(*training, max_size_given=load_balanced()["max"])
    original_balanced_training = (X_train, y_train)
    np.random.seed(42)
    balanced_training, balanced_testing = balanced(training, testing)
    # Training must match the reference; testing must come back unchanged.
    assert compare_tuples(original_balanced_training, balanced_training)
    assert compare_tuples(testing, balanced_testing)
18 changes: 11 additions & 7 deletions tests/test_full_balanced.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from miur_daad_balancing import full_balanced, load_full_balanced, load_balanced
from miur_daad_balancing import full_balanced, load_balanced, load_full_balanced
import numpy as np
from .utils import sample_data, compare_tuples, sampling_class_portion, truncate_sample_size
from miur_daad_balancing.originals import truncate_sample_size, sampling_class_portion
from .utils import sample_data, compare_tuples

def test_umbalanced():
def test_full_balanced():
    """Check the training half of full_balanced() against a direct truncate_sample_size call."""
    training, testing = sample_data()
    # Seed before each call so every run starts from the same numpy global
    # random state.
    np.random.seed(42)
    X_train, y_train, _, _ = truncate_sample_size(*training, max_size_given=load_balanced()["max"])
    original_balanced_training = (X_train, y_train)
    np.random.seed(42)
    X_test, y_test, _, _ = sampling_class_portion(*testing, class_portion=load_full_balanced())
    # NOTE(review): this reference testing split is computed but never
    # asserted. full_balanced() processes training before it resamples
    # testing, so its random state at that point may differ from this freshly
    # seeded call - confirm before adding a comparison.
    original_balanced_testing = (X_test, y_test)
    np.random.seed(42)
    balanced_training, balanced_testing = full_balanced(training, testing)
    assert compare_tuples(original_balanced_training, balanced_training)
4 changes: 1 addition & 3 deletions tests/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from .sampling_class_portion import sampling_class_portion
from .truncate_sample_size import truncate_sample_size
from .sample_data import sample_data
from .compare_tuples import compare_tuples

__all__ = [
"sampling_class_portion", "truncate_sample_size", "compare_tuples"
"compare_tuples", "sample_data"
]

0 comments on commit 63dd898

Please sign in to comment.