# Dummy boolean data

Regular Python code located in ...

... using these data:

Imports.

In [1]:
# EpyNN/nnlive/dummy_boolean/prepare_dataset.ipynb
# Standard library imports
import random

# Local application/library specific imports
from settings import dataset as se_dataset

Set `random` seed for reproducibility.

In [2]:
# Seed the module-level generator so the random draws are repeatable
random.seed(1)

Dummy boolean features.

In [3]:
def features_boolean(N_FEATURES=11):
    """Draw a list of random boolean features.

    Each feature is picked independently with ``random.choice`` from
    ``[True, False]``, so the result depends on the state of the
    module-level ``random`` generator.

    :param N_FEATURES: Number of features to draw
    :type N_FEATURES: int

    :return: Random boolean features of length N_FEATURES
    :rtype: list[bool]
    """
    # The two possible values of a single feature
    outcomes = [True, False]

    drawn = []

    # One independent draw per requested feature
    for _ in range(N_FEATURES):
        drawn.append(random.choice(outcomes))

    return drawn

Check the function.

In [4]:
# Draw one list of features with the default N_FEATURES and display it
features = features_boolean()
print(features)

[True, True, False, True, False, False, False, False, True, True, False]


Label from features.

In [5]:
def label_features(features):
    """Prepare label associated with features.

    The label is positive when the features hold more True than False
    values, and negative when they hold more False than True values.

    :param features: random boolean features of length N_FEATURES
    :type features: list[bool]

    :return: One-hot encoded label, [1, 0] (positive) or [0, 1] (negative)
    :rtype: list[int]

    :raises ValueError: If features holds as many True as False values
        (this includes an empty list), since no label can be decided
    """
    # Count each outcome once
    n_true = features.count(True)
    n_false = features.count(False)

    # A tie matches neither class; fail explicitly instead of reaching
    # the return statement with no label assigned
    if n_true == n_false:
        raise ValueError(
            'Cannot label features with as many True as False values '
            '(%d each); use an odd number of features.' % n_true
        )

    # One-hot encoded positive (+) and negative (-) labels
    p_label = [1, 0]
    n_label = [0, 1]

    return p_label if n_true > n_false else n_label

Check the function.

In [6]:
# Label the features currently bound to `features` and display the result
label = label_features(features)
print(label)

[0, 1]


Prepare labeled dataset.

In [7]:
def labeled_dataset(se_dataset):
    """Build a shuffled dummy dataset of labeled samples.

    Every sample is a two-item list ``[features, label]`` where both
    features and label are lists.

    :param se_dataset: Settings for dataset preparation
    :type se_dataset: dict

    :return: A dataset of length N_SAMPLES
    :rtype: list[list[list[bool],list[int]]]
    """
    def draw_sample():
        # Features first, then the label derived from them
        drawn = features_boolean()
        return [drawn, label_features(drawn)]

    # Number of samples comes from the settings (see ./settings.py)
    n_samples = se_dataset['N_SAMPLES']

    # Draw the samples one after the other
    samples = [draw_sample() for _ in range(n_samples)]

    # Randomize sample order in place
    random.shuffle(samples)

    return samples

Check the function.

In [8]:
# Set the sample count to 10 for this check
se_dataset['N_SAMPLES'] = 10

# Print each sample as its features followed by its label
for sample in labeled_dataset(se_dataset):
    features, label = sample
    print(features, label)

[True, False, False, True, False, False, True, True, False, True, True] [1, 0]
[True, True, False, True, False, True, True, False, False, True, False] [1, 0]
[False, True, False, False, False, False, False, True, False, False, True] [0, 1]
[False, True, False, False, False, False, True, False, True, False, False] [0, 1]
[False, True, False, True, True, False, False, True, True, True, False] [1, 0]
[False, False, False, False, True, False, True, True, False, True, False] [0, 1]
[False, True, False, False, True, True, True, True, True, True, False] [1, 0]
[True, False, False, True, False, True, True, False, False, False, True] [0, 1]
[True, True, False, False, True, False, True, True, False, True, False] [1, 0]
[True, True, True, True, False, True, True, True, True, False, True] [1, 0]


Prepare unlabeled dataset.

In [9]:
def unlabeled_dataset(N_SAMPLES=1):
    """Build a dummy dataset of unlabeled samples.

    Every sample is a two-item list ``[features, []]``: features is a
    list of random booleans and the label slot is an empty list.

    :param N_SAMPLES: Length for unlabeled dataset
    :type N_SAMPLES: int

    :return: A dataset of length N_SAMPLES
    :rtype: list[list[list[bool],list]]
    """
    # Each sample pairs freshly drawn features with its own empty label list
    return [[features_boolean(), []] for _ in range(N_SAMPLES)]

Check the function.

In [10]:
# Print five unlabeled samples; the label slot of each one is an empty list
for sample in unlabeled_dataset(N_SAMPLES=5):
    features, label = sample
    print(features, label)

[False, False, True, True, False, False, False, False, True, False, True] []
[False, True, False, True, True, True, False, True, True, True, False] []
[False, True, False, True, True, False, False, False, True, False, True] []
[True, True, False, True, True, False, False, True, False, False, True] []
[True, True, True, False, True, True, False, True, False, True, True] []


Done.