# Simple numpy data generator

## Required functionality

In [29]:
import numpy as np

def generate_data_samples(
    x_mean: list[float],
    x_cov: list[list[float]],
    target: float,
    n: int = 1000,
) -> tuple[np.ndarray, np.ndarray]:
    """Generate labelled samples for one class of a dummy classification problem.

    The input features are drawn from a multivariate normal distribution
    described by ``x_mean`` and ``x_cov``. Every sample receives the same
    constant ``target`` value, so calling this function once per class and
    concatenating the results yields a synthetic classification data set.

    Args:
        x_mean (list[float]): The mean value of each feature.
        x_cov (list[list[float]]): The covariance matrix as a list of rows.
            It must be square with one row/column per entry in ``x_mean``.
        target (float): Target value assigned to every generated sample.
        n (int, optional): Number of samples to create. Defaults to 1000.

    Raises:
        ValueError: If the number of rows in ``x_cov`` differs from the
            length of ``x_mean``, if any row of ``x_cov`` has a different
            length than ``x_mean``, or if ``n`` is smaller than 1.

    Returns:
        tuple[np.ndarray, np.ndarray]: The features with shape
        ``(n, len(x_mean))`` and the targets with shape ``(n,)``.
    """
    n_features = len(x_mean)

    if n_features != len(x_cov):
        raise ValueError(
            "Dimensions of input features and covariance matrix does not match!"
        )

    if any(len(row) != n_features for row in x_cov):
        raise ValueError("Dimensions of diagonal covariance elements does not match")

    if n < 1:
        raise ValueError("We need to generate one or more elements")

    # Draw all n feature vectors in a single vectorised call instead of
    # looping in Python and sampling one vector at a time.
    x = np.random.multivariate_normal(mean=x_mean, cov=x_cov, size=n)

    # The targets are not random: every sample of this class shares one label.
    y = np.full(n, target)

    return x, y


def concat_data(
    xs: tuple[np.ndarray, ...],
    ys: tuple[np.ndarray, ...],
    shuffle: bool = True,
) -> tuple[np.ndarray, np.ndarray]:
    """Concatenate multiple sample sets into a single set of sample pairs.

    The i-th array in ``xs`` is paired with the i-th array in ``ys``. All
    arrays are stacked along the first axis and, optionally, the resulting
    pairs are shuffled with one common permutation so that each feature row
    stays aligned with its target.

    Args:
        xs (tuple[np.ndarray, ...]): Tuple of feature samples.
        ys (tuple[np.ndarray, ...]): Tuple of target samples.
        shuffle (bool, optional): Shuffle the sample pairs. Defaults to True.

    Raises:
        ValueError: If ``xs`` and ``ys`` hold a different number of arrays,
            or if any feature array and its matching target array contain a
            different number of samples.

    Returns:
        tuple[np.ndarray, np.ndarray]: x features, y targets
    """
    # zip() stops at the shorter input, so a differing number of arrays
    # would otherwise go unnoticed and yield misaligned features/targets.
    if len(xs) != len(ys):
        raise ValueError("Number of feature arrays and target arrays does not match!")

    if any(len(x_part) != len(y_part) for x_part, y_part in zip(xs, ys)):
        raise ValueError("Number of samples in x and y does not match!")

    x = np.concatenate(xs, axis=0)
    y = np.concatenate(ys, axis=0)

    if shuffle:
        # Apply the same random permutation to both arrays to keep pairs intact.
        order = np.random.permutation(len(x))
        x = x[order]
        y = y[order]
    return x, y


## Generate data samples

In [30]:
# Number of samples drawn for each of the two classes.
n = 100

# Both classes share the same 3x3 identity covariance matrix.
identity_cov = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

# Class 1: features centred on (2, 2, 2).
x_1, y_1 = generate_data_samples(x_mean=[2, 2, 2], x_cov=identity_cov, target=1, n=n)

# Class 0: features centred on (-2, -2, -2).
x_2, y_2 = generate_data_samples(
    x_mean=[-2, -2, -2], x_cov=identity_cov, target=0, n=n
)

# Merge both classes into one shuffled data set of feature/target pairs.
feature_sets = (x_1, x_2)
target_sets = (y_1, y_2)
x, y = concat_data(xs=feature_sets, ys=target_sets, shuffle=True)

x.shape, y.shape


((200, 3), (200,))

In [31]:
x
        

array([[-2.82774834e+00, -2.54396394e+00, -7.67917024e-01],
       [-1.89820983e+00, -1.79177957e+00, -6.24792557e-01],
       [ 2.82753296e+00,  1.44412055e+00,  2.50576115e+00],
       [ 3.17773223e+00,  2.46677538e+00,  1.00269178e+00],
       [ 1.02190744e+00,  1.69853182e+00,  1.65623833e+00],
       [-1.09204320e+00, -1.80734611e+00, -1.23954228e+00],
       [-1.73818528e+00, -3.71389321e-01, -2.07030083e+00],
       [ 1.12879496e+00,  2.27651935e+00,  2.67216882e+00],
       [-3.36592115e+00, -1.74622693e+00, -5.99067236e-01],
       [-9.66918059e-01, -2.05115891e+00, -1.71374728e+00],
       [-1.70808057e+00, -3.59113198e+00, -1.77593486e+00],
       [-1.29442748e+00, -1.97276770e+00, -4.05132869e+00],
       [-2.37014142e+00, -1.31861483e+00, -9.20069582e-01],
       [ 1.51778501e+00,  2.43907495e+00,  3.42998767e+00],
       [ 1.66544081e+00,  9.88514058e-01,  3.26756046e+00],
       [ 1.39724840e+00,  2.88937483e+00,  1.16231649e+00],
       [-3.47071194e+00, -2.22422238e+00

In [32]:
y

array([0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 0])