# Data Generation

> This module generates synthetic datasets: points on a straight line $y=ax+b$, either exact or with Gaussian noise added.

In [None]:
#|default_exp data_gen

In [None]:
#| export
import numpy as np

In [None]:
#| export
def line(x:np.ndarray,# Points at which the line is evaluated (lists/tuples are converted to arrays)
         a=1.0,#Slope
         b=0.5,#Intercept
         interval=(-10.,10.),# Unused here; kept so the signature parallels `noisy_line`
         noise=(0,1E-5),# Unused here; kept so the signature parallels `noisy_line`
         nsamples=100# Unused here; the output has as many elements as `x`
        )-> np.ndarray: # the array $y=ax+b$
    '''Evaluate the noiseless line $y=a x+b$ at the given points `x`.'''
    # A plain Python list/tuple cannot be multiplied by a float slope, so turn
    # it into an array first; every other input is passed through untouched.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    return a*x+b

In [None]:
#| export
def noisy_line(a=1.0,#Slope
               b=0.5,#Intercept
               interval=(-10.,10.),#Interval [low, high] from which x is drawn uniformly.
               noise=(0,1E-5),# Noise [$\mu$,$\sigma$] with mean $\mu$ and standard deviation $\sigma$
               nsamples=100,# Number of samples
               rng=None# Optional random source exposing `uniform`/`normal` (e.g. `np.random.default_rng(seed)`); `None` uses the global `np.random` state
              ):
    '''
    Create a dataset of nsamples points following the linear regression $y=a x+b$, with gaussian noise added on y.

    Returns
    -------
    tuple
        - a random x vector of size nsamples, drawn uniformly from the interval
        - the noisy vector following $y= ax+b$
    '''
    mu, sigma = noise
    # Without an explicit generator, fall back to the module-level functions so
    # that code relying on `np.random.seed(...)` keeps getting the same values.
    sampler = np.random if rng is None else rng
    # x is drawn before the noise; this order determines the values obtained
    # for a given seed, so it must not be swapped.
    x = sampler.uniform(low=interval[0], high=interval[1], size=nsamples)
    vnoise = sampler.normal(loc=mu, scale=sigma, size=nsamples)
    return x, a*x+b+vnoise

In [None]:
#| hide
# Run nbdev's export step for this project (presumably writes the `#| export`
# cells above to the `data_gen` module named by `default_exp` — confirm against nbdev docs).
import nbdev; nbdev.nbdev_export()

In [None]:
nbdev.nbdev_export??