# 11_data_generate

In this notebook, we generate simulated datasets at different levels of complexity: distance distributions built from 1 to 4 Gaussian components, together with noise.

In [None]:
#all_no_test
#def_exp data_generate

### Package preparation

In [None]:
import deerlab as dl
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import skewnorm, vonmises, ttest_rel


### Function preparation

In [None]:
def generate_data_dl( n_duplicate_params: int = 1,
    n_points: int = 192,
    n_dist_pts: int = 128,
    n_gauss_lo : int = 1,
    n_gauss_hi : int = 3,
    dist_mean_lo : float = 15.,
    dist_mean_hi : float = 90.,
    width_lo : float = 0.05,
    width_hi : float = 0.5,
    skew_lo : float = -3.,
    skew_hi : float = 3.,
    rmax_lo : float = 50.,
    rmax_hi : float = 160.,
    tmin_lo : float = -0.5,
    tmin_hi : float = 0.,
    tmax_lo : float = 2.,
    tmax_hi : float = 15.,
    dim_lo: float = 2.,
    dim_hi: float = 3.5,
    frac_3d: float = 0.5,
    depth1_lo : float = 0.05,
    depth1_hi : float = 0.6,
    depth2_lo : float = 0.0,
    depth2_hi : float = 0.2,
    depth3_lo : float = 0.0,
    depth3_hi : float = 0.2,
    depth4_lo : float = 0.0,
    depth4_hi : float = 0.01,
    t4_min: float = 0.016,
    t4_max: float = 0.096,
    noise_lo : float = 0.0,
    noise_hi : float = 0.1,
    bckg_lo : float = -6.,
    bckg_hi : float = -0.5,
    vm_kappa: float = 180/np.pi,
    use_pathways: bool = True ):
    '''
    Simulate one random decay signal V(t) together with the distance
    distribution P(r) it was generated from.

    All random quantities are drawn from the global ``np.random`` state, so
    seed it beforehand for reproducible output.

    Parameters
    ----------
    n_duplicate_params : number of times P(r) and V(t) are re-drawn on the
        same time/distance axes and kernel. Only the LAST draw is returned.
        Must be >= 1.
    n_points : number of points on the time axis.
    n_dist_pts : number of points on the distance axis.
    n_gauss_lo, n_gauss_hi : inclusive range for the number of skew-normal
        components summed into P(r). ``n_gauss_lo`` must be >= 1.
    dist_mean_lo, dist_mean_hi : range of the component location parameter
        (the upper end is additionally capped, see the code).
    width_lo, width_hi : range of the relative width; the component scale is
        ``width * location / 2``.
    skew_lo, skew_hi : range of the skew-normal shape parameter.
    rmax_lo, rmax_hi : range of the last point of the distance axis (the
        axis always starts at 15).
    tmin_lo, tmin_hi : range of the first point of the time axis.
    tmax_lo, tmax_hi : range of the trace length added to ``tmin`` to get
        the last point of the time axis.
    dim_lo, dim_hi, frac_3d : the background dimension is 3 unless a uniform
        draw on [0, 1) exceeds ``frac_3d``, in which case it is drawn from
        [dim_lo, dim_hi).
    depth1_lo, depth1_hi : range of the modulation depth applied to the
        kernel-transformed signal.
    depth2_*, depth3_*, depth4_* : amplitude ranges of the three additional
        pathways (used only when ``use_pathways`` is true).
    t4_min, t4_max : range of the offset subtracted from ``tmax`` for the
        refocusing time of the third pathway.
    noise_lo, noise_hi : range of a relative noise level. The level is drawn
        but NOT applied to the returned signal.
    bckg_lo, bckg_hi : range of log10 of the background decay rate.
    vm_kappa : not used by this function (kept for interface compatibility).
    use_pathways : build the kernel with the multi-pathway list if true,
        otherwise with the default single-pathway kernel.

    Returns
    -------
    distrs : array of shape (n_dist_pts,), P(r) normalised to sum to 1.
    decays : array of shape (n_points, 3); column 0 is V(t) normalised to a
        maximum of 1, column 1 is ``t / 10 - 1``, column 2 is
        ``(t - t.min()) / 10 - 1`` in reversed order.
    t_list : the time axis, shape (n_points,).
    r_list : the distance axis, shape (n_dist_pts,).

    Raises
    ------
    ValueError : if ``n_duplicate_params < 1`` or ``n_gauss_lo < 1``.
    '''
    # Fail early with a clear message: with zero iterations nothing would be
    # produced to return, and zero components would give an all-NaN P(r).
    if n_duplicate_params < 1:
        raise ValueError( 'n_duplicate_params must be >= 1, got {}'.format( n_duplicate_params ) )
    if n_gauss_lo < 1:
        raise ValueError( 'n_gauss_lo must be >= 1, got {}'.format( n_gauss_lo ) )

    # Output buffer: signal plus two time-position columns.
    decays = np.zeros( ( n_points, 3 ) )

    # Random time window and distance range.
    tmin = np.random.uniform( low=tmin_lo, high=tmin_hi )
    tmax = np.random.uniform( low=tmax_lo, high=tmax_hi ) + tmin
    # Cap for the component locations, growing with the cube root of tmax.
    rmax = np.cbrt( tmax * 5.2e4 )

    maxdist = np.random.uniform( rmax_lo, rmax_hi )

    r = np.linspace( 15., maxdist, num=n_dist_pts )
    t = np.linspace( tmin, tmax, num=n_points )

    # The kernel is built once and shared by every draw below.
    # r is divided by 10 before being handed to DeerLab
    # (presumably an Angstrom -> nm conversion; TODO confirm).
    if use_pathways:
        # Each entry is [amplitude, refocusing time].
        # NOTE(review): list-style pathways match the older DeerLab API;
        # confirm against the installed version.
        pathways = [
            [ 1., 0. ],
            [ np.random.uniform( depth2_lo, depth2_hi ), tmax ],
            [ np.random.uniform( depth3_lo, depth3_hi ), tmax - np.random.uniform( t4_min, t4_max ) ],
            [ np.random.uniform( depth4_lo, depth4_hi ), tmin ],
        ]
        K = dl.dipolarkernel( t, r / 10., pathways=pathways )
    else:
        K = dl.dipolarkernel( t, r / 10. )

    # Upper bound for component locations; invariant across draws.
    mean_hi = min( rmax, dist_mean_hi, maxdist )

    for _ in range( n_duplicate_params ):
        # Random mixture weights; their count is the number of components.
        n_gauss = np.random.randint( n_gauss_lo, n_gauss_hi + 1 )
        amps = np.random.uniform( size=n_gauss )
        amps /= amps.sum()

        # Accumulate the weighted skew-normal components into P(r).
        pr = np.zeros( n_dist_pts )
        for amp in amps:
            meandist = np.random.uniform( dist_mean_lo, mean_hi )
            stdev = np.random.uniform( width_lo, width_hi ) * ( meandist / 2 )
            skew = np.random.uniform( skew_lo, skew_hi )
            pr += amp * skewnorm.pdf( r, skew, meandist, stdev )
        # Normalise P(r) to unit sum.
        pr /= pr.sum()

        # Remaining random signal parameters.
        depth1 = np.random.uniform( depth1_lo, depth1_hi )
        # NOTE(review): the noise level is drawn but never applied to V.
        # The draw is kept so that seeded runs consume the same random
        # stream as before; confirm whether noise is meant to be added here.
        noise = depth1 * np.random.uniform( noise_lo, noise_hi )  # noqa: F841
        # Background decay rate, log-uniform.
        bckg = 10 ** np.random.uniform( bckg_lo, bckg_hi )
        dim = 3.
        if np.random.uniform( 0., 1. ) > frac_3d:
            dim = np.random.uniform( dim_lo, dim_hi )

        # Build V(t): kernel-transformed P(r), scaled by the modulation
        # depth and multiplied by a stretched-exponential background.
        O = K @ pr
        O /= O.max()
        V = 1. - depth1 * ( 1. - O )
        V *= np.exp( -1 * ( bckg * np.abs( t ) ) ** ( dim / 3. ) )
        # Normalise V to a maximum of 1.
        V /= V.max()

        # Each draw overwrites the previous one; only the last is returned.
        decays[ :, 0 ] = V
        decays[ :, 1 ] = ( t / 10. ) - 1.
        decays[ :, 2 ] = ( ( ( t - t.min() ) / 10 ) - 1 )[ ::-1 ]

    return pr, decays, t, r
 
    

In [None]:
# Draw one random sample using the default parameter ranges and unpack the
# four returned values.
distrs, decays, t_list, r_list = generate_data_dl()

array([[ 0.99964324, -1.01030391, -0.28076313],
       [ 0.99992205, -1.00653827, -0.28452876],
       [ 1.        , -1.00277263, -0.2882944 ],
       [ 0.99984112, -0.99900699, -0.29206004],
       [ 0.99938169, -0.99524135, -0.29582568],
       [ 0.9987218 , -0.99147572, -0.29959132],
       [ 0.99786243, -0.98771008, -0.30335695],
       [ 0.99680499, -0.98394444, -0.30712259],
       [ 0.99555127, -0.9801788 , -0.31088823],
       [ 0.99410349, -0.97641316, -0.31465387],
       [ 0.99246425, -0.97264752, -0.31841951],
       [ 0.99063652, -0.96888189, -0.32218514],
       [ 0.98862369, -0.96511625, -0.32595078],
       [ 0.98642948, -0.96135061, -0.32971642],
       [ 0.98405798, -0.95758497, -0.33348206],
       [ 0.98151362, -0.95381933, -0.3372477 ],
       [ 0.97880118, -0.9500537 , -0.34101333],
       [ 0.97592573, -0.94628806, -0.34477897],
       [ 0.97289266, -0.94252242, -0.34854461],
       [ 0.96970762, -0.93875678, -0.35231025],
       [ 0.96637655, -0.93499114, -0.356