In [17]:
import numpy as np
import pandas as pd

a_seed: int|None = None
rng = np.random.default_rng(seed=a_seed)  # random number generator

In [25]:
def sim_norm(rng: np.random.Generator, size: int = 5) -> pd.DataFrame:
    """Simulate a small mixed-type data frame with ``size`` rows.

    Despite the name, only ``col5`` is normally distributed; the other columns
    are drawn from different distributions:

    * ``col1`` - Poisson counts with mean 5
    * ``col2`` - uniform floats in [0, 1)
    * ``col3`` - letters sampled with replacement from "a".."e"
    * ``col4`` - colours sampled with replacement from blue / red / yellow
    * ``col5`` - standard normal floats

    Args:
        rng: NumPy random generator to draw from; its state is advanced.
        size: Number of rows to simulate.

    Returns:
        A DataFrame with columns ``col1`` .. ``col5`` and ``size`` rows.
    """
    # Every column must use the same `size`; otherwise the DataFrame
    # constructor rejects the mismatched lengths.
    # The dict values are evaluated top to bottom, so the generator is consumed
    # in the order col1 -> col5.
    return pd.DataFrame(
        {
            "col1": rng.poisson(lam=5, size=size),
            "col2": rng.random(size=size),
            "col3": rng.choice(a=["a", "b", "c", "d", "e"], size=size, replace=True),
            "col4": rng.choice(a=["blue", "red", "yellow"], size=size, replace=True),
            "col5": rng.normal(loc=0.0, scale=1.0, size=size),
        }
    )
    

In [27]:
# Draw the default five-row mixed-type frame from the shared generator and display it.
df1 = sim_norm(rng=rng)
df1

Unnamed: 0,col1,col2,col3,col4,col5
0,4,0.264006,c,blue,0.144654
1,4,0.799304,c,blue,-0.070293
2,10,0.78507,d,yellow,-0.292508
3,1,0.975324,d,yellow,-1.002062
4,2,0.962285,a,red,-0.55346


In [20]:
# Value of col4 in the first row: pick the column, then take position 0.
df1["col4"].iloc[0]

'yellow'

In [21]:
def sim_mvn(rng, size: int = 5) -> pd.DataFrame:
    """Simulate one independent normal column and three jointly normal columns.

    ``y`` is drawn from N(10, 2.0**2). ``x1``, ``x2``, ``x3`` are drawn from a
    multivariate normal with means (1, 2, 4) and covariance ``a a^T`` for
    ``a = (0.75, 0.85, 0.95)``. That covariance is a 3 x 3 outer product:
    symmetric and positive semi-definite, but of rank one, so the three x
    columns are perfectly correlated with each other.

    Args:
        rng: NumPy random generator to draw from; its state is advanced
            (``y`` is drawn first, then the x block).
        size: Number of rows to simulate.

    Returns:
        A DataFrame with columns ``y``, ``x1``, ``x2``, ``x3`` and ``size`` rows.
    """
    # Draw y before x so the generator is consumed in a fixed order.
    response = rng.normal(loc=10, scale=2.0, size=size)

    means = np.array([1, 2, 4])
    loadings = np.array([0.75, 0.85, 0.95])
    # Outer product of the loadings with themselves -> 3 x 3 covariance matrix.
    cov = np.outer(loadings, loadings)
    predictors = rng.multivariate_normal(mean=means, cov=cov, size=size)

    # Put y in front of the x block as a single (size, 4) array.
    stacked = np.concatenate((response.reshape(-1, 1), predictors), axis=1)
    return pd.DataFrame(data=stacked, columns=["y", "x1", "x2", "x3"])

In [22]:
# Simulate the default five rows of y plus the three jointly normal x columns and display them.
df2 = sim_mvn(rng=rng)
df2

Unnamed: 0,y,x1,x2,x3
0,11.030851,0.908081,1.895825,3.883569
1,10.55355,-1.004513,-0.271781,1.460951
2,8.036172,0.552324,1.492634,3.432943
3,6.896706,2.228606,3.39242,5.556234
4,5.658134,1.248517,2.281653,4.314788


In [23]:
def get_fn(name: str, *suffix, ext: str = ".xlsx", sep: str = "_"):
    """Assemble a file name from a base name, optional suffixes and an extension.

    Args:
        name: Leading part of the file name.
        *suffix: Further string parts appended after ``name``, each preceded
            by ``sep``.
        ext: Text appended verbatim at the end (include the leading dot).
        sep: Separator placed between ``name`` and each suffix.

    Returns:
        The joined parts followed by ``ext``; for example
        ``get_fn("base", "lg", ext=".tmp")`` gives ``"base_lg.tmp"``.
    """
    parts = (name,) + suffix
    stem = sep.join(parts)
    return stem + ext

In [24]:
# Build a sample file name from a base, two suffixes and a custom extension.
# Other call shapes and what they return:
#   get_fn("base", "lg", "ytd")  -> 'base_lg_ytd.xlsx'
#   get_fn("base", "lg")         -> 'base_lg.xlsx'
#   get_fn("base", ext=".tmp")   -> 'base.tmp'
#   get_fn("base")               -> 'base.xlsx'
out = get_fn("base", "lg", "ytd", sep="_", ext=".tmp")
out

'base_lg_ytd.tmp'