# Generating Data w/ Numpy

In [58]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()

In [59]:
# np.empty() only reserves memory for a 2x3 array without initialising it, so the
# displayed values are whatever was already in that memory and must not be relied on.
array_empty = np.empty((2, 3))
array_empty

array([[0., 0., 0.],
       [0., 0., 0.]])

In [60]:
# np.zeros(): a 2x3 array filled with zeros (floats unless a dtype is given).
array_0s = np.zeros((2, 3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [61]:
# Same 2x3 block of zeros, but stored as 8-bit integers instead of the default floats.
array_0s = np.zeros((2, 3), dtype=np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [16]:
# np.ones(): a 2x3 array filled with ones (floats unless a dtype is given).
array_1s = np.ones((2, 3))
array_1s

array([[1., 1., 1.],
       [1., 1., 1.]])

In [17]:
# np.full() takes one extra mandatory argument besides the shape: fill_value,
# the value that is copied into every cell of the new array.
array_full = np.full((2, 3), 2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

In [20]:
# fill_value does not have to be numeric: a string produces a fixed-width unicode array.
array_full = np.full((2, 3), 'Three-Six-Five')
array_full

array([['Three-Six-Five', 'Three-Six-Five', 'Three-Six-Five'],
       ['Three-Six-Five', 'Three-Six-Five', 'Three-Six-Five']],
      dtype='<U14')

### "_like" functions

In [75]:
# A small 3x5 integer matrix, written one row per line for readability.
matrix_A = np.array([
    [1, 0, 9, 2, 2],
    [3, 23, 4, 5, 1],
    [0, 2, 3, 4, 1],
])
matrix_A

array([[ 1,  0,  9,  2,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [76]:
# empty_like() copies the shape and dtype of its prototype (matrix_A) but leaves the
# contents uninitialised, so the displayed values are arbitrary.
# Both shape and dtype can be overridden with the corresponding arguments, although at
# that point plain np.empty() is the more natural choice.
array_empty_like = np.empty_like(matrix_A)
array_empty_like

array([[      0,       0,       0,       0,       0],
       [      0,       0,       0,       0,       0],
       [   1780,       0,       0, 7274595,       0]])

In [77]:
# zeros_like(): zeros with the shape and dtype of matrix_A.
# np.ones_like() and np.full_like() are the matching variants for ones and arbitrary fill values.
array_0s_like = np.zeros_like(matrix_A)
array_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [78]:
# ones_like(): ones with the shape and dtype of matrix_A.
array_1s_like = np.ones_like(matrix_A)
array_1s_like

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [79]:
# full_like(): every cell set to fill_value, with the shape and dtype of matrix_A.
array_full_like = np.full_like(matrix_A, fill_value=-3)
array_full_like

array([[-3, -3, -3, -3, -3],
       [-3, -3, -3, -3, -3],
       [-3, -3, -3, -3, -3]])

### np.arange()

In [11]:
# range(30) on its own is a lazy range object; wrapping it in list() materialises
# every value from 0 to 29 so that they can be displayed.
list(range(30))

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

In [31]:
# np.arange(30) is the NumPy counterpart of range(30): it returns an ndarray holding 0..29.
array_rng = np.arange(30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [32]:
# np.arange([start,] stop[, step]): "stop" is the one required argument.
# A single positional value, as in np.arange(30), is read as the stop and the
# sequence starts from the origin (0).
# Passing only start= therefore leaves the stop undefined and NumPy raises a TypeError.
# The error is the point of this cell, so it is caught and printed rather than left to
# abort a "Restart & Run All".
try:
    array_rng = np.arange(start=30)
except TypeError as exc:
    arange_error = f"{type(exc).__name__}: {exc}"
    print(arange_error)

TypeError: arange() requires stop to be specified.

In [33]:
# "stop" is the only mandatory argument of np.arange(). When it is the only value given,
# the sequence starts from the origin (0) and advances in steps of 1.
# (Passing start= on its own is not accepted, because no stop is defined.)
array_rng = np.arange(stop=30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [14]:
# Spelling out both ends of the interval: 0 is included, 30 is not.
array_rng = np.arange(0, 30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [34]:
# The step does not have to share the type of start/stop: a float step applied to
# integer bounds yields a float array.
array_rng = np.arange(0, 30, 2.5)
array_rng

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5])

In [36]:
# dtype= sets the type of the returned array. Every multiple of 2.5 in this range is
# exactly representable in float32, so the values match the float64 version.
array_rng = np.arange(0, 30, 2.5, dtype=np.float32)
array_rng

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5], dtype=float32)

In [37]:
# The length is still (30 - 0) / 2.5 = 12 values, but an integer dtype cannot hold the
# fractional step: the output advances by 2 each time and ends at 22 instead of 27.5.
# Avoid combining a non-integer step with an integer dtype when the values matter.
array_rng = np.arange(0, 30, 2.5, dtype=np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators

### Defining Random Generators

In [1]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg


## We import two classes from the numpy.random module: Generator (the random-number interface) and PCG64 (the bit generator that feeds it).

In [20]:
# RG is short for Random Generator: a Generator fed by a PCG64 bit generator.
# No seed is supplied here, so every run of this cell produces different numbers.
array_RG = gen(pcg())

# normal() with no size returns a single float, size=5 returns a 1-D array of five
# draws, and size=(5, 5) returns a 5x5 matrix of draws.
array_RG.normal(size=(5, 5))

array([[-0.44837358,  0.13235273, -3.01700802,  0.02548304, -0.03416517],
       [-0.89538964, -0.9848236 , -0.38842361,  0.14798483,  0.78223937],
       [ 0.95167509,  0.30954151,  0.77870455, -1.49360528, -0.03424709],
       [-1.18936797, -0.02851544,  0.36827536,  0.28156382, -0.82648709],
       [ 0.54017463,  0.07230061, -0.19643834, -1.57415588,  0.55951386]])

In [3]:
# A fresh, unseeded generator; size=5 asks for a 1-D array of five normal draws
# (omit size to get a single float instead).
array_RG = gen(pcg())
array_RG.normal(size=5)

array([ 0.0792718 , -1.25628625,  1.47027622, -1.53281616,  1.04857966])

In [6]:
# A fresh, unseeded generator; normal() without a size returns one float
# (pass size=5 to get an array of five draws instead).
array_RG = gen(pcg())
array_RG.normal()

0.9503219830290778

We can set a seed so that our random values don't change every time we re-run the code. We'll set the seed equal to 385.

In [18]:
# Seeding the bit generator fixes its starting state, so re-running this cell
# always reproduces the same 5x5 matrix.
array_RG = gen(pcg(seed=385))
array_RG.normal(size=(5, 5))

array([[ 0.58757155,  0.67786918,  0.32088577,  1.2639898 , -0.79476853],
       [-0.44132012,  0.15732474,  0.28671335,  1.18055171,  0.7857267 ],
       [-0.46498145, -0.57545262, -1.13858152,  0.03915425,  1.12793175],
       [-0.4722205 , -1.23289873, -0.20924805,  0.6882582 , -1.0789966 ],
       [ 0.53947602, -1.12568141,  0.14924496, -0.18714257,  1.32557172]])

In [19]:
# The seed only fixes where the generator starts. This call continues the same stream
# without re-seeding, so its values differ from the first draw.
array_RG.normal(size=(5, 5))

array([[ 0.09333917, -0.153469  ,  0.5017806 , -1.12493557,  1.13860241],
       [ 0.70079143,  0.0157305 ,  1.15557812,  0.74234318,  0.41304978],
       [-2.00492366, -1.34015399,  0.47794448,  1.07326768,  0.17614768],
       [ 0.22789123, -1.17715856,  0.68212043,  0.79068907,  3.34631185],
       [-0.48694875, -0.22277874, -0.34737805,  0.0789023 ,  0.20014986]])

### Generating Integers, Probabilities and Random Choices

In [22]:
# integers(10): whole numbers from 0 up to, but not including, 10.
array_RG = gen(pcg(seed=365))
array_RG.integers(10, size=(5, 5))

array([[0, 7, 6, 7, 8],
       [6, 6, 2, 0, 6],
       [3, 0, 3, 7, 9],
       [1, 1, 8, 7, 4],
       [4, 8, 6, 4, 9]], dtype=int64)

In [23]:
# With both bounds given, integers are drawn from low (inclusive) to high (exclusive).
array_RG = gen(pcg(seed=365))
array_RG.integers(low=10, high=100, size=(5, 5))

array([[18, 78, 64, 78, 84],
       [66, 67, 28, 10, 69],
       [45, 15, 37, 74, 96],
       [19, 21, 89, 73, 54],
       [53, 84, 66, 51, 92]], dtype=int64)

In [22]:
# random(): floats between 0 and 1, usable as probabilities.
array_RG = gen(pcg(seed=365))
array_RG.random(size=(5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [41]:
# choice(): picks elements from the given collection. No weights are supplied here;
# the p= argument can be used to weight the candidates.
array_RG = gen(pcg(seed=365))
array_RG.choice([1, 2, 3, 4, 5], size=(5, 5))

array([[1, 4, 4, 4, 5],
       [4, 4, 2, 1, 4],
       [2, 1, 2, 4, 5],
       [1, 1, 5, 4, 3],
       [3, 5, 4, 3, 5]])

In [88]:
# choice() needs exactly one probability per candidate.
# Here `m` holds 10 values but only 9 weights are supplied (and they add up to 1.8
# rather than 1), so NumPy rejects the call with a ValueError.
# The error is caught and printed so the notebook still runs from top to bottom.
m = [12, 3, 4, 10, -5, 9, 19, 21, 100, 89]
mismatched_p = [0.1, 0.1, 0.1, 0.1, 0.4, .4, .2, .2, .2]

array_RG = gen(pcg(seed=64))
try:
    array_RG.choice(m, p=mismatched_p, size=(3, 3))
except ValueError as exc:
    choice_error = f"{type(exc).__name__}: {exc}"
    print(choice_error)

ValueError: a and p must have same size

In [23]:
# Weighted choice: one probability per candidate, summing to 1, so 5 is drawn about
# 60% of the time. The candidates may be any 1-D collection (a list, a tuple, or an
# array such as matrix_A[0]).
array_RG = gen(pcg(seed=365))
array_RG.choice((1, 2, 3, 4, 5), p=[0.1, 0.1, 0.1, 0.1, 0.6], size=(5, 5))

array([[5, 5, 5, 3, 5],
       [1, 5, 2, 5, 5],
       [5, 5, 1, 5, 4],
       [3, 5, 5, 5, 5],
       [5, 1, 5, 5, 2]])

### Generating Arrays From Known Distributions

In [53]:
# Poisson draws using the default rate (lam is left unspecified).
array_RG = gen(pcg(seed=365))
array_RG.poisson(size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]], dtype=int64)

In [25]:
# Poisson draws with an explicit rate: lam = 10, so the values cluster around 10.
array_RG = gen(pcg(seed=365))
array_RG.poisson(lam=10, size=(5, 5))

array([[11, 12, 12, 14, 13],
       [ 9, 10, 11, 11,  8],
       [11,  8, 10,  9, 14],
       [ 7,  8,  9, 15, 15],
       [13,  8,  8,  7,  9]], dtype=int64)

In [54]:
# Binomial draws: 100 trials with success probability 0.7 each.
# Each value counts the successes, so the average is about n * p = 70 (70% of the trials).
array_RG = gen(pcg(seed=365))
array_RG.binomial(n=100, p=0.7, size=(5, 5))

array([[68, 67, 66, 79, 73],
       [76, 68, 72, 68, 68],
       [69, 73, 74, 64, 69],
       [68, 64, 62, 75, 63],
       [64, 65, 65, 70, 76]], dtype=int64)

In [55]:
# Logistic distribution with location = 9 and scale = 1.2.
array_RG = gen(pcg(seed=365))
array_RG.logistic(loc=9, scale=1.2, size=(5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

### Applications of Random Generators

#### Creating Tests

In [62]:
# Build the individual columns of the test dataset: 1000 draws each from five
# different distributions, all taken from one seeded generator so the dataset is
# reproducible. The order of the calls matters because they share that generator.
array_RG = gen(pcg(seed=365))

array_column_1 = array_RG.normal(loc=2, scale=3, size=1000)
array_column_2 = array_RG.normal(loc=7, scale=2, size=1000)
array_column_3 = array_RG.logistic(loc=11, scale=3, size=1000)
array_column_4 = array_RG.exponential(scale=4, size=1000)
array_column_5 = array_RG.geometric(p=0.7, size=1000)

In [69]:
# np.array() on the list of five columns yields one row per variable (5 x 1000).
# Transposing gives the usual layout of one row per observation (1000 x 5);
# without the transpose the dataset would be flipped.
random_test_data = np.array(
    [array_column_1, array_column_2, array_column_3, array_column_4, array_column_5]
).T
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [68]:
# With the transposed dataset (one row per observation, one column per variable)
# this reports (1000, 5). A result of (5, 1000) means the un-transposed variant of
# random_test_data is still in memory and the cell that builds it should be re-run.
random_test_data.shape

(5, 1000)

In [70]:
# Save the dataset to an external CSV file (created relative to the working directory).
#   "Random-Test-from-NumPy.csv" -> name of the file being created
#   random_test_data              -> the data being exported
#   fmt = '%s'                    -> each value is written as a string
#   delimiter = ','               -> values within a row are separated by commas
# These arguments are discussed in more detail a bit later.
np.savetxt(
    "Random-Test-from-NumPy.csv",
    random_test_data,
    fmt='%s',
    delimiter=',',
)

In [73]:
# Read the CSV file created by np.savetxt() back in to check that the export worked.
np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=',')

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [33]:
# Keep the imported data in a variable this time and show it with print(),
# which gives the plain-text form of the array rather than its repr.
rand_test_data = np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=',')
print(rand_test_data)

[[ 1.59077303  6.42174295 10.14698427  6.91500737  1.        ]
 [ 2.28243293  8.57902322 15.93309953  6.243605    1.        ]
 [ 1.81098674  5.17270135 -0.46878789  2.44997251  1.        ]
 ...
 [ 0.1973629   4.3465854   2.66485989  0.80935387  1.        ]
 [-2.21015722  8.2176402  12.69328115  0.50644607  2.        ]
 [ 2.91161235  7.90337695 11.79840961  4.86816939  1.        ]]
